Source code for pmdarima.preprocessing.endog.boxcox

# -*- coding: utf-8 -*-

from scipy import stats

import numpy as np
import warnings

from ...compat import check_is_fitted, pmdarima as pm_compat
from .base import BaseEndogTransformer

# Names exported by ``from <this module> import *``; only the transformer
# class is part of the public API.
__all__ = ['BoxCoxEndogTransformer']


class BoxCoxEndogTransformer(BaseEndogTransformer):
    r"""Apply the Box-Cox transformation to an endogenous array

    The Box-Cox transformation is applied to non-normal data to coerce it more
    towards a normal distribution. It's specified as::

        (((y + lam2) ** lam1) - 1) / lam1, if lam1 != 0, else log(y + lam2)

    where ``lam1`` is ``lmbda`` (or its estimate, if ``lmbda`` is None) and
    ``lam2`` is ``lmbda2``.

    Parameters
    ----------
    lmbda : float or None, optional (default=None)
        The lambda value for the Box-Cox transformation, if known. If not
        specified, it will be estimated via MLE.

    lmbda2 : float, optional (default=0.)
        A non-negative value to add to ``y`` to make it positive. If, after
        adding ``lmbda2``, there are still values <= 0, they are handled
        according to ``neg_action``.

    neg_action : str, optional (default="raise")
        How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
        One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
        values <= 0 will be truncated to the value of ``floor``.

    floor : float, optional (default=1e-16)
        A positive value to truncate values to if there are values in ``y``
        that are zero or negative and ``neg_action`` is not 'raise'. Note that
        if values are truncated, invertibility will not be preserved, and the
        transformed array may not be perfectly inverse-transformed.
    """

    def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):
        self.lmbda = lmbda
        self.lmbda2 = lmbda2
        self.neg_action = neg_action
        self.floor = floor

    def _shift_and_floor(self, y, lam2):
        """Add ``lam2`` to ``y`` and apply ``neg_action`` to values still <= 0

        Shared by ``fit`` and ``transform`` so that both treat non-positive
        values identically.

        Parameters
        ----------
        y : np.ndarray
            The validated endogenous array.

        lam2 : float
            The shift to add to ``y``.

        Returns
        -------
        shifted : np.ndarray
            A new array equal to ``y + lam2``, with values <= 0 replaced by
            ``floor`` when ``neg_action`` is not 'raise'.

        Raises
        ------
        ValueError
            If values <= 0 remain after the shift and ``neg_action`` is
            'raise'.
        """
        # Out-of-place addition: the array passed in is never modified, so
        # this is safe regardless of whether the caller handed us a copy.
        shifted = y + lam2

        neg_mask = shifted <= 0.
        if neg_mask.any():
            action = self.neg_action
            msg = "Negative or zero values present in y"
            if action == "raise":
                raise ValueError(msg)
            elif action == "warn":
                warnings.warn(msg, UserWarning)
            # Any action other than 'raise' (including 'ignore') truncates
            shifted[neg_mask] = self.floor

        return shifted

    def fit(self, y, X=None, **kwargs):  # TODO: kwargs go away
        """Fit the transformer

        Learns the value of ``lmbda``, if not specified in the constructor.
        If defined in the constructor, is not re-learned.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        self : BoxCoxEndogTransformer
            The fitted transformer, with ``lam1_`` and ``lam2_`` set.

        Raises
        ------
        ValueError
            If ``lmbda2`` is negative, or if ``lmbda`` must be estimated and
            values <= 0 remain after adding ``lmbda2`` while ``neg_action``
            is 'raise'.
        """
        lam1 = self.lmbda
        lam2 = self.lmbda2

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        if lam2 < 0:
            raise ValueError("lmbda2 must be a non-negative scalar value")

        if lam1 is None:
            y, _ = self._check_y_X(y, X)
            # scipy's MLE requires strictly positive data, so honor
            # ``neg_action``/``floor`` here exactly as ``transform`` does
            # rather than letting scipy fail on data the user asked us to
            # truncate.
            shifted = self._shift_and_floor(y, lam2)
            _, lam1 = stats.boxcox(shifted, lmbda=None, alpha=None)

        self.lam1_ = lam1
        self.lam2_ = lam2
        return self

    def transform(self, y, X=None, **kwargs):
        """Transform the new array

        Apply the Box-Cox transformation to the array after learning the
        lambda parameter.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y_transform : array-like or None
            The Box-Cox transformed y array

        X : array-like or None
            The X array

        Raises
        ------
        ValueError
            If values <= 0 remain after adding ``lam2_`` and ``neg_action``
            is 'raise'.
        """
        check_is_fitted(self, "lam1_")

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_X(y, X)
        y = self._shift_and_floor(y, lam2)

        # lam1 == 0 is the limiting (logarithmic) case of the transformation
        if lam1 == 0:
            return np.log(y), exog
        return (y ** lam1 - 1) / lam1, exog

    def inverse_transform(self, y, X=None, **kwargs):  # TODO: kwargs go away
        """Inverse transform a transformed array

        Inverse the Box-Cox transformation on the transformed array. Note that
        if truncation happened in the ``transform`` method, invertibility will
        not be preserved, and the transformed array may not be perfectly
        inverse-transformed.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The transformed endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y : array-like or None
            The inverse-transformed y array

        X : array-like or None
            The X array, as returned by input validation (no inverse
            transformation is applied to it here).
        """
        check_is_fitted(self, "lam1_")

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_X(y, X)

        # Inverse of the logarithmic case
        if lam1 == 0:
            return np.exp(y) - lam2, exog

        numer = y * lam1  # remove denominator
        numer += 1.  # add 1 back to it
        de_exp = numer ** (1. / lam1)  # de-exponentiate
        return de_exp - lam2, exog