# Source code for pmdarima.preprocessing.endog.boxcox

# -*- coding: utf-8 -*-

from scipy import stats
from sklearn.utils.validation import check_is_fitted

import numpy as np
import warnings

from .base import BaseEndogTransformer

__all__ = ['BoxCoxEndogTransformer']


class BoxCoxEndogTransformer(BaseEndogTransformer):
    r"""Apply the Box-Cox transformation to an endogenous array

    The Box-Cox transformation is applied to non-normal data to coerce it more
    towards a normal distribution. It's specified as::

        (((y + lam2) ** lam1) - 1) / lam1, if lam1 != 0, else
        log(y + lam2)

    where ``lam1`` is ``lmbda`` (or its estimate when ``lmbda`` is None) and
    ``lam2`` is ``lmbda2``.

    Parameters
    ----------
    lmbda : float or None, optional (default=None)
        The lambda value for the Box-Cox transformation, if known. If not
        specified, it will be estimated via MLE.

    lmbda2 : float, optional (default=0.)
        The value to add to ``y`` to make it positive. Must be non-negative,
        otherwise a ValueError is raised in ``fit``. If, after adding
        ``lmbda2``, there are still values <= 0, they are handled according
        to ``neg_action``.

    neg_action : str, optional (default="raise")
        How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
        One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
        values <= 0 will be truncated to the value of ``floor``.

    floor : float, optional (default=1e-16)
        The value (which should be positive) that entries of ``y`` are
        truncated to when they are zero or negative and ``neg_action`` is not
        'raise'. Note that if values are truncated, invertibility will not be
        preserved, and the transformed array may not be perfectly
        inverse-transformed.
    """
    def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):

        self.lmbda = lmbda
        self.lmbda2 = lmbda2
        self.neg_action = neg_action
        self.floor = floor

    def fit(self, y, exogenous=None):
        """Fit the transformer

        Learns the value of ``lmbda``, if not specified in the constructor.
        If defined in the constructor, is not re-learned.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        self : BoxCoxEndogTransformer
            The fitted transformer, with ``lam1_`` and ``lam2_`` set.

        Raises
        ------
        ValueError
            If ``lmbda2`` is negative.
        """
        lam1 = self.lmbda
        lam2 = self.lmbda2

        if lam2 < 0:
            raise ValueError("lmbda2 must be a non-negative scalar value")

        if lam1 is None:
            y, _ = self._check_y_exog(y, exogenous)
            # Estimate lambda on the *shifted* series, since ``y + lam2`` is
            # what ``transform`` applies the power transform to. Estimating on
            # the raw ``y`` would both fit the wrong data and reject series
            # that only become positive after the shift.
            _, lam1 = stats.boxcox(y + lam2, lmbda=None, alpha=None)

        self.lam1_ = lam1
        self.lam2_ = lam2
        return self

    def transform(self, y, exogenous=None, **_):
        """Transform the new array

        Apply the Box-Cox transformation to the array after learning the
        lambda parameter.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y_transform : array-like or None
            The Box-Cox transformed y array

        exogenous : array-like or None
            The exog array

        Raises
        ------
        ValueError
            If values <= 0 remain after adding ``lmbda2`` and ``neg_action``
            is 'raise'.
        """
        check_is_fitted(self, "lam1_")
        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_exog(y, exogenous)
        # Out-of-place add: yields a fresh array, so neither the shift nor the
        # floor assignment below can write through to the caller's data, and
        # no in-place dtype-casting error can occur.
        y = y + lam2

        neg_mask = y <= 0.
        if neg_mask.any():
            action = self.neg_action
            msg = "Negative or zero values present in y"
            if action == "raise":
                raise ValueError(msg)
            elif action == "warn":
                warnings.warn(msg, UserWarning)
            # Any action other than 'raise' truncates the offending values.
            y[neg_mask] = self.floor

        if lam1 == 0:
            return np.log(y), exog
        return (y ** lam1 - 1) / lam1, exog

    def inverse_transform(self, y, exogenous=None):
        """Inverse transform a transformed array

        Inverse the Box-Cox transformation on the transformed array. Note that
        if truncation happened in the ``transform`` method, invertibility will
        not be preserved, and the transformed array may not be perfectly
        inverse-transformed.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The transformed endogenous (time-series) array.

        exogenous : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y : array-like or None
            The inverse-transformed y array

        exogenous : array-like or None
            The exog array
        """
        check_is_fitted(self, "lam1_")
        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_exog(y, exogenous)
        if lam1 == 0:
            # Inverse of log(y + lam2)
            return np.exp(y) - lam2, exog

        # Inverse of ((y + lam2) ** lam1 - 1) / lam1, undone step by step
        numer = y * lam1  # remove denominator
        numer += 1.  # add 1 back to it
        de_exp = numer ** (1. / lam1)  # de-exponentiate
        return de_exp - lam2, exog