# -*- coding: utf-8 -*-
from .base import BaseExogFeaturizer
from ...compat import pmdarima as pm_compat
import numpy as np
import pandas as pd
import warnings
# Names exported by ``from <this module> import *``; only the featurizer
# class is listed.
__all__ = [
"DateFeaturizer"
]
# TODO: future use cases might include with_hour_of_day
def _safe_hstack_numpy(left, right):
if left is None:
return right
return np.hstack([left, right])
[docs]class DateFeaturizer(BaseExogFeaturizer):
"""Create exogenous date features
Given an exogenous feature of dtype TimeStamp, creates a set of dummy and
ordinal variables indicating:
* Day of the week
Particular days of the week may align with quasi-seasonal trends.
* Day of the month
Useful for modeling things like the end-of-month effect, i.e., a
department spends the remainder of its monthly budget to avoid future
budget cuts, and the last Friday of the month is heavy on spending.
The motivation for this featurizer comes from a blog post by Rob Hyndman
[1] on modeling quasi-seasonal patterns in time series. Note that an
exogenous array _must_ be provided at inference.
Parameters
----------
column_name : str
The name of the date column. This forces the exogenous array to be a
Pandas DataFrame, and does not permit a np.ndarray as others may.
with_day_of_week : bool, optional (default=True)
Whether to include dummy variables for the day of the week (in {0, 1}).
with_day_of_month : bool, optional (default=True)
Whether to include an ordinal feature for the day of the month (1-31).
prefix : str or None, optional (default=None)
The feature prefix
Examples
--------
>>> from pmdarima.datasets._base import load_date_example
>>> y, X = load_date_example()
>>> feat = DateFeaturizer(column_name='date')
>>> _, X_prime = feat.fit_transform(y, X)
>>> X_prime.head()
DATE-WEEKDAY-0 DATE-WEEKDAY-1 ... DATE-WEEKDAY-6 DATE-DAY-OF-MONTH
0 0 1 ... 0 1
1 0 0 ... 0 2
2 0 0 ... 0 3
3 0 0 ... 0 4
4 0 0 ... 0 5
Notes
-----
* In order to use time series with holes, it is required that an X
array be provided at prediction time. Other featurizers automatically
create exog arrays into the future for inference, but this is not
possible currently with the date featurizer. Your code must provide the
dates for which you are forecasting as exog features.
* The ``column_name`` field is dropped in the transformed exogenous array.
References
----------
.. [1] https://robjhyndman.com/hyndsight/monthly-seasonality/
"""
def __init__(self, column_name, with_day_of_week=True,
             with_day_of_month=True, prefix=None):
    """Record the featurizer's configuration.

    ``prefix`` is handed to the parent initializer; the remaining
    arguments are stored as same-named attributes without validation.
    """
    super().__init__(prefix=prefix)

    # Plain attribute storage only; the values are not inspected here
    self.with_day_of_month = with_day_of_month
    self.with_day_of_week = with_day_of_week
    self.column_name = column_name
def _check_X(self, X):
# exog must be a pd.DataFrame, and the column_name must be a timestamp
if not isinstance(X, pd.DataFrame):
raise TypeError(
f"X must be a DataFrame to use the DateFeaturizer, but got "
f"type={type(X)}"
)
name = self.column_name
if not (name in X.columns and
'datetime64' in X[name].dtype.name):
raise ValueError("column '%s' must exist in exog as a "
"pd.Timestamp type"
% name)
def _get_prefix(self):
pfx = self.prefix
if pfx is None:
pfx = "DATE"
return pfx
# Overrides super abstract method
def _get_feature_names(self, X):
pfx = self._get_prefix()
out = []
# Something to note is that in Python, 0 is Monday (not Sunday). See
# comments here: https://stackoverflow.com/a/9847269/3015734
# E.g., ['DATE-WEEKDAY-0', 'DATE-WEEKDAY-1', ...]
if self.with_day_of_week:
out += ['%s-WEEKDAY-%i' % (pfx, i) for i in range(7)]
if self.with_day_of_month:
out += ['%s-DAY-OF-MONTH' % pfx]
return out
def fit(self, y, X=None, **kwargs):  # TODO: remove kwargs later
    """Fit the transformer

    Only validates the inputs and the configuration; no attributes are
    set on the instance by this method.

    Parameters
    ----------
    y : array-like or None, shape=(n_samples,)
        The endogenous (time-series) array.

    X : array-like, shape=(n_samples, n_features)
        The exogenous array of additional covariates. Must include the
        ``column_name`` feature, which must be a pd.Timestamp dtype.

    **kwargs : keyword args
        Passed through to ``pm_compat.get_X`` together with ``X``. This is
        a temporary shim kept until ``exogenous`` support is removed.

    Returns
    -------
    self : DateFeaturizer
        This instance.

    Raises
    ------
    TypeError
        If ``X`` is not a ``pd.DataFrame``.

    ValueError
        If ``column_name`` is missing from ``X`` or is not a datetime64
        column.

    Warns
    -----
    UserWarning
        If both ``with_day_of_week`` and ``with_day_of_month`` are
        disabled, since the transformer would then produce no features.
    """
    # Temporary shim until we remove `exogenous` support completely
    X, _ = pm_compat.get_X(X, **kwargs)
    y, X = self._check_y_X(y, X, null_allowed=False)

    # enforce pd.DataFrame
    self._check_X(X)

    # we don't _technically_ need to do this, but it seems like a nice bit
    # of friendly validation to make sure that at least _something_ will
    # happen in this transformer.
    if not (self.with_day_of_month or self.with_day_of_week):
        # The message names this class (it previously referred to a
        # nonexistent "DateTransformer"); stacklevel=2 attributes the
        # warning to the code that called fit().
        warnings.warn("DateFeaturizer will have no effect given disabled "
                      "parameters", stacklevel=2)

    return self