PK N[ example_simple_fit.py"""
=======================
Simple auto_arima model
=======================
This is a simple example of how we can fit an ARIMA model in several lines
without knowing anything about our data or optimal hyper parameters.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
import numpy as np
from matplotlib import pyplot as plt
# #############################################################################
# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = data[:150], data[150:]
# Fit a simple auto_arima model
arima = pm.auto_arima(train, error_action='ignore', trace=1,
seasonal=True, m=12)
# #############################################################################
# Plot actual test vs. forecasts:
x = np.arange(test.shape[0])
plt.scatter(x, test, marker='x')
plt.plot(x, arima.predict(n_periods=test.shape[0]))
plt.title('Actual test samples vs. forecasts')
plt.show()
PK [N$ example_pipeline.py"""
=========================
Pipelines with auto_arima
=========================
Like scikit-learn, ``pmdarima`` can fit "pipeline" models. That is, a pipeline
constitutes a list of arbitrary length comprised of any number of
``BaseTransformer`` objects strung together ordinally, and finished with an
``AutoARIMA`` object.
The benefit of a pipeline is the ability to condense a complex sequence of
stateful transformations into a single object that can call ``fit``,
``predict`` and ``update``. It can also be serialized into *one* pickle file,
which greatly simplifies your life.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import numpy as np
import pmdarima as pm
from pmdarima import pipeline, preprocessing as ppc, arima
from matplotlib import pyplot as plt
# Load the data and split it into separate pieces
data = pm.datasets.load_wineind()
train, test = data[:150], data[150:]
# Let's create a pipeline with multiple stages... the Wineind dataset is
# seasonal, so we'll include a FourierFeaturizer so we can fit it without
# seasonality
pipe = pipeline.Pipeline([
("fourier", ppc.FourierFeaturizer(m=12)),
("arima", arima.AutoARIMA(stepwise=True, trace=1, error_action="ignore",
seasonal=False, # because we use Fourier
transparams=False,
suppress_warnings=True))
])
pipe.fit(train)
print("Model fit:")
print(pipe)
# We can compute predictions the same way we would on a normal ARIMA object:
preds, conf_int = pipe.predict(n_periods=10, return_conf_int=True)
print("\nForecasts:")
print(preds)
# Let's take a look at the actual vs. the predicted values:
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
n_train = train.shape[0]
x = np.arange(n_train + preds.shape[0])
axes[0].plot(x[:n_train], train, alpha=0.75)
# axes[0].scatter(x[n_train:], preds, alpha=0.4, marker='o')
axes[0].scatter(x[n_train:], test[:preds.shape[0]], alpha=0.4, marker='x')
axes[0].fill_between(x[n_train:], conf_int[:, 0], conf_int[:, 1],
alpha=0.1, color='b')
axes[0].set_title('Actual test samples vs. forecasts')
axes[0].set_xlim((0, data.shape[0]))
# We can also call `update` directly on the pipeline object, which will update
# the intermittent transformers, where necessary:
newly_observed, still_test = test[:15], test[15:]
pipe.update(newly_observed, maxiter=10)
# Calling predict will now predict from newly observed values
new_preds = pipe.predict(still_test.shape[0])
print(new_preds)
x2 = np.arange(data.shape[0])
n_trained_on = n_train + newly_observed.shape[0]
axes[1].plot(x2[:n_train], train, alpha=0.75)
axes[1].plot(x2[n_train: n_trained_on], newly_observed, alpha=0.75, c='orange')
# axes[1].scatter(x2[n_trained_on:], new_preds, alpha=0.4, marker='o')
axes[1].scatter(x2[n_trained_on:], still_test, alpha=0.4, marker='x')
axes[1].set_title('Actual test samples vs. forecasts')
axes[1].set_xlim((0, data.shape[0]))
plt.show()
PK fN,J J arima/example_auto_arima.py"""
===========================
Fitting an auto_arima model
===========================
This example demonstrates how we can use the ``auto_arima`` function to
select an optimal time series model. We'll be fitting our model on the lynx
dataset available in the :ref:`datasets` submodule.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
# #############################################################################
# Load the data and split it into separate pieces
data = pm.datasets.load_lynx()
train, test = data[:90], data[90:]
# Fit a simple auto_arima model
modl = pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore')
# Create predictions for the future, evaluate on test
preds, conf_int = modl.predict(n_periods=test.shape[0], return_conf_int=True)
# Print the error:
print("Test RMSE: %.3f" % np.sqrt(mean_squared_error(test, preds)))
# #############################################################################
# Plot the points and the forecasts
x_axis = np.arange(train.shape[0] + preds.shape[0])
x_years = x_axis + 1821 # Year starts at 1821
plt.plot(x_years[x_axis[:train.shape[0]]], train, alpha=0.75)
plt.plot(x_years[x_axis[train.shape[0]:]], preds, alpha=0.75) # Forecasts
plt.scatter(x_years[x_axis[train.shape[0]:]], test,
alpha=0.4, marker='x') # Test data
plt.fill_between(x_years[x_axis[-preds.shape[0]:]],
conf_int[:, 0], conf_int[:, 1],
alpha=0.1, color='b')
plt.title("Lynx forecasts")
plt.xlabel("Year")
PK gN;cX8Z Z arima/example_add_new_samples.py"""
=====================================
Adding new observations to your model
=====================================
This example demonstrates how to add new ground truth
observations to your model so that forecasting continues
with respect to true, observed values. This also slightly
updates the model parameters, taking several new steps from
the existing model parameters.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
import matplotlib.pyplot as plt
import numpy as np
# #############################################################################
# Load the data and split it into separate pieces
data = pm.datasets.load_lynx()
train, test = data[:100], data[100:]
# #############################################################################
# Fit with some validation (cv) samples
arima = pm.auto_arima(train, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
out_of_sample_size=10, suppress_warnings=True,
stepwise=True, error_action='ignore')
# Now plot the results and the forecast for the test set
preds, conf_int = arima.predict(n_periods=test.shape[0],
return_conf_int=True)
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
x_axis = np.arange(train.shape[0] + preds.shape[0])
axes[0].plot(x_axis[:train.shape[0]], train, alpha=0.75)
axes[0].scatter(x_axis[train.shape[0]:], preds, alpha=0.4, marker='o')
axes[0].scatter(x_axis[train.shape[0]:], test, alpha=0.4, marker='x')
axes[0].fill_between(x_axis[-preds.shape[0]:], conf_int[:, 0], conf_int[:, 1],
alpha=0.1, color='b')
# fill the section where we "held out" samples in our model fit
axes[0].set_title("Train samples & forecasted test samples")
# Now add the actual samples to the model and create NEW forecasts
arima.update(test)
new_preds, new_conf_int = arima.predict(n_periods=10, return_conf_int=True)
new_x_axis = np.arange(data.shape[0] + 10)
axes[1].plot(new_x_axis[:data.shape[0]], data, alpha=0.75)
axes[1].scatter(new_x_axis[data.shape[0]:], new_preds, alpha=0.4, marker='o')
axes[1].fill_between(new_x_axis[-new_preds.shape[0]:],
new_conf_int[:, 0],
new_conf_int[:, 1],
alpha=0.1, color='g')
axes[1].set_title("Added new observed values with new forecasts")
plt.show()
PK cN,jRJ # arima/example_persisting_a_model.py"""
=========================
Persisting an ARIMA model
=========================
This example demonstrates how we can persist an ARIMA model to disk after
fitting it. It can then be loaded back up and used to generate forecasts.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
from sklearn.externals import joblib # for persistence
import os
# #############################################################################
# Load the data and split it into separate pieces
y = pm.datasets.load_wineind()
train, test = y[:125], y[125:]
# Fit an ARIMA
arima = pm.ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
arima.fit(y)
# #############################################################################
# Persist a model and create predictions after re-loading it
pickle_tgt = "arima.pkl"
try:
# Pickle it
joblib.dump(arima, pickle_tgt, compress=3)
# Load the model up, create predictions
arima_loaded = joblib.load(pickle_tgt)
preds = arima_loaded.predict(n_periods=test.shape[0])
print("Predictions: %r" % preds)
finally:
# Remove the pickle file at the end of this example
try:
os.unlink(pickle_tgt)
except OSError:
pass
PK gN6
datasets/example_load_data.py"""
===============
Dataset loading
===============
In this example, we demonstrate pyramid's built-in toy datasets that can be
used for benchmarking or experimentation. Pyramid has several built-in datasets
that exhibit seasonality, non-stationarity, and other time series nuances.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
# #############################################################################
# You can load the datasets via load_
lynx = pm.datasets.load_lynx()
print("Lynx array:")
print(lynx)
# You can also get a series, if you rather
print("\nLynx series head:")
print(pm.datasets.load_lynx(as_series=True).head())
# Several other datasets:
air_passengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
heart_rate = pm.datasets.load_heartrate()
wineind = pm.datasets.load_wineind()
woolyrnq = pm.datasets.load_woolyrnq()
PK gNs^` # utils/example_array_differencing.py"""
==================
Array differencing
==================
In this example, we demonstrate pyramid's array differencing, and how it's used
in conjunction with the ``d`` term to lag a time series.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
from pmdarima.utils import array
# Build an array and show first order differencing results
x = array.c(10, 4, 2, 9, 34)
lag_1 = array.diff(x, lag=1, differences=1)
# The result will be the same as: x[1:] - x[:-1]
print(lag_1) # [-6., -2., 7., 25.]
# Note that lag and differences are not the same! If we crank diff up by one,
# it performs the same differencing as above TWICE. Lag, therefore, controls
# the number of steps backward the ts looks when it differences, and the
# `differences` parameter controls how many times to repeat.
print(array.diff(x, lag=1, differences=2)) # [4., 9., 18.]
# Conversely, when we set lag to 2, the array looks two steps back for its
# differencing operation (only one).
print(array.diff(x, lag=2, differences=1)) # [-8., 5., 32.]
# The lag parameter is controlled by `m`, which is the seasonal periodicity of
# a time series. If your series is non-seasonal, lag will typically be 1.
PK gN\Z Z $ utils/example_array_concatenation.py"""
===================
Array concatenation
===================
In this example, we demonstrate pyramid's convenient ``c`` function, which is,
in essence, the same as R's. It's nothing more than a convenience function in
the package, but one you should understand if you're contributing.
.. raw:: html
"""
print(__doc__)
# Author: Taylor Smith
import pmdarima as pm
import numpy as np
# #############################################################################
# You can use the 'c' function to define an array from *args
array1 = pm.c(1, 2, 3, 4, 5)
# Or you can define an array from an existing iterable:
array2 = pm.c([1, 2, 3, 4, 5])
assert np.array_equal(array1, array2)
# You can even use 'c' to flatten arrays:
array_flat = pm.c(1, 2, 3, [4, 5])
assert np.array_equal(array_flat, np.arange(5) + 1)
PK N[ example_simple_fit.pyPK [N$ 5 example_pipeline.pyPK fN,J J 2 arima/example_auto_arima.pyPK gN;cX8Z Z arima/example_add_new_samples.pyPK cN,jRJ # M! arima/example_persisting_a_model.pyPK gN6
|&