# Source code for pmdarima.datasets.woolyrnq

# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# This is the woolyrnq dataset found in the R forecast package.

import numpy as np
import pandas as pd

from ..compat import DTYPE

# Names exported by ``from <this module> import *``: only the dataset loader.
__all__ = [
    'load_woolyrnq'
]


def load_woolyrnq(as_series=False, dtype=DTYPE):
    """Quarterly production of woollen yarn in Australia.

    This time-series records the quarterly production (in tonnes) of woollen
    yarn in Australia between Mar 1965 and Sep 1994.

    Parameters
    ----------
    as_series : bool, optional (default=False)
        Whether to return a Pandas series. If True, the index will be set to
        the observed years/quarters, formatted as ``"Q<quarter> <year>"``
        (e.g. ``"Q1 1965"``). If False, will return a 1d numpy array.

    dtype : type, optional (default=np.float64)
        The type to return for the array. Default is np.float64, which is used
        throughout the package as the default type.

    Returns
    -------
    rslt : np.ndarray or pd.Series, shape=(n_samples,)
        The woolyrnq dataset. There are 119 observations. A numpy array is
        returned when ``as_series`` is False, otherwise a Pandas series.

    Notes
    -----
    This is quarterly data, so *m* should be set to 4 when using in a seasonal
    context.

    References
    ----------
    .. [1] https://www.rdocumentation.org/packages/forecast/versions/8.1/topics/woolyrnq

    Examples
    --------
    An integer dtype is requested explicitly below so that the values print
    without a trailing decimal point.

    >>> import numpy as np
    >>> from pmdarima.datasets import load_woolyrnq
    >>> load_woolyrnq(dtype=np.int64)
    array([6172, 6709, 6633, 6660, 6786, 6800, 6730, 6765, 6720, 7133, 6946,
           7095, 7047, 6757, 6915, 6921, 7064, 7206, 7190, 7402, 7819, 7300,
           7105, 7259, 7001, 7475, 6840, 7061, 5845, 7529, 7819, 6943, 5714,
           6556, 7045, 5947, 5463, 6127, 5540, 4235, 3324, 4793, 5906, 5834,
           5240, 5458, 5505, 5002, 3999, 4826, 5318, 4681, 4442, 5305, 5466,
           4995, 4573, 5081, 5696, 5079, 4373, 4986, 5341, 4800, 4161, 5007,
           5464, 5127, 4240, 5338, 5129, 4437, 3642, 4602, 5524, 4895, 4380,
           5186, 6080, 5588, 5009, 5663, 6540, 6262, 5169, 5819, 6339, 5981,
           4766, 5976, 6590, 5590, 5135, 5762, 6077, 5882, 4247, 5264, 5146,
           4868, 4329, 4869, 5127, 4868, 3827, 4987, 5222, 4928, 3930, 4469,
           4954, 4752, 3888, 4588, 5309, 4732, 4837, 6135, 6396])

    >>> load_woolyrnq(True, dtype=np.int64).head()
    Q1 1965    6172
    Q2 1965    6709
    Q3 1965    6633
    Q4 1965    6660
    Q1 1966    6786
    dtype: int64
    """  # noqa: E501
    # One row per year (Q1..Q4), starting in 1965; the final row holds only
    # the three quarters observed in 1994.
    rslt = np.array([
        6172, 6709, 6633, 6660,
        6786, 6800, 6730, 6765,
        6720, 7133, 6946, 7095,
        7047, 6757, 6915, 6921,
        7064, 7206, 7190, 7402,
        7819, 7300, 7105, 7259,
        7001, 7475, 6840, 7061,
        5845, 7529, 7819, 6943,
        5714, 6556, 7045, 5947,
        5463, 6127, 5540, 4235,
        3324, 4793, 5906, 5834,
        5240, 5458, 5505, 5002,
        3999, 4826, 5318, 4681,
        4442, 5305, 5466, 4995,
        4573, 5081, 5696, 5079,
        4373, 4986, 5341, 4800,
        4161, 5007, 5464, 5127,
        4240, 5338, 5129, 4437,
        3642, 4602, 5524, 4895,
        4380, 5186, 6080, 5588,
        5009, 5663, 6540, 6262,
        5169, 5819, 6339, 5981,
        4766, 5976, 6590, 5590,
        5135, 5762, 6077, 5882,
        4247, 5264, 5146, 4868,
        4329, 4869, 5127, 4868,
        3827, 4987, 5222, 4928,
        3930, 4469, 4954, 4752,
        3888, 4588, 5309, 4732,
        4837, 6135, 6396]).astype(dtype)

    if not as_series:
        return rslt

    # Build "Q<quarter> <year>" labels for every quarter of 1965-1994, then
    # truncate to the number of observations. The series ends in Q3 1994, so
    # this drops the unobserved Q4 1994 label and keeps labels and data the
    # same length.
    index = [
        "Q%i %i" % (quarter, year)
        for year in range(1965, 1995)
        for quarter in range(1, 5)
    ][:rslt.shape[0]]

    return pd.Series(rslt, index=index)