# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# This is the wineind dataset found in R.
from __future__ import absolute_import
import numpy as np
import pandas as pd
import calendar
__all__ = [
    'load_wineind'
]
def load_wineind(as_series=False):
    """Australian total wine sales by wine makers in bottles <= 1 litre.

    This time-series records wine sales by Australian wine makers between
    Jan 1980 -- Aug 1994. This dataset is found in the R ``forecast`` package.

    Parameters
    ----------
    as_series : bool, optional (default=False)
        Whether to return a Pandas series. If True, the index will be set to
        the observed years/months (strings such as ``"Jan 1980"``). If False,
        will return a 1d numpy array.

    Returns
    -------
    rslt : array-like, shape=(n_samples,)
        The wineind dataset. There are 176 observations.

    Examples
    --------
    >>> from pyramid.datasets import load_wineind
    >>> load_wineind()
    array([15136, 16733, 20016, 17708, 18019, 19227, 22893, 23739, 21133,
           22591, 26786, 29740, 15028, 17977, 20008, 21354, 19498, 22125,
           25817, 28779, 20960, 22254, 27392, 29945, 16933, 17892, 20533,
           23569, 22417, 22084, 26580, 27454, 24081, 23451, 28991, 31386,
           16896, 20045, 23471, 21747, 25621, 23859, 25500, 30998, 24475,
           23145, 29701, 34365, 17556, 22077, 25702, 22214, 26886, 23191,
           27831, 35406, 23195, 25110, 30009, 36242, 18450, 21845, 26488,
           22394, 28057, 25451, 24872, 33424, 24052, 28449, 33533, 37351,
           19969, 21701, 26249, 24493, 24603, 26485, 30723, 34569, 26689,
           26157, 32064, 38870, 21337, 19419, 23166, 28286, 24570, 24001,
           33151, 24878, 26804, 28967, 33311, 40226, 20504, 23060, 23562,
           27562, 23940, 24584, 34303, 25517, 23494, 29095, 32903, 34379,
           16991, 21109, 23740, 25552, 21752, 20294, 29009, 25500, 24166,
           26960, 31222, 38641, 14672, 17543, 25453, 32683, 22449, 22316,
           27595, 25451, 25421, 25288, 32568, 35110, 16052, 22146, 21198,
           19543, 22084, 23816, 29961, 26773, 26635, 26972, 30207, 38687,
           16974, 21697, 24179, 23757, 25013, 24019, 30345, 24488, 25156,
           25650, 30923, 37240, 17466, 19463, 24352, 26805, 25236, 24735,
           29356, 31234, 22724, 28496, 32857, 37198, 13652, 22784, 23565,
           26323, 23779, 27549, 29660, 23356])

    >>> load_wineind(True).head()
    Jan 1980    15136
    Feb 1980    16733
    Mar 1980    20016
    Apr 1980    17708
    May 1980    18019
    dtype: int64

    References
    ----------
    .. [1] https://www.rdocumentation.org/packages/forecast/versions/8.1/topics/wineind
    """
    # A fresh array is built on every call, so callers may mutate the result
    # without affecting later calls.
    rslt = np.array([15136, 16733, 20016, 17708, 18019, 19227, 22893, 23739,
                     21133, 22591, 26786, 29740, 15028, 17977, 20008, 21354,
                     19498, 22125, 25817, 28779, 20960, 22254, 27392, 29945,
                     16933, 17892, 20533, 23569, 22417, 22084, 26580, 27454,
                     24081, 23451, 28991, 31386, 16896, 20045, 23471, 21747,
                     25621, 23859, 25500, 30998, 24475, 23145, 29701, 34365,
                     17556, 22077, 25702, 22214, 26886, 23191, 27831, 35406,
                     23195, 25110, 30009, 36242, 18450, 21845, 26488, 22394,
                     28057, 25451, 24872, 33424, 24052, 28449, 33533, 37351,
                     19969, 21701, 26249, 24493, 24603, 26485, 30723, 34569,
                     26689, 26157, 32064, 38870, 21337, 19419, 23166, 28286,
                     24570, 24001, 33151, 24878, 26804, 28967, 33311, 40226,
                     20504, 23060, 23562, 27562, 23940, 24584, 34303, 25517,
                     23494, 29095, 32903, 34379, 16991, 21109, 23740, 25552,
                     21752, 20294, 29009, 25500, 24166, 26960, 31222, 38641,
                     14672, 17543, 25453, 32683, 22449, 22316, 27595, 25451,
                     25421, 25288, 32568, 35110, 16052, 22146, 21198, 19543,
                     22084, 23816, 29961, 26773, 26635, 26972, 30207, 38687,
                     16974, 21697, 24179, 23757, 25013, 24019, 30345, 24488,
                     25156, 25650, 30923, 37240, 17466, 19463, 24352, 26805,
                     25236, 24735, 29356, 31234, 22724, 28496, 32857, 37198,
                     13652, 22784, 23565, 26323, 23779, 27549, 29660, 23356])

    if not as_series:
        return rslt

    # Otherwise we want a series and have to build the "<Mon> <year>" index.
    # Labels are generated for every month of 1980-1994 and then truncated to
    # the number of observations, which strips Sep, Oct, Nov and Dec of 1994
    # (the data ends in Aug 1994).
    # NOTE: calendar.month_abbr follows the active locale; the labels are
    # English ("Jan", "Feb", ...) unless the process has changed LC_TIME.
    index = [
        "%s %i" % (calendar.month_abbr[month], year)
        for year in range(1980, 1995)
        for month in range(1, 13)
    ][:rslt.shape[0]]

    return pd.Series(rslt, index=index)