# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# This is the wineind dataset found in R.
from __future__ import absolute_import
import numpy as np
import pandas as pd
import calendar
__all__ = [
'load_wineind'
]
[docs]def load_wineind(as_series=False):
"""Australian total wine sales by wine makers in bottles <= 1 litre.
This time-series records wine sales by Australian wine makers between
Jan 1980 -- Aug 1994. This dataset is found in the R ``forecast`` package.
Parameters
----------
as_series : bool, optional (default=False)
Whether to return a Pandas series. If True, the index will be set to
the observed years/months. If False, will return a 1d numpy array.
Examples
--------
>>> from pyramid.datasets import load_wineind
>>> load_wineind()
array([15136, 16733, 20016, 17708, 18019, 19227, 22893, 23739, 21133,
22591, 26786, 29740, 15028, 17977, 20008, 21354, 19498, 22125,
25817, 28779, 20960, 22254, 27392, 29945, 16933, 17892, 20533,
23569, 22417, 22084, 26580, 27454, 24081, 23451, 28991, 31386,
16896, 20045, 23471, 21747, 25621, 23859, 25500, 30998, 24475,
23145, 29701, 34365, 17556, 22077, 25702, 22214, 26886, 23191,
27831, 35406, 23195, 25110, 30009, 36242, 18450, 21845, 26488,
22394, 28057, 25451, 24872, 33424, 24052, 28449, 33533, 37351,
19969, 21701, 26249, 24493, 24603, 26485, 30723, 34569, 26689,
26157, 32064, 38870, 21337, 19419, 23166, 28286, 24570, 24001,
33151, 24878, 26804, 28967, 33311, 40226, 20504, 23060, 23562,
27562, 23940, 24584, 34303, 25517, 23494, 29095, 32903, 34379,
16991, 21109, 23740, 25552, 21752, 20294, 29009, 25500, 24166,
26960, 31222, 38641, 14672, 17543, 25453, 32683, 22449, 22316,
27595, 25451, 25421, 25288, 32568, 35110, 16052, 22146, 21198,
19543, 22084, 23816, 29961, 26773, 26635, 26972, 30207, 38687,
16974, 21697, 24179, 23757, 25013, 24019, 30345, 24488, 25156,
25650, 30923, 37240, 17466, 19463, 24352, 26805, 25236, 24735,
29356, 31234, 22724, 28496, 32857, 37198, 13652, 22784, 23565,
26323, 23779, 27549, 29660, 23356])
>>> load_wineind(True).head()
Jan 1980 15136
Feb 1980 16733
Mar 1980 20016
Apr 1980 17708
May 1980 18019
dtype: int64
References
----------
.. [1] https://www.rdocumentation.org/packages/forecast/versions/8.1/topics/wineind
Returns
-------
rslt : array-like, shape=(n_samples,)
The wineind dataset. There are 176 observations.
"""
rslt = np.array([15136, 16733, 20016, 17708, 18019, 19227, 22893, 23739,
21133, 22591, 26786, 29740, 15028, 17977, 20008, 21354,
19498, 22125, 25817, 28779, 20960, 22254, 27392, 29945,
16933, 17892, 20533, 23569, 22417, 22084, 26580, 27454,
24081, 23451, 28991, 31386, 16896, 20045, 23471, 21747,
25621, 23859, 25500, 30998, 24475, 23145, 29701, 34365,
17556, 22077, 25702, 22214, 26886, 23191, 27831, 35406,
23195, 25110, 30009, 36242, 18450, 21845, 26488, 22394,
28057, 25451, 24872, 33424, 24052, 28449, 33533, 37351,
19969, 21701, 26249, 24493, 24603, 26485, 30723, 34569,
26689, 26157, 32064, 38870, 21337, 19419, 23166, 28286,
24570, 24001, 33151, 24878, 26804, 28967, 33311, 40226,
20504, 23060, 23562, 27562, 23940, 24584, 34303, 25517,
23494, 29095, 32903, 34379, 16991, 21109, 23740, 25552,
21752, 20294, 29009, 25500, 24166, 26960, 31222, 38641,
14672, 17543, 25453, 32683, 22449, 22316, 27595, 25451,
25421, 25288, 32568, 35110, 16052, 22146, 21198, 19543,
22084, 23816, 29961, 26773, 26635, 26972, 30207, 38687,
16974, 21697, 24179, 23757, 25013, 24019, 30345, 24488,
25156, 25650, 30923, 37240, 17466, 19463, 24352, 26805,
25236, 24735, 29356, 31234, 22724, 28496, 32857, 37198,
13652, 22784, 23565, 26323, 23779, 27549, 29660, 23356])
if not as_series:
return rslt
# Otherwise we want a series and have to cleverly create the index
# (we don't want after aug in 1994, so trip Sep, Oct, Nov and Dec)
index = [
"%s %i" % (calendar.month_abbr[i + 1], year)
for year in range(1980, 1995)
for i in range(12)
][:-4]
return pd.Series(rslt, index=index)