Source code for pyramid.datasets.wineind

# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# This is the wineind dataset found in R.

from __future__ import absolute_import

import numpy as np
import pandas as pd

import calendar

__all__ = [
    'load_wineind'
]


def load_wineind(as_series=False):
    """Australian total wine sales by wine makers in bottles <= 1 litre.

    This time-series records monthly wine sales by Australian wine makers
    between Jan 1980 -- Aug 1994. This dataset is found in the R
    ``forecast`` package.

    Parameters
    ----------
    as_series : bool, optional (default=False)
        Whether to return a Pandas series. If True, the index will be set to
        the observed months/years as strings (e.g. ``"Jan 1980"``). If False,
        will return a 1d numpy array.

    Returns
    -------
    rslt : array-like, shape=(n_samples,)
        The wineind dataset. There are 176 observations.

    Notes
    -----
    The index labels always use English month abbreviations, regardless of
    the process locale, so label-based lookups are stable across machines.

    References
    ----------
    .. [1] https://www.rdocumentation.org/packages/forecast/versions/8.1/topics/wineind

    Examples
    --------
    >>> from pyramid.datasets import load_wineind
    >>> load_wineind()[:5]
    array([15136, 16733, 20016, 17708, 18019])

    >>> load_wineind(True).head()
    Jan 1980    15136
    Feb 1980    16733
    Mar 1980    20016
    Apr 1980    17708
    May 1980    18019
    dtype: int64
    """
    # One row per calendar year, Jan..Dec; the final row (1994) stops at Aug.
    rslt = np.array([
        15136, 16733, 20016, 17708, 18019, 19227,
        22893, 23739, 21133, 22591, 26786, 29740,
        15028, 17977, 20008, 21354, 19498, 22125,
        25817, 28779, 20960, 22254, 27392, 29945,
        16933, 17892, 20533, 23569, 22417, 22084,
        26580, 27454, 24081, 23451, 28991, 31386,
        16896, 20045, 23471, 21747, 25621, 23859,
        25500, 30998, 24475, 23145, 29701, 34365,
        17556, 22077, 25702, 22214, 26886, 23191,
        27831, 35406, 23195, 25110, 30009, 36242,
        18450, 21845, 26488, 22394, 28057, 25451,
        24872, 33424, 24052, 28449, 33533, 37351,
        19969, 21701, 26249, 24493, 24603, 26485,
        30723, 34569, 26689, 26157, 32064, 38870,
        21337, 19419, 23166, 28286, 24570, 24001,
        33151, 24878, 26804, 28967, 33311, 40226,
        20504, 23060, 23562, 27562, 23940, 24584,
        34303, 25517, 23494, 29095, 32903, 34379,
        16991, 21109, 23740, 25552, 21752, 20294,
        29009, 25500, 24166, 26960, 31222, 38641,
        14672, 17543, 25453, 32683, 22449, 22316,
        27595, 25451, 25421, 25288, 32568, 35110,
        16052, 22146, 21198, 19543, 22084, 23816,
        29961, 26773, 26635, 26972, 30207, 38687,
        16974, 21697, 24179, 23757, 25013, 24019,
        30345, 24488, 25156, 25650, 30923, 37240,
        17466, 19463, 24352, 26805, 25236, 24735,
        29356, 31234, 22724, 28496, 32857, 37198,
        13652, 22784, 23565, 26323, 23779, 27549,
        29660, 23356,
    ])

    if not as_series:
        return rslt

    # Fixed English abbreviations: ``calendar.month_abbr`` follows the current
    # LC_TIME locale, which would change the index labels on machines that
    # have a non-English locale active.
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    # Generate every month of 1980..1994, then trim to the number of
    # observations (the data ends in Aug 1994, so Sep-Dec 1994 are dropped).
    index = [
        "%s %i" % (month, year)
        for year in range(1980, 1995)
        for month in month_names
    ][:rslt.shape[0]]

    return pd.Series(rslt, index=index)