# Source code for pmdarima.datasets.sunspots

# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# This is the sunspots dataset found in R.

import numpy as np
import pandas as pd

from os.path import join
import calendar

from ..compat import DTYPE
from . import _base as base

__all__ = [
    'load_sunspots'
]


def load_sunspots(as_series=False, dtype=DTYPE):
    """Monthly Sunspot Numbers, 1749 - 1983

    Monthly mean relative sunspot numbers from 1749 to 1983. Collected at Swiss
    Federal Observatory, Zurich until 1960, then Tokyo Astronomical
    Observatory.

    Parameters
    ----------
    as_series : bool, optional (default=False)
        Whether to return a Pandas series. If True, the index will be set to
        the observed years/months. If False, will return a 1d numpy array.

    dtype : type, optional (default=np.float64)
        The type to return for the array. Default is np.float64, which is used
        throughout the package as the default type.

    Returns
    -------
    rslt : array-like, shape=(n_samples,)
        The sunspots dataset. There are 2820 observations.

    Notes
    -----
    This is monthly data, so *m* should be set to 12 when using in a seasonal
    context.

    When ``as_series=True`` the index is a list of ``"<Mon> <year>"`` strings
    (e.g. ``"Jan 1749"``), not a datetime index. The month abbreviations come
    from ``calendar.month_abbr``, which follows the current locale.

    References
    ----------
    .. [1] https://www.rdocumentation.org/packages/datasets/versions/3.6.1/topics/sunspots

    Examples
    --------
    >>> from pmdarima.datasets import load_sunspots
    >>> load_sunspots()
    array([58. , 62.6, 70. , ..., 55.8, 33.3, 33.4])

    >>> load_sunspots(True).head()
    Jan 1749    58.0
    Feb 1749    62.6
    Mar 1749    70.0
    Apr 1749    55.7
    May 1749    85.0
    dtype: float64
    """  # noqa: E501
    # The raw array is read from disk once and then kept in the module cache.
    rslt = base._cache.get('sunspots', None)
    if rslt is None:
        data_path = join(base.get_data_path(), 'sunspots.txt.gz')
        rslt = np.loadtxt(data_path).ravel()
        base._cache['sunspots'] = rslt

    # Don't want to cache the type conversion: ``astype`` hands back a new
    # array, so the cached copy is left untouched whatever dtype is requested.
    rslt = rslt.astype(dtype)

    if not as_series:
        return rslt

    # Otherwise we want a series and have to build the "<Mon> <year>" index.
    # Slice the month abbreviations once instead of looking one up for every
    # observation; element 0 of ``month_abbr`` is an empty placeholder.
    month_names = calendar.month_abbr[1:13]
    index = [
        "%s %i" % (month, year)
        for year in range(1749, 1984)
        for month in month_names
    ]

    return pd.Series(rslt, index=index)