Skip to content

Commit

Permalink
Merge pull request #36 from IBM/time_series
Browse files Browse the repository at this point in the history
Add timeseries wrapper
  • Loading branch information
mplpl authored May 16, 2023
2 parents a1279c6 + e61acb9 commit 1ecad40
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 15 deletions.
38 changes: 23 additions & 15 deletions docs/source/predictive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,42 +57,50 @@ Association Rules
:undoc-members:
:show-inheritance:

Classification base module
--------------------------------------------------
Bisecting KMeans
-----------------------------------------------------

.. automodule:: nzpyida.analytics.predictive.classification
.. automodule:: nzpyida.analytics.predictive.bisecting_kmeans
:members:
:undoc-members:
:show-inheritance:

Regression base module
----------------------------------------------
Two Step Clustering
-----------------------------------------------------

.. automodule:: nzpyida.analytics.predictive.regression
.. automodule:: nzpyida.analytics.predictive.two_step_clustering
:members:
:undoc-members:
:show-inheritance:

Predictive Modeling base module
--------------------------------------------------------
Time Series Forecasting
-----------------------------------------------------

.. automodule:: nzpyida.analytics.predictive.predictive_modeling
.. automodule:: nzpyida.analytics.predictive.timeseries
:members:
:undoc-members:
:show-inheritance:

Bisecting KMeans
-----------------------------------------------------
Classification base module
--------------------------------------------------

.. automodule:: nzpyida.analytics.predictive.bisecting_kmeans
.. automodule:: nzpyida.analytics.predictive.classification
:members:
:undoc-members:
:show-inheritance:

Two Step Clustering
-----------------------------------------------------
Regression base module
----------------------------------------------

.. automodule:: nzpyida.analytics.predictive.two_step_clustering
.. automodule:: nzpyida.analytics.predictive.regression
:members:
:undoc-members:
:show-inheritance:

Predictive Modeling base module
--------------------------------------------------------

.. automodule:: nzpyida.analytics.predictive.predictive_modeling
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions nzpyida/analytics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .predictive.bisecting_kmeans import BisectingKMeans
from .predictive.regression_trees import DecisionTreeRegressor
from .predictive.two_step_clustering import TwoStepClustering
from .predictive.timeseries import TimeSeries
from .exploration.distribution import bitable, moments, histogram, outliers
from .exploration.distribution import quantile, unitable
from .transform.discretization import EFDisc, EMDisc, EWDisc
Expand Down
185 changes: 185 additions & 0 deletions nzpyida/analytics/predictive/timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
"""
A time series model is built by analyzing series of timed numeric values, and is
applied immediately for predicting future values. The model itself is stored but
not really needed any more (except for understanding the predicted values).
If specified, a table <outtable> is additionally created with the following columns:
<by>, <time>, forecast, standarderror. The table contains the forecast values for
future time points of the time series identified by <by>. For each prediction,
the standarderror value indicates a confidence interval around the forecast value.
If specified, a table <seasadjtable> is additionally created with the following columns:
<by>, <time>, adjusted. The values in column <target> of the input table are seasonally
adjusted and then copied into this table, with the values of columns <by> and <time>.
"""
from typing import List
from nzpyida.frame import IdaDataFrame
from nzpyida.base import IdaDataBase
from nzpyida.analytics.predictive.predictive_modeling import PredictiveModeling
from nzpyida.analytics.utils import call_proc_df_in_out
from nzpyida.analytics.model_manager import ModelManager

class TimeSeries(PredictiveModeling):
    """
    Wrapper around the TIMESERIES in-database procedure.

    A time series model is built from a series of timed numeric values and
    is applied immediately to forecast future values.
    """

    def __init__(self, idadb: IdaDataBase, model_name: str):
        """
        Creates the time series wrapper.

        Parameters
        ----------
        idadb : IdaDataBase
            the database connection, forwarded to the base class
        model_name : str
            the name of the model, forwarded to the base class
        """
        super().__init__(idadb, model_name)
        self.fit_proc = "TIMESERIES"
        self.has_print_proc = True

    def fit_predict(self, in_df: IdaDataFrame, time_column: str, target_column: str, by_column: str=None,
        out_table: str=None, description_table: str=None, algorithm: str='ExponentialSmoothing',
        interpolation_method: str='linear', from_time=None, to_time=None, forecast_horizon: str=None,
        forecast_times: str=None, trend: str=None, seasonality: str=None, period: float=None,
        unit: str=None, p: int=None, d: int=None, q: int=None, sp: int=None, sd: int=None, sq: int=None,
        saesonally_adjusted_table: str=None ) -> IdaDataFrame:
        """
        Builds the time series model and predicts future values of the series.

        ModelManager.drop_model is called for this model's name before the
        procedure is invoked.

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame
        time_column : str
            the input data frame column which defines an order on the
            numeric values
        target_column : str
            the input data frame column which contains the numeric values
        by_column : str
            the input data frame column which uniquely identifies a series
            of values. If not specified, all numeric values belong to only
            one time series.
        out_table : str
            the output table containing the predicted future values. This
            parameter is not allowed for algorithm=SpectralAnalysis. If not
            specified, no output table is written out
        description_table : str
            the optional input table containing the names and descriptions
            of the time series. The table must contain the following columns:
            <by_column>, 'NAME'=str, 'DESCRIPTION'=str. If not specified,
            the series do not have a name or a description
        algorithm : str
            the time series algorithm to use. Allowed values are:
            ExponentialSmoothing, ARIMA, SeasonalTrendDecomposition,
            SpectralAnalysis
        interpolation_method : str
            the interpolation method. Allowed values are: linear,
            cubicspline, exponentialspline
        from_time : same as type of <time column>
            the value of column time to start the analysis from. If not
            specified, the analysis starts from the first value of the time
            series in the input table
        to_time : same as type of <time column>
            the value of column time to stop the analysis at. If not
            specified, the analysis stops at the last value of the time
            series in the input table
        forecast_horizon : str
            the value of column time until which to predict. This parameter
            is not allowed for algorithm=SpectralAnalysis. If not specified,
            the algorithm determines itself until which time it predicts
            values
        forecast_times : str
            list of semicolon-separated values of column time to predict
            at. This parameter is not allowed for algorithm=SpectralAnalysis.
            If not specified, the times to predict values at are determined
            by the algorithm
        trend : str
            the trend type for algorithm=ExponentialSmoothing. Allowed values
            are: N (none), A (additive), DA (damped additive),
            M (multiplicative), DM (damped multiplicative). If not specified,
            the trend type is determined by the algorithm
        seasonality : str
            the seasonality type for algorithm=ExponentialSmoothing. Allowed
            values are: N (none), A (additive), M (multiplicative). If not
            specified, the seasonality type is determined by the algorithm
        period : float
            the seasonality period. This parameter is not allowed for
            algorithm=SpectralAnalysis. If not specified, the seasonality
            period is determined by the algorithm. If set to 0, no
            seasonality period will be considered by the algorithm
        unit : str
            the seasonality period unit. This parameter is not allowed for
            algorithm=SpectralAnalysis. It must be specified if the parameter
            period is specified and the <time_column> is of type date, time
            or timestamp. Otherwise, it must not be specified. Allowed values
            are: ms, s, min, h, d, wk, qtr, q, a, y
        p : int
            the parameter p for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm will
            determine its best value automatically
        d : int
            the parameter d for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm will
            determine its best value automatically
        q : int
            the parameter q for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm will
            determine its best value automatically
        sp : int
            the seasonal parameter SP for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            will determine its best value automatically
        sd : int
            the seasonal parameter SD for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            will determine its best value automatically
        sq : int
            the seasonal parameter SQ for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            will determine its best value automatically
        saesonally_adjusted_table : str
            the output table containing seasonally adjusted values (the
            spelling of this argument is part of the public signature). This
            parameter is not allowed for algorithm=SpectralAnalysis or
            algorithm=ARIMA. If not specified, no output table is written out

        Returns
        -------
        IdaDataFrame
            the first element of the result of call_proc_df_in_out

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame
        """

        # Series identification and general settings.
        proc_args = {
            'model': self.model_name,
            'time': time_column,
            'target': target_column,
            'by': by_column,
            'desctable': description_table,
            'algorithm': algorithm,
            'interpolationmethod': interpolation_method,
        }
        # Analysis window and forecast range.
        proc_args.update({
            'from': from_time,
            'to': to_time,
            'forecasthorizon': forecast_horizon,
            'forecasttimes': forecast_times,
        })
        # Trend and seasonality settings.
        proc_args.update({
            'trend': trend,
            'seasonality': seasonality,
            'period': period,
            'unit': unit,
        })
        # ARIMA orders (non-seasonal, then seasonal) and the adjusted-values table.
        proc_args.update({
            'p': p,
            'd': d,
            'q': q,
            'SP': sp,
            'SD': sd,
            'SQ': sq,
            'seasadjtable': saesonally_adjusted_table,
        })

        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        model_manager = ModelManager(self.idadb)
        model_manager.drop_model(self.model_name)

        proc_result = call_proc_df_in_out(
            proc=self.fit_proc,
            in_df=in_df,
            params=proc_args,
            out_table=out_table,
        )
        return proc_result[0]
67 changes: 67 additions & 0 deletions nzpyida/analytics/tests/test_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------

from nzpyida.analytics.predictive.timeseries import TimeSeries
from nzpyida.base import IdaDataBase
from nzpyida.frame import IdaDataFrame
from nzpyida.analytics.model_manager import ModelManager
import pytest
from nzpyida.analytics.tests.conftest import MOD_NAME, TAB_NAME_TRAIN, OUT_TABLE_PRED
import pandas as pd
from math import sin


@pytest.fixture(scope='module')
def mm(idadb: IdaDataBase):
    """Provide a module-scoped ModelManager built on the idadb fixture."""
    manager = ModelManager(idadb)
    return manager

@pytest.fixture
def clean_up(idadb, mm):
    """Drop the test model and prediction table both before and after a test."""

    def _drop_leftovers():
        # Only drop what actually exists.
        if mm.model_exists(MOD_NAME):
            mm.drop_model(MOD_NAME)
        if idadb.exists_table(OUT_TABLE_PRED):
            idadb.drop_table(OUT_TABLE_PRED)

    _drop_leftovers()
    yield
    _drop_leftovers()


@pytest.fixture
def idf_train(idadb: IdaDataBase):
    """
    Yield an IdaDataFrame holding the training series sin(t) + t for
    t = 0..199, stored under TAB_NAME_TRAIN; the table is dropped (if it
    exists) before creation and again after the test.
    """

    def _drop_train_table():
        if idadb.exists_table(TAB_NAME_TRAIN):
            idadb.drop_table(TAB_NAME_TRAIN)

    _drop_train_table()

    values = [sin(t)+t for t in range(200)]
    columns = {
        "TIME": range(200),
        "VALUE": values
    }
    frame = pd.DataFrame.from_dict(columns)
    yield idadb.as_idadataframe(frame, TAB_NAME_TRAIN)

    _drop_train_table()


def test_timeseries(idadb: IdaDataBase, mm: ModelManager, idf_train: IdaDataFrame, clean_up):
    """Fit a TimeSeries model on the training fixture and check the forecast table."""
    ts_model = TimeSeries(idadb, MOD_NAME)
    assert ts_model
    # Constructing the wrapper alone must not register a model.
    assert not mm.model_exists(MOD_NAME)

    forecast = ts_model.fit_predict(
        idf_train,
        time_column="TIME",
        target_column="VALUE",
        out_table=OUT_TABLE_PRED,
        forecast_horizon='399',
    )

    assert mm.model_exists(MOD_NAME)
    assert forecast
    assert len(forecast) == 200

    # Compare rounded forecasts against the rounded generating function.
    tenth_row = forecast.head(10).iloc[-1]
    assert round(tenth_row["VALUE"]) == round(sin(210)+210)
    final_row = forecast.tail().iloc[-1]
    assert round(final_row["VALUE"]) == round(sin(399)+399)

0 comments on commit 1ecad40

Please sign in to comment.