-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from IBM/time_series
Add timeseries wrapper
- Loading branch information
Showing
4 changed files
with
276 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
#----------------------------------------------------------------------------- | ||
# Copyright (c) 2023, IBM Corp. | ||
# All rights reserved. | ||
# | ||
# Distributed under the terms of the BSD Simplified License. | ||
# | ||
# The full license is in the LICENSE file, distributed with this software. | ||
#----------------------------------------------------------------------------- | ||
""" | ||
A time series model is built by analyzing series of timed numeric values, and is | ||
applied immediately for predicting future values. The model itself is stored but | ||
not really needed any more (except for understanding the predicted values). | ||
If specified, a table <outtable> is additionally created with the following columns: | ||
<by>, <time>, forecast, standarderror. The table contains the forecast values for | ||
future time points of the time series identified by <by>. For each prediction, | ||
the standarderror value indicates a confidence interval around the forecast value. | ||
If specified, a table <seasadjtable> is additionally created with the following columns: | ||
<by>, <time>, adjusted. The values in column <target> of the input table are seasonally | ||
adjusted and then copied into this table, together with the corresponding values of columns <by> and <time>. | ||
""" | ||
from typing import List | ||
from nzpyida.frame import IdaDataFrame | ||
from nzpyida.base import IdaDataBase | ||
from nzpyida.analytics.predictive.predictive_modeling import PredictiveModeling | ||
from nzpyida.analytics.utils import call_proc_df_in_out | ||
from nzpyida.analytics.model_manager import ModelManager | ||
|
||
class TimeSeries(PredictiveModeling):
    """
    Time series model: wraps the TIMESERIES procedure, which builds a model
    from series of timed numeric values and forecasts future values.
    """

    def __init__(self, idadb: IdaDataBase, model_name: str):
        """
        Creates the time series wrapper.

        Parameters
        ----------
        idadb : IdaDataBase
            the database connection object, forwarded to the base class

        model_name : str
            the name of the model, forwarded to the base class
        """
        super().__init__(idadb, model_name)
        self.fit_proc = "TIMESERIES"
        self.has_print_proc = True

    def fit_predict(self, in_df: IdaDataFrame, time_column: str, target_column: str,
                    by_column: str=None, out_table: str=None, description_table: str=None,
                    algorithm: str='ExponentialSmoothing', interpolation_method: str='linear',
                    from_time=None, to_time=None, forecast_horizon: str=None,
                    forecast_times: str=None, trend: str=None, seasonality: str=None,
                    period: float=None, unit: str=None, p: int=None, d: int=None,
                    q: int=None, sp: int=None, sd: int=None, sq: int=None,
                    saesonally_adjusted_table: str=None ) -> IdaDataFrame:
        """
        Builds the time series model and predicts future values of the series.

        Any existing model with this object's model name is dropped before
        the procedure is called.

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame

        time_column : str
            the input data frame column which defines an order on the
            numeric values

        target_column : str
            the input data frame column which contains the numeric values

        by_column : str, optional
            the input data frame column which uniquely identifies a series
            of values. If not specified, all numeric values belong to a
            single time series.

        out_table : str, optional
            the output table containing the predicted future values. Not
            allowed for algorithm=SpectralAnalysis.

        description_table : str, optional
            the input table containing the names and descriptions of the
            time series. It must contain the columns <by_column>,
            'NAME'=str and 'DESCRIPTION'=str. If not specified, the series
            have neither a name nor a description.

        algorithm : str, optional
            the time series algorithm to use. Allowed values are:
            ExponentialSmoothing, ARIMA, SeasonalTrendDecomposition,
            SpectralAnalysis

        interpolation_method : str, optional
            the interpolation method. Allowed values are: linear,
            cubicspline, exponentialspline

        from_time : same type as <time_column>, optional
            the value of the time column to start the analysis from. If not
            specified, the analysis starts at the first value of the time
            series in the input table.

        to_time : same type as <time_column>, optional
            the value of the time column to stop the analysis at. If not
            specified, the analysis stops at the last value of the time
            series in the input table.

        forecast_horizon : str, optional
            the value of the time column until which to predict. Not allowed
            for algorithm=SpectralAnalysis. If not specified, the algorithm
            decides by itself how far it predicts.

        forecast_times : str, optional
            semicolon-separated list of time column values to predict at.
            Not allowed for algorithm=SpectralAnalysis. If not specified,
            the times to predict at are determined by the algorithm.

        trend : str, optional
            the trend type for algorithm=ExponentialSmoothing. Allowed
            values are: N (none), A (additive), DA (damped additive),
            M (multiplicative), DM (damped multiplicative). If not
            specified, the trend type is determined by the algorithm.

        seasonality : str, optional
            the seasonality type for algorithm=ExponentialSmoothing. Allowed
            values are: N (none), A (additive), M (multiplicative). If not
            specified, the seasonality type is determined by the algorithm.

        period : float, optional
            the seasonality period. Not allowed for
            algorithm=SpectralAnalysis. If not specified, the period is
            determined by the algorithm. If set to 0, no seasonality period
            is considered.

        unit : str, optional
            the seasonality period unit. Not allowed for
            algorithm=SpectralAnalysis. Must be specified if period is
            specified and <time_column> is of type date, time or timestamp;
            otherwise it must not be specified. Allowed values are:
            ms, s, min, h, d, wk, qtr, q, a, y

        p : int, optional
            the parameter p for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm determines
            its best value automatically.

        d : int, optional
            the parameter d for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm determines
            its best value automatically.

        q : int, optional
            the parameter q for algorithm=ARIMA, either equal to or below
            the specified value. If not specified, the algorithm determines
            its best value automatically.

        sp : int, optional
            the seasonal parameter SP for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            determines its best value automatically.

        sd : int, optional
            the seasonal parameter SD for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            determines its best value automatically.

        sq : int, optional
            the seasonal parameter SQ for algorithm=ARIMA, either equal to
            or below the specified value. If not specified, the algorithm
            determines its best value automatically.

        saesonally_adjusted_table : str, optional
            the output table containing seasonally adjusted values. Not
            allowed for algorithm=SpectralAnalysis or algorithm=ARIMA. If
            not specified, no such table is written out.

        Returns
        -------
        IdaDataFrame
            the first element of the result returned by
            call_proc_df_in_out for the TIMESERIES procedure

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame
        """
        # Reject unusable input before touching the database.
        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        # Keys are the procedure's own parameter names.
        proc_args = {
            'model': self.model_name,
            'time': time_column,
            'target': target_column,
            'by': by_column,
            'desctable': description_table,
            'algorithm': algorithm,
            'interpolationmethod': interpolation_method,
            'from': from_time,
            'to': to_time,
            'forecasthorizon': forecast_horizon,
            'forecasttimes': forecast_times,
            'trend': trend,
            'seasonality': seasonality,
            'period': period,
            'unit': unit,
            'p': p,
            'd': d,
            'q': q,
            'SP': sp,
            'SD': sd,
            'SQ': sq,
            'seasadjtable': saesonally_adjusted_table,
        }

        # Remove any model already stored under this name before refitting.
        manager = ModelManager(self.idadb)
        manager.drop_model(self.model_name)

        proc_result = call_proc_df_in_out(proc=self.fit_proc, in_df=in_df,
                                          params=proc_args, out_table=out_table)
        return proc_result[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
#----------------------------------------------------------------------------- | ||
# Copyright (c) 2023, IBM Corp. | ||
# All rights reserved. | ||
# | ||
# Distributed under the terms of the BSD Simplified License. | ||
# | ||
# The full license is in the LICENSE file, distributed with this software. | ||
#----------------------------------------------------------------------------- | ||
|
||
from nzpyida.analytics.predictive.timeseries import TimeSeries | ||
from nzpyida.base import IdaDataBase | ||
from nzpyida.frame import IdaDataFrame | ||
from nzpyida.analytics.model_manager import ModelManager | ||
import pytest | ||
from nzpyida.analytics.tests.conftest import MOD_NAME, TAB_NAME_TRAIN, OUT_TABLE_PRED | ||
import pandas as pd | ||
from math import sin | ||
|
||
|
||
@pytest.fixture(scope='module')
def mm(idadb: IdaDataBase):
    """Module-scoped ModelManager built on the ``idadb`` fixture."""
    manager = ModelManager(idadb)
    return manager
|
||
@pytest.fixture
def clean_up(idadb, mm):
    """
    Drops the test model and the prediction output table, if they exist,
    both before and after the test that uses this fixture.
    """
    def _purge():
        # Same cleanup on both sides of the test: model first, then table.
        if mm.model_exists(MOD_NAME):
            mm.drop_model(MOD_NAME)
        if idadb.exists_table(OUT_TABLE_PRED):
            idadb.drop_table(OUT_TABLE_PRED)

    _purge()
    yield
    _purge()
|
||
|
||
@pytest.fixture
def idf_train(idadb: IdaDataBase):
    """
    Yields an IdaDataFrame stored as TAB_NAME_TRAIN holding 200 points of
    the series sin(t) + t for t = 0..199 (columns TIME and VALUE).
    The table is dropped, if present, before creation and after the test.
    """
    def _drop_if_present():
        if idadb.exists_table(TAB_NAME_TRAIN):
            idadb.drop_table(TAB_NAME_TRAIN)

    _drop_if_present()

    n_points = 200
    values = [sin(t)+t for t in range(n_points)]
    frame = pd.DataFrame.from_dict({
        "TIME": range(n_points),
        "VALUE": values
    })
    yield idadb.as_idadataframe(frame, TAB_NAME_TRAIN)

    _drop_if_present()
|
||
|
||
def test_timeseries(idadb: IdaDataBase, mm: ModelManager, idf_train: IdaDataFrame, clean_up):
    """
    Fits a TimeSeries model on the sin(t) + t training data, forecasting up
    to time 399, and checks that the model is stored and the forecast rows
    match the generating function after rounding.
    """
    ts_model = TimeSeries(idadb, MOD_NAME)
    assert ts_model
    # Constructing the wrapper alone must not create the model.
    assert not mm.model_exists(MOD_NAME)

    forecast = ts_model.fit_predict(idf_train, time_column="TIME", target_column="VALUE",
                                    out_table=OUT_TABLE_PRED, forecast_horizon='399')

    assert mm.model_exists(MOD_NAME)
    assert forecast
    assert len(forecast) == 200

    # Last row of the first ten rows of the forecast.
    row_in_head = forecast.head(10).iloc[-1]
    assert round(row_in_head["VALUE"]) == round(sin(210)+210)

    # Last row of the forecast tail.
    row_in_tail = forecast.tail().iloc[-1]
    assert round(row_in_tail["VALUE"]) == round(sin(399)+399)