Skip to content

Commit

Permalink
Feat/output chunk length regression model (#761)
Browse files Browse the repository at this point in the history
* fix multicollinearity in regression ensemble model tests causing exploding coefficients

* reset seed to initial value

* add output_chunk_length parameter to regression model

* add output_chunk_length to fit method of regressionmodel

* add check if model supports multi-output regression natively

* remove _shift_matrices test

* update the LightGBMModel

* update linear regression model

* update random forest regression model

* update LightGBMModel docstring

* use dict for lags in regressionmodel and adjust all models and tests accordingly

* reformat regression_ensemble_model using pre-commit

* reformat test_regression_models with pre-commit

* shorten comment line length

* remove unused import to pass flake8

* reformat with black

* reformat with black

* update docstring of _create_lagged_data

* reformat using black

* improve error message when unable to build any samples to fit and when input_dim doesn't match

* return self at the end of fit() in regressionmodel

* remove numpydoc type hints and add n_jobs_multioutput_wrapper parameter to fit()

* add comments
  • Loading branch information
brunnedu committed Feb 5, 2022
1 parent c2d91e0 commit 0e5b5ad
Show file tree
Hide file tree
Showing 7 changed files with 725 additions and 602 deletions.
18 changes: 11 additions & 7 deletions darts/models/forecasting/gradient_boosted_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
LightGBM Model
--------------
This is a LightGBM implementation of Gradient Boosted Trees algorightm.
This is a LightGBM implementation of Gradient Boosted Trees algorithm.
To enable LightGBM support in Darts, follow the detailed install instructions for LightGBM in the README:
https://github.com/unit8co/darts/blob/master/README.md
Expand All @@ -23,7 +23,8 @@ def __init__(
lags: Union[int, list] = None,
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
**kwargs
output_chunk_length: int = 1,
**kwargs,
):
"""Light Gradient Boosted Model
Expand All @@ -41,6 +42,10 @@ def __init__(
given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
`future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
of integers with lags is required.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
**kwargs
Additional keyword arguments passed to `lightgbm.LGBMRegressor`.
"""
Expand All @@ -50,13 +55,12 @@ def __init__(
lags=lags,
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
model=lgb.LGBMRegressor(**kwargs),
)

def __str__(self):
return "LGBModel(lags={}, lags_past={}, lags_future={})".format(
self.lags, self.lags_past_covariates, self.lags_future_covariates
)
return f"LGBModel(lags={self.lags})"

def fit(
self,
Expand All @@ -67,7 +71,7 @@ def fit(
val_past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
val_future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
max_samples_per_ts: Optional[int] = None,
**kwargs
**kwargs,
):
"""
Fits/trains the model using the provided list of features time series and the target time series.
Expand Down Expand Up @@ -109,7 +113,7 @@ def fit(
past_covariates=past_covariates,
future_covariates=future_covariates,
max_samples_per_ts=max_samples_per_ts,
**kwargs
**kwargs,
)

return self
13 changes: 7 additions & 6 deletions darts/models/forecasting/linear_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(
lags: Union[int, list] = None,
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
**kwargs,
):
"""Linear regression model.
Expand All @@ -37,6 +38,10 @@ def __init__(
given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
`future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
of integers with lags is required.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
**kwargs
Additional keyword arguments passed to `sklearn.linear_model.LinearRegression`.
"""
Expand All @@ -45,13 +50,9 @@ def __init__(
lags=lags,
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
model=LinearRegression(**kwargs),
)

def __str__(self):
return (
f"LinearRegression(lags={self.lags}, "
f"lags_past_covariates={self.lags_past_covariates}, "
f"lags_historical_covariates={self.lags_historical_covariates}, "
f"lags_future_covariates={self.lags_future_covariates})"
)
return f"LinearRegression(lags={self.lags})"
9 changes: 6 additions & 3 deletions darts/models/forecasting/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
lags: Union[int, list] = None,
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
n_estimators: Optional[int] = 100,
max_depth: Optional[int] = None,
**kwargs,
Expand All @@ -48,6 +49,10 @@ def __init__(
given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
`future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
of integers with lags is required.
output_chunk_length
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
n_estimators : int
The number of trees in the forest.
max_depth : int
Expand All @@ -66,14 +71,12 @@ def __init__(
lags=lags,
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
model=RandomForestRegressor(**kwargs),
)

def __str__(self):
return (
f"RandomForest(lags={self.lags}, "
f"lags_past_covariates={self.lags_past_covariates}, "
f"lags_historical_covariates={self.lags_historical_covariates}, "
f"lags_future_covariates={self.lags_future_covariates}, "
f"n_estimators={self.n_estimators}, max_depth={self.max_depth})"
)
22 changes: 7 additions & 15 deletions darts/models/forecasting/regression_ensemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
from typing import Optional, List, Union, Sequence, Tuple
from darts.timeseries import TimeSeries
from darts.logging import get_logger, raise_if
from darts.logging import get_logger, raise_if, raise_if_not

from darts.models.forecasting.forecasting_model import (
ForecastingModel,
Expand Down Expand Up @@ -58,20 +58,12 @@ def __init__(
lags_future_covariates=[0], model=regression_model
)

raise_if(
regression_model.lags is not None
and regression_model.lags_historical_covariates is not None
and regression_model.lags_past_covariates is not None
and regression_model.lags_future_covariates != [0],
(
f"`lags`, `lags_historical_covariates` and `lags_past_covariates` "
f"of regression model must be `None` "
f"and `lags_future_covariates` must be [0]. Given:\n"
f"`lags`: {regression_model.lags}, "
f"`lags_historical_covariates`: {regression_model.lags_historical_covariates}, "
f"`lags_past_covariates`: {regression_model.lags} and "
f"`lags_future_covariates`: {regression_model.lags_future_covariates}."
),
# check lags of the regression model
raise_if_not(
regression_model.lags == {"future": [0]},
f"`lags` and `lags_past_covariates` of regression model must be `None`"
f"and `lags_future_covariates` must be [0]. Given:\n"
f"{regression_model.lags}",
)

self.regression_model = regression_model
Expand Down
Loading

0 comments on commit 0e5b5ad

Please sign in to comment.