Feat/output chunk length regression model (#761)

* fix multicollinearity in regression ensemble model tests causing exploding coefficients * reset seed to initial value * add output_chunk_length parameter to regression model * add output_chunk_length to fit method of regressionmodel * add check if model supports multi output regression natively * remove _shift_matrices test * update the LightGBMModel * update linear regression model * update random forest regression model * update LightGBMModel docstring * use dict for lags in regressionmodel and adjust all models and tests accordingly * reformat regression_ensemble_model using pre-commit * reformat test_regression_models with pre-commit * shorten comment line length * remove unused import to pass flake8 * reformat with black * reformat with black * update docstring of _create_lagged_data * reformat using black * improve error message when unable to build any samples to fit and when input_dim doesn't match * return self at the end of fit() in regressionmodel * remove numpydoc type hints and add n_jobs_multioutput_wrapper parameter to fit() * add comments
unit8co · Feb 5, 2022 · 0e5b5ad · 0e5b5ad
1 parent c2d91e0
commit 0e5b5ad
Show file tree

Hide file tree

Showing 7 changed files with 725 additions and 602 deletions.
diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py
@@ -2,7 +2,7 @@
 LightGBM Model
 --------------
 
-This is a LightGBM implementation of Gradient Boosted Trees algorightm.
+This is a LightGBM implementation of Gradient Boosted Trees algorithm.
 
 To enable LightGBM support in Darts, follow the detailed install instructions for LightGBM in the README:
 https://github.com/unit8co/darts/blob/master/README.md
@@ -23,7 +23,8 @@ def __init__(
         lags: Union[int, list] = None,
         lags_past_covariates: Union[int, List[int]] = None,
         lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
-        **kwargs
+        output_chunk_length: int = 1,
+        **kwargs,
     ):
         """Light Gradient Boosted Model
 
@@ -41,6 +42,10 @@ def __init__(
             given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
             `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
             of integers with lags is required.
+        output_chunk_length
+            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
+            be useful if the covariates don't extend far enough into the future.
         **kwargs
             Additional keyword arguments passed to `lightgbm.LGBMRegressor`.
         """
@@ -50,13 +55,12 @@ def __init__(
             lags=lags,
             lags_past_covariates=lags_past_covariates,
             lags_future_covariates=lags_future_covariates,
+            output_chunk_length=output_chunk_length,
             model=lgb.LGBMRegressor(**kwargs),
         )
 
     def __str__(self):
-        return "LGBModel(lags={}, lags_past={}, lags_future={})".format(
-            self.lags, self.lags_past_covariates, self.lags_future_covariates
-        )
+        return f"LGBModel(lags={self.lags})"
 
     def fit(
         self,
@@ -67,7 +71,7 @@ def fit(
         val_past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
         val_future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
         max_samples_per_ts: Optional[int] = None,
-        **kwargs
+        **kwargs,
     ):
         """
         Fits/trains the model using the provided list of features time series and the target time series.
@@ -109,7 +113,7 @@ def fit(
             past_covariates=past_covariates,
             future_covariates=future_covariates,
             max_samples_per_ts=max_samples_per_ts,
-            **kwargs
+            **kwargs,
         )
 
         return self
diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py
@@ -19,6 +19,7 @@ def __init__(
         lags: Union[int, list] = None,
         lags_past_covariates: Union[int, List[int]] = None,
         lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        output_chunk_length: int = 1,
         **kwargs,
     ):
         """Linear regression model.
@@ -37,6 +38,10 @@ def __init__(
             given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
             `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
             of integers with lags is required.
+        output_chunk_length
+            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
+            be useful if the covariates don't extend far enough into the future.
         **kwargs
             Additional keyword arguments passed to `sklearn.linear_model.LinearRegression`.
         """
@@ -45,13 +50,9 @@ def __init__(
             lags=lags,
             lags_past_covariates=lags_past_covariates,
             lags_future_covariates=lags_future_covariates,
+            output_chunk_length=output_chunk_length,
             model=LinearRegression(**kwargs),
         )
 
     def __str__(self):
-        return (
-            f"LinearRegression(lags={self.lags}, "
-            f"lags_past_covariates={self.lags_past_covariates}, "
-            f"lags_historical_covariates={self.lags_historical_covariates}, "
-            f"lags_future_covariates={self.lags_future_covariates})"
-        )
+        return f"LinearRegression(lags={self.lags})"
diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py
@@ -28,6 +28,7 @@ def __init__(
         lags: Union[int, list] = None,
         lags_past_covariates: Union[int, List[int]] = None,
         lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
+        output_chunk_length: int = 1,
         n_estimators: Optional[int] = 100,
         max_depth: Optional[int] = None,
         **kwargs,
@@ -48,6 +49,10 @@ def __init__(
             given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
             `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
             of integers with lags is required.
+        output_chunk_length
+            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
+            be useful if the covariates don't extend far enough into the future.
         n_estimators : int
             The number of trees in the forest.
         max_depth : int
@@ -66,14 +71,12 @@ def __init__(
             lags=lags,
             lags_past_covariates=lags_past_covariates,
             lags_future_covariates=lags_future_covariates,
+            output_chunk_length=output_chunk_length,
             model=RandomForestRegressor(**kwargs),
         )
 
     def __str__(self):
         return (
             f"RandomForest(lags={self.lags}, "
-            f"lags_past_covariates={self.lags_past_covariates}, "
-            f"lags_historical_covariates={self.lags_historical_covariates}, "
-            f"lags_future_covariates={self.lags_future_covariates}, "
             f"n_estimators={self.n_estimators}, max_depth={self.max_depth})"
         )
diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py
@@ -6,7 +6,7 @@
 """
 from typing import Optional, List, Union, Sequence, Tuple
 from darts.timeseries import TimeSeries
-from darts.logging import get_logger, raise_if
+from darts.logging import get_logger, raise_if, raise_if_not
 
 from darts.models.forecasting.forecasting_model import (
     ForecastingModel,
@@ -58,20 +58,12 @@ def __init__(
                 lags_future_covariates=[0], model=regression_model
             )
 
-        raise_if(
-            regression_model.lags is not None
-            and regression_model.lags_historical_covariates is not None
-            and regression_model.lags_past_covariates is not None
-            and regression_model.lags_future_covariates != [0],
-            (
-                f"`lags`, `lags_historical_covariates` and `lags_past_covariates` "
-                f"of regression model must be `None` "
-                f"and `lags_future_covariates` must be [0]. Given:\n"
-                f"`lags`: {regression_model.lags}, "
-                f"`lags_historical_covariates`: {regression_model.lags_historical_covariates}, "
-                f"`lags_past_covariates`: {regression_model.lags} and "
-                f"`lags_future_covariates`: {regression_model.lags_future_covariates}."
-            ),
+        # check lags of the regression model
+        raise_if_not(
+            regression_model.lags == {"future": [0]},
+            f"`lags` and `lags_past_covariates` of regression model must be `None`"
+            f"and `lags_future_covariates` must be [0]. Given:\n"
+            f"{regression_model.lags}",
         )
 
         self.regression_model = regression_model