[minor] Memory efficient float32 types instead of float64 types #1402

Merged · 13 commits · Aug 21, 2023
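As a quick illustration of the motivation (a minimal sketch, not part of the PR), NumPy reports the per-element storage directly: float32 uses 4 bytes per value versus 8 for float64, so large feature arrays roughly halve in size.

import numpy as np

# Illustrative only: compare the footprint of the same data in both dtypes.
n = 1_000_000
a64 = np.zeros(n, dtype=np.float64)
a32 = np.zeros(n, dtype=np.float32)

print(np.dtype(np.float64).itemsize, a64.nbytes)  # 8 bytes per value -> 8,000,000 bytes
print(np.dtype(np.float32).itemsize, a32.nbytes)  # 4 bytes per value -> 4,000,000 bytes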
12 changes: 7 additions & 5 deletions neuralprophet/data/process.py
@@ -76,7 +76,7 @@ def _reshape_raw_predictions_to_forecst_df(
forecast = predicted[:, forecast_lag - 1, j]
pad_before = max_lags + forecast_lag - 1
pad_after = n_forecasts - forecast_lag
- yhat = np.concatenate(([np.NaN] * pad_before, forecast, [np.NaN] * pad_after))
+ yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
if prediction_frequency is not None:
ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None]
mask = df_utils.create_mask_for_prediction_frequency(
@@ -86,7 +86,7 @@ def _reshape_raw_predictions_to_forecst_df(
)
yhat = np.full((len(ds),), np.nan)
yhat[mask] = forecast
- yhat = np.concatenate(([np.NaN] * pad_before, yhat, [np.NaN] * pad_after))
+ yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
# 0 is the median quantile index
if j == 0:
name = f"yhat{forecast_lag}"
@@ -111,7 +111,7 @@ def _reshape_raw_predictions_to_forecst_df(
forecast = components[comp][:, forecast_lag - 1, j] # 0 is the median quantile
pad_before = max_lags + forecast_lag - 1
pad_after = n_forecasts - forecast_lag
- yhat = np.concatenate(([np.NaN] * pad_before, forecast, [np.NaN] * pad_after))
+ yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
if prediction_frequency is not None:
ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None]
mask = df_utils.create_mask_for_prediction_frequency(
@@ -121,7 +121,7 @@
)
yhat = np.full((len(ds),), np.nan)
yhat[mask] = forecast
- yhat = np.concatenate(([np.NaN] * pad_before, yhat, [np.NaN] * pad_after))
+ yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
if j == 0: # temporary condition to add only the median component
name = f"{comp}{forecast_lag}"
df_forecast[name] = yhat
@@ -132,7 +132,9 @@ def _reshape_raw_predictions_to_forecst_df(
for j in range(len(quantiles)):
forecast_0 = components[comp][0, :, j]
forecast_rest = components[comp][1:, n_forecasts - 1, j]
- yhat = np.concatenate(([np.NaN] * max_lags, forecast_0, forecast_rest))
+ yhat = np.pad(
+     np.concatenate((forecast_0, forecast_rest)), (max_lags, 0), mode="constant", constant_values=np.NaN
+ )
if prediction_frequency is not None:
date_list = []
for key, value in prediction_frequency.items():
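A short sketch (illustrative, not from the PR) of why np.pad is preferred in the hunks above: concatenating a float32 forecast with Python lists of NaN promotes the result to float64, while np.pad keeps the input dtype.

import numpy as np

forecast = np.array([1.0, 2.0, 3.0], dtype=np.float32)
pad_before, pad_after = 2, 1

# Old approach: the lists of NaNs become float64 arrays and upcast the result.
old = np.concatenate(([np.nan] * pad_before, forecast, [np.nan] * pad_after))
# New approach: padding preserves the dtype of the input array.
new = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.nan)

print(old.dtype, new.dtype)  # float64 float32
assert np.allclose(old, new, equal_nan=True)  # same values, half the memory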
4 changes: 1 addition & 3 deletions neuralprophet/df_utils.py
Collaborator (Author), on the removed int64-to-string conversion: this line is unnecessary, as the values are converted to datetime on the line below.

@@ -459,8 +459,6 @@ def check_dataframe(
raise ValueError("Dataframe must have columns 'ds' with the dates.")
if df["ds"].isnull().any():
raise ValueError("Found NaN in column ds.")
if df["ds"].dtype == np.int64:
df["ds"] = df.loc[:, "ds"].astype(str)
if not np.issubdtype(df["ds"].to_numpy().dtype, np.datetime64):
df["ds"] = pd.to_datetime(df.loc[:, "ds"], utc=True).dt.tz_convert(None)
if df.groupby("ID").apply(lambda x: x.duplicated("ds").any()).any():
@@ -1022,7 +1020,7 @@ def convert_events_to_features(df, config_events: ConfigEvents, events_df):
"""

for event in config_events.keys():
- event_feature = pd.Series([0.0] * df.shape[0])
+ event_feature = pd.Series(0, index=range(df.shape[0]), dtype="float32")
# events_df may be None in case ID from original df is not provided in events df
if events_df is None:
dates = None
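A minimal sketch (illustrative, not from the PR) of the event-feature change above: a Series built from a Python list of 0.0 defaults to float64, while passing an explicit float32 dtype halves the memory of the values.

import pandas as pd

n_rows = 10_000
old = pd.Series([0.0] * n_rows)                           # float64 by default
new = pd.Series(0, index=range(n_rows), dtype="float32")  # explicit float32

print(old.dtype, old.to_numpy().nbytes)  # float64, 80000 bytes
print(new.dtype, new.to_numpy().nbytes)  # float32, 40000 bytes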
18 changes: 13 additions & 5 deletions neuralprophet/time_dataset.py
@@ -136,7 +136,17 @@
if key in self.two_level_inputs:
self.inputs[key] = OrderedDict({})
for name, features in data.items():
- self.inputs[key][name] = torch.from_numpy(features.astype(float)).type(inputs_dtype[key])
+ if features.dtype != np.float32:
+     features = features.astype(np.float32, copy=False)
+
+ tensor = torch.from_numpy(features)
+
+ if tensor.dtype != inputs_dtype[key]:
+     self.inputs[key][name] = tensor.to(
+         dtype=inputs_dtype[key]
+     )  # this can probably be removed, but was included in the previous code
+ else:
+     self.inputs[key][name] = tensor

Codecov / codecov/patch: added line neuralprophet/time_dataset.py#L145 was not covered by tests.
else:
if key == "timestamps":
self.inputs[key] = data
@@ -335,7 +345,7 @@
series = df.loc[:, df_col_name].values
# Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test
return np.array(
- [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float64
+ [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32
)

def _stride_timestamps_for_forecasts(x):
@@ -488,7 +498,7 @@
Matrix with seasonality features
"""
# convert to days since epoch
- t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(float)) / (3600 * 24.0)
+ t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0)
return fourier_series_t(t, period, series_order)


@@ -602,8 +612,6 @@
# create all user specified events
if config_events is not None:
for event, configs in config_events.items():
- if event not in df.columns:
-     df[event] = np.zeros_like(df["ds"], dtype=np.float64)
feature = df[event]
_create_event_offset_features(event, configs, feature, additive_events, multiplicative_events)

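A small sketch (illustrative, not from the PR) of the tensor-conversion logic above: torch.from_numpy keeps the NumPy dtype, so casting to float32 on the NumPy side yields a float32 tensor directly, whereas a float64 array would produce a float64 tensor that needs an extra cast.

import numpy as np
import torch

features = np.arange(6, dtype=np.float64).reshape(3, 2)

# Without the cast: float64 tensor first, then an extra copy to downcast.
t64 = torch.from_numpy(features)
t32 = t64.to(dtype=torch.float32)

# With the cast on the NumPy side: the tensor is float32 from the start.
features32 = features.astype(np.float32, copy=False)  # no-op if already float32
t_direct = torch.from_numpy(features32)

print(t64.dtype, t32.dtype, t_direct.dtype)  # torch.float64 torch.float32 torch.float32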
14 changes: 14 additions & 0 deletions tests/test_unit.py
@@ -962,3 +962,17 @@ def test_multiple_countries():
assert "Christmas Day" not in holiday_names
assert "Erster Weihnachtstag" in holiday_names
assert "Neujahr" in holiday_names


+ def test_float32_inputs():
+     # test if float32 inputs are forecasted as float32 outputs
+     df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
+     df["y"] = df["y"].astype(np.float32)
+     m = NeuralProphet(
+         epochs=EPOCHS,
+         batch_size=BATCH_SIZE,
+         learning_rate=LR,
+     )
+     m.fit(df, freq="D")
+     forecast = m.predict(df)
+     assert forecast["yhat1"].dtype == np.float32