diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 961eff4ad..9f8861016 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -76,7 +76,7 @@ def _reshape_raw_predictions_to_forecst_df( forecast = predicted[:, forecast_lag - 1, j] pad_before = max_lags + forecast_lag - 1 pad_after = n_forecasts - forecast_lag - yhat = np.concatenate(([np.NaN] * pad_before, forecast, [np.NaN] * pad_after)) + yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.nan) if prediction_frequency is not None: ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None] mask = df_utils.create_mask_for_prediction_frequency( @@ -86,7 +86,7 @@ def _reshape_raw_predictions_to_forecst_df( ) yhat = np.full((len(ds),), np.nan) yhat[mask] = forecast - yhat = np.concatenate(([np.NaN] * pad_before, yhat, [np.NaN] * pad_after)) + yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.nan) # 0 is the median quantile index if j == 0: name = f"yhat{forecast_lag}" @@ -111,7 +111,7 @@ def _reshape_raw_predictions_to_forecst_df( forecast = components[comp][:, forecast_lag - 1, j] # 0 is the median quantile pad_before = max_lags + forecast_lag - 1 pad_after = n_forecasts - forecast_lag - yhat = np.concatenate(([np.NaN] * pad_before, forecast, [np.NaN] * pad_after)) + yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.nan) if prediction_frequency is not None: ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None] mask = df_utils.create_mask_for_prediction_frequency( @@ -121,7 +121,7 @@ def _reshape_raw_predictions_to_forecst_df( ) yhat = np.full((len(ds),), np.nan) yhat[mask] = forecast - yhat = np.concatenate(([np.NaN] * pad_before, yhat, [np.NaN] * pad_after)) + yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.nan) if j == 0: # temporary condition to add only the median component name = 
f"{comp}{forecast_lag}" df_forecast[name] = yhat @@ -132,7 +132,9 @@ def _reshape_raw_predictions_to_forecst_df( for j in range(len(quantiles)): forecast_0 = components[comp][0, :, j] forecast_rest = components[comp][1:, n_forecasts - 1, j] - yhat = np.concatenate(([np.NaN] * max_lags, forecast_0, forecast_rest)) + yhat = np.pad( + np.concatenate((forecast_0, forecast_rest)), (max_lags, 0), mode="constant", constant_values=np.nan + ) if prediction_frequency is not None: date_list = [] for key, value in prediction_frequency.items(): diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index e51cbe745..322b41be0 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -459,8 +459,6 @@ def check_dataframe( raise ValueError("Dataframe must have columns 'ds' with the dates.") if df["ds"].isnull().any(): raise ValueError("Found NaN in column ds.") - if df["ds"].dtype == np.int64: - df["ds"] = df.loc[:, "ds"].astype(str) if not np.issubdtype(df["ds"].to_numpy().dtype, np.datetime64): df["ds"] = pd.to_datetime(df.loc[:, "ds"], utc=True).dt.tz_convert(None) if df.groupby("ID").apply(lambda x: x.duplicated("ds").any()).any(): @@ -1022,7 +1020,7 @@ def convert_events_to_features(df, config_events: ConfigEvents, events_df): """ for event in config_events.keys(): - event_feature = pd.Series([0.0] * df.shape[0]) + event_feature = pd.Series(0, index=range(df.shape[0]), dtype="float32") # events_df may be None in case ID from original df is not provided in events df if events_df is None: dates = None diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index b3bcfe320..1331dcae3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -136,7 +136,17 @@ def init_after_tabularized(self, inputs, targets=None): if key in self.two_level_inputs: self.inputs[key] = OrderedDict({}) for name, features in data.items(): - self.inputs[key][name] = torch.from_numpy(features.astype(float)).type(inputs_dtype[key]) + if 
features.dtype != np.float32: + features = features.astype(np.float32, copy=False) + + tensor = torch.from_numpy(features) + + if tensor.dtype != inputs_dtype[key]: + self.inputs[key][name] = tensor.to( + dtype=inputs_dtype[key] + ) # keeps the previous code's cast to inputs_dtype[key] when it is not float32 + else: + self.inputs[key][name] = tensor else: if key == "timestamps": self.inputs[key] = data @@ -335,7 +345,7 @@ def _stride_lagged_features(df_col_name, feature_dims): series = df.loc[:, df_col_name].values - # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test + # Explicit float dtype (now np.float32) added to solve the problem with np.isnan for ubuntu test return np.array( - [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float64 + [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 ) def _stride_timestamps_for_forecasts(x): @@ -488,7 +498,8 @@ def fourier_series(dates, period, series_order): Matrix with seasonality features """ # convert to days since epoch - t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(float)) / (3600 * 24.0) + # keep float64 here: float32 has a 24-bit significand, so present-day epoch seconds (~1.7e9) would round to 128 s steps + t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float64)) / (3600 * 24.0) return fourier_series_t(t, period, series_order) @@ -602,8 +613,6 @@ def make_events_features(df, config_events: Optional[configure.ConfigEvents] = N # create all user specified events if config_events is not None: for event, configs in config_events.items(): - if event not in df.columns: - df[event] = np.zeros_like(df["ds"], dtype=np.float64) feature = df[event] _create_event_offset_features(event, configs, feature, additive_events, multiplicative_events) diff --git a/tests/test_unit.py b/tests/test_unit.py index 6fc4a8913..37ec6961c 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -962,3 +962,17 @@ def test_multiple_countries(): assert "Christmas Day" not in holiday_names assert "Erster Weihnachtstag" in holiday_names assert "Neujahr" in holiday_names + + +def test_float32_inputs(): + # test if 
float32 inputs are forecasted as float32 outputs + df = pd.read_csv(PEYTON_FILE, nrows=NROWS) + df["y"] = df["y"].astype(np.float32) + m = NeuralProphet( + epochs=EPOCHS, + batch_size=BATCH_SIZE, + learning_rate=LR, + ) + m.fit(df, freq="D") + forecast = m.predict(df) + assert forecast["yhat1"].dtype == np.float32