From 1cd58b65adca63c5b37300e98e91179f4fb22ac4 Mon Sep 17 00:00:00 2001
From: Joey Ortiz
Date: Tue, 20 Feb 2024 10:28:19 -0800
Subject: [PATCH] Adding NormalFixedMean (#333)

* Implemented NormalFixedMean

* Added NormalFixedVar and NormalFixedMean to test_distns.py

* Match log(sigma) parameterization of Normal class

* Update __init__.py

Linter is demanding a blank line for ending a file (sigh)
---
 ngboost/distns/__init__.py |  3 +-
 ngboost/distns/normal.py   | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_distns.py       | 15 ++++++++++++++-
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py
index 30b20d4..05e9412 100644
--- a/ngboost/distns/__init__.py
+++ b/ngboost/distns/__init__.py
@@ -7,7 +7,7 @@
 from .laplace import Laplace
 from .lognormal import LogNormal
 from .multivariate_normal import MultivariateNormal
-from .normal import Normal, NormalFixedVar
+from .normal import Normal, NormalFixedMean, NormalFixedVar
 from .poisson import Poisson
 from .t import T, TFixedDf, TFixedDfFixedVar
 
@@ -24,6 +24,7 @@
     "LogNormal",
     "MultivariateNormal",
     "Normal",
+    "NormalFixedMean",
     "NormalFixedVar",
     "Poisson",
     "T",
diff --git a/ngboost/distns/normal.py b/ngboost/distns/normal.py
index 62f7210..35bbdfd 100644
--- a/ngboost/distns/normal.py
+++ b/ngboost/distns/normal.py
@@ -150,3 +150,75 @@ def __init__(self, params):
     def fit(Y):
         m, _ = sp.stats.norm.fit(Y)
         return m
+
+
+# ### Fixed Mean Normal ###
+class NormalFixedMeanLogScore(LogScore):
+    """Negative log-likelihood score for the single log(scale) parameter."""
+
+    def score(self, Y):
+        return -self.dist.logpdf(Y)
+
+    def d_score(self, Y):
+        # d/d log(scale) of -logpdf(Y) is 1 - (Y - loc)^2 / var.
+        D = np.zeros((len(Y), 1))
+        D[:, 0] = 1 - ((self.loc - Y) ** 2) / self.var
+        return D
+
+    def metric(self):
+        # Fisher information of a normal w.r.t. log(scale) is the constant 2.
+        FI = np.zeros((self.var.shape[0], 1, 1))
+        FI[:, 0, 0] = 2
+        return FI
+
+
+class NormalFixedMeanCRPScore(CRPScore):
+    """CRPS for the single log(scale) parameter."""
+
+    def score(self, Y):
+        Z = (Y - self.loc) / self.scale
+        return self.scale * (
+            Z * (2 * sp.stats.norm.cdf(Z) - 1)
+            + 2 * sp.stats.norm.pdf(Z)
+            - 1 / np.sqrt(np.pi)
+        )
+
+    def d_score(self, Y):
+        # Gradient of the CRPS with respect to log(scale).
+        Z = (Y - self.loc) / self.scale
+        D = np.zeros((len(Y), 1))
+        D[:, 0] = self.score(Y) + (Y - self.loc) * -1 * (2 * sp.stats.norm.cdf(Z) - 1)
+        return D
+
+    def metric(self):
+        I = np.c_[self.var]
+        I = I.reshape((self.var.shape[0], 1, 1))
+        I = 1 / (2 * np.sqrt(np.pi)) * I
+        return I
+
+
+class NormalFixedMean(Normal):
+    """
+    Implements the normal distribution with mean=0 for NGBoost.
+
+    The fixed-mean normal distribution has one parameter, log(scale), where
+    scale is the standard deviation.
+    This distribution has both LogScore and CRPScore implemented for it.
+    """
+
+    n_params = 1
+    scores = [NormalFixedMeanLogScore, NormalFixedMeanCRPScore]
+
+    # pylint: disable=super-init-not-called
+    def __init__(self, params):
+        self.loc = np.zeros_like(params[0])
+        self.scale = np.exp(params[0])
+        self.var = self.scale**2
+        self.shape = self.loc.shape
+        self.dist = dist(loc=self.loc, scale=self.scale)
+
+    def fit(Y):
+        # The mean is fixed at 0, so only the scale is estimated; return it on
+        # the log scale because __init__ applies np.exp to the parameter.
+        _, s = sp.stats.norm.fit(Y, floc=0)
+        return np.log(s)
diff --git a/tests/test_distns.py b/tests/test_distns.py
index 46a9309..3aff1b9 100644
--- a/tests/test_distns.py
+++ b/tests/test_distns.py
@@ -16,6 +16,8 @@
     LogNormal,
     MultivariateNormal,
     Normal,
+    NormalFixedMean,
+    NormalFixedVar,
     T,
     TFixedDf,
     TFixedDfFixedVar,
@@ -61,7 +63,18 @@ def is_t_distribution(
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "dist",
-    [Normal, LogNormal, Exponential, Gamma, T, TFixedDf, TFixedDfFixedVar, Cauchy],
+    [
+        Normal,
+        NormalFixedVar,
+        NormalFixedMean,
+        LogNormal,
+        Exponential,
+        Gamma,
+        T,
+        TFixedDf,
+        TFixedDfFixedVar,
+        Cauchy,
+    ],
 )
 @pytest.mark.parametrize(
     "learner",