Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spline variable to var_builder #15

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.2
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.0
hooks:
- id: mypy
files: ^src
3 changes: 2 additions & 1 deletion src/spxmod/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ def __init__(
data=dict(col_obs=obs, col_weights=weights),
variables=[],
linear_gpriors=[],
param_specs=param_specs or {},
)
if param_specs is not None:
self.core_config.update(param_specs)
self.spaces = spaces
self.var_builders = var_builders

Expand Down
216 changes: 155 additions & 61 deletions src/spxmod/regmod_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,19 @@
from msca.optim.solver import IPSolver, NTCGSolver
from regmod.data import Data
from regmod.models import BinomialModel, GaussianModel, PoissonModel
from regmod.prior import GaussianPrior, LinearGaussianPrior, UniformPrior
from regmod.variable import Variable
from regmod.parameter import Parameter
from regmod.prior import (
GaussianPrior,
LinearGaussianPrior,
LinearUniformPrior,
SplineGaussianPrior,
SplinePrior,
SplineUniformPrior,
UniformPrior,
)
from regmod.variable import SplineVariable, Variable
from scipy.stats import norm
from xspline import XSpline

from spxmod.linalg import get_pred_var
from spxmod.typing import Callable, DataFrame, NDArray, RegmodModel
Expand Down Expand Up @@ -38,25 +48,109 @@ def msca_optimize(
return result.x


class SparseParameter(Parameter):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could there be any reason to add SparseParameter directly to regmod?

def get_linear_gmat(self) -> Matrix:
"""Get the linear Gaussian prior matrix.

Returns
-------
Matrix
Gaussian prior design matrix.

"""
if len(self.variables) == 0:
return np.empty(shape=(0, 0))
gmat = sp.block_diag(
[
(
var.get_linear_gmat()
if isinstance(var, SplineVariable)
else np.empty((0, 1))
)
for var in self.variables
]
)
if len(self.linear_gpriors) > 0:
gmat = sp.vstack(
[gmat] + [prior.mat for prior in self.linear_gpriors]
)
return asmatrix(sp.csc_matrix(gmat))

def get_linear_umat(self) -> Matrix:
"""Get the linear Uniform prior matrix.

Returns
-------
Matrix
Uniform prior design matrix.

"""
if len(self.variables) == 0:
return np.empty(shape=(0, 0))
umat = sp.block_diag(
[
(
var.get_linear_umat()
if isinstance(var, SplineVariable)
else np.empty((0, 1))
)
for var in self.variables
]
)
if len(self.linear_upriors) > 0:
umat = sp.vstack(
[umat] + [prior.mat for prior in self.linear_upriors]
)
return asmatrix(sp.csc_matrix(umat))


class SparseRegmodModel(RegmodModel):
def __init__(
self,
data: dict,
variables: list[dict],
linear_gpriors: list[dict] | None = None,
linear_upriors: list[dict] | None = None,
**kwargs,
):
data = Data(**data)

# build variables
variables = [_build_regmod_variable(**v) for v in variables]

linear_gpriors = linear_gpriors or []
linear_upriors = linear_upriors or []

# build smoothing prior
if linear_gpriors:
for i, prior in enumerate(linear_gpriors):
if prior["mat"].size > 0:
linear_gpriors[i] = LinearGaussianPrior(**prior)

if linear_upriors:
for i, prior in enumerate(linear_upriors):
if prior["mat"].size > 0:
linear_upriors[i] = LinearUniformPrior(**prior)

param = SparseParameter(
name=self.param_names[0],
variables=variables,
linear_gpriors=linear_gpriors,
linear_upriors=linear_upriors,
**kwargs,
)
super().__init__(data, params=[param])

def attach_df(self, df: DataFrame, encode: Callable) -> None:
param = self.params[0]
self.data.attach_df(df)
self.mat = [asmatrix(sp.csc_matrix(encode(df)))]
self.uvec = self.get_uvec()
self.gvec = self.get_gvec()
self.linear_uvec = self.get_linear_uvec()
self.linear_gvec = self.get_linear_gvec()
self.linear_umat = asmatrix(sp.csc_matrix((0, self.size)))
self.linear_gmat = asmatrix(sp.csc_matrix((0, self.size)))
param = self.params[0]
if self.params[0].linear_gpriors:
self.linear_gmat = asmatrix(
sp.csc_matrix(param.linear_gpriors[0].mat)
)
if self.params[0].linear_upriors:
self.linear_umat = asmatrix(
sp.csc_matrix(param.linear_upriors[0].mat)
)
self.uvec = param.get_uvec()
self.gvec = param.get_gvec()
self.linear_uvec = param.get_linear_uvec()
self.linear_gvec = param.get_linear_gvec()
self.linear_gmat = param.get_linear_gmat()
self.linear_umat = param.get_linear_umat()

# parse constraints
cmat = asmatrix(
Expand Down Expand Up @@ -180,11 +274,8 @@ class SparseBinomialModel(SparseRegmodModel, BinomialModel):
"""Sparse Binomial model."""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.params[0].inv_link.name != "expit":
raise ValueError(
"Sparse Binomial model can only work with expit inv_link function"
)
kwargs["inv_link"] = "expit"
SparseRegmodModel.__init__(self, *args, **kwargs)

def objective(self, coefs: NDArray) -> float:
weights = self.data.weights * self.data.trim_weights
Expand Down Expand Up @@ -231,11 +322,8 @@ class SparseGaussianModel(SparseRegmodModel, GaussianModel):
"""Sparse Gaussian model."""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.params[0].inv_link.name != "identity":
raise ValueError(
"Sparse Gaussian model can only work with identity inv_link function"
)
kwargs["inv_link"] = "identity"
SparseRegmodModel.__init__(self, *args, **kwargs)

def objective(self, coefs: NDArray) -> float:
weights = self.data.weights * self.data.trim_weights
Expand Down Expand Up @@ -280,11 +368,8 @@ class SparsePoissonModel(SparseRegmodModel, PoissonModel):
"""Sparse Poisson model."""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.params[0].inv_link.name != "exp":
raise ValueError(
"Sparse Poisson model can only work with exp inv_link function"
)
kwargs["inv_link"] = "exp"
SparseRegmodModel.__init__(self, *args, **kwargs)

def objective(self, coefs: NDArray) -> float:
weights = self.data.weights * self.data.trim_weights
Expand Down Expand Up @@ -338,40 +423,49 @@ def build_regmod_model(
model_type: str,
data: dict,
variables: list[dict],
linear_gpriors: list[dict],
param_specs: dict,
linear_gpriors: list[dict] | None = None,
linear_upriors: list[dict] | None = None,
**kwargs,
) -> RegmodModel:
# build data
data = Data(**data)

# build variables
variables = [_build_regmod_variable(**kwargs) for kwargs in variables]

# build smoothing prior
for i, prior in enumerate(linear_gpriors):
if prior["mat"].size > 0:
linear_gpriors[i] = LinearGaussianPrior(**prior)

# buid regmod model
model_class = _model_dict[model_type]
model_param = model_class.param_names[0]

return model_class(
return _model_dict[model_type](
data,
param_specs={
model_param: {
"variables": variables,
"linear_gpriors": linear_gpriors,
**param_specs,
}
},
variables,
linear_gpriors or [],
linear_upriors or [],
**kwargs,
)


def _build_regmod_variable(name: str, gprior: dict, uprior: dict) -> Variable:
gprior = GaussianPrior(**gprior)
uprior = UniformPrior(**uprior)
return Variable(name=name, priors=[gprior, uprior])
def _build_regmod_variable(
name: str,
gprior: dict | None = None,
uprior: dict | None = None,
spline: XSpline | None = None,
spline_gpriors: list[dict] | None = None,
spline_upriors: list[dict] | None = None,
) -> Variable:
priors = []
if gprior is not None:
priors.append(GaussianPrior(**gprior))
if uprior is not None:
priors.append(UniformPrior(**uprior))
if spline_gpriors is not None:
priors += [
SplineGaussianPrior(**spline_gprior)
for spline_gprior in spline_gpriors
]
if spline_upriors is not None:
priors += [
SplineUniformPrior(**spline_uprior)
for spline_uprior in spline_upriors
]
if spline is None:
return Variable(name=name, priors=priors)
else:
for prior in priors:
if isinstance(prior, SplinePrior):
prior.attach_spline(spline)
return SplineVariable(name=name, spline=spline, priors=priors)


def get_vcov(hessian: Matrix, jacobian2: Matrix) -> NDArray:
Expand Down
55 changes: 32 additions & 23 deletions src/spxmod/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,41 +84,43 @@ def build_encoded_names(self, column: str) -> list[str]:
return [column]
return [f"{column}_{self.name}_{i}" for i in range(self.size)]

def encode(self, data: DataFrame, column: str = "intercept") -> DataFrame:
def encode(self, mat: NDArray, coords: DataFrame) -> coo_matrix:
"""Encode the data into the space grid.

Parameters
----------
data : DataFrame
Dataframe that contains the coordinate information and the column
to encode.
column : str, optional
Column name to encode. Default is "intercept".
mat
Design matrix to be encoded, it should have the same number of rows
with `coords`.
coords
Coordinate data frame for each row of the design matrix. It's
columns should match with the space dimension name.

Returns
-------
DataFrame
Encoded dataframe.
coo_matrix
Encoded design matrix.

"""
val = (
np.ones(len(data))
if column == "intercept"
else data[column].to_numpy()
)
row = np.arange(len(data), dtype=int)
col = np.zeros(len(data), dtype=int)
vals = mat.ravel()
nrow, ncol = mat.shape

row = np.repeat(np.arange(nrow, dtype=int), ncol)
col = np.zeros(nrow, dtype=int)
if self.dims:
col = (
data[self.dim_names]
.merge(self.span.reset_index(), how="left", on=self.dim_names)
coords.merge(
self.span.reset_index(), how="left", on=self.dim_names
)
.eval("index")
.to_numpy()
)
return coo_matrix((val, (row, col)), shape=(len(data), self.size))
col = np.add.outer(ncol * col, np.arange(ncol)).ravel()
return coo_matrix((vals, (row, col)), shape=(nrow, self.size * ncol))

def build_smoothing_prior(
self,
size: int,
lam: float | dict[str, float] = 0.0,
lam_mean: float = 0.0,
scale_by_distance: bool = False,
Expand All @@ -143,7 +145,7 @@ def build_smoothing_prior(
"""
if isinstance(lam, float):
lam = {name: lam for name in self.dim_names}
mat, sd = coo_matrix((0, self.size)), np.empty((0,))
mat, sd = coo_matrix((0, self.size * size)), np.empty((0,))

mats_default = list(map(identity, self.dim_sizes))
sds_default = list(map(np.ones, self.dim_sizes))
Expand All @@ -152,16 +154,23 @@ def build_smoothing_prior(
lam_i = lam[dim.name]
if lam_i > 0.0 and isinstance(dim, NumericalDimension):
mats = mats_default.copy()
mats[i] = dim.build_smoothing_mat()
mats[i] = kron(dim.build_smoothing_mat(), identity(size))
mat = vstack([mat, functools.reduce(kron, mats)])

sds = sds_default.copy()
sds[i] = dim.build_smoothing_sd(lam_i, scale_by_distance)
sds[i] = np.repeat(
dim.build_smoothing_sd(lam_i, scale_by_distance), size
)
sd = np.hstack([sd, functools.reduce(_flatten_outer, sds)])

if lam_mean > 0.0:
mat = vstack([mat, np.repeat(1 / self.size, self.size)])
sd = np.hstack([sd, [1 / np.sqrt(lam_mean)]])
mat = vstack(
[
mat,
kron(np.repeat(1 / self.size, self.size), identity(size)),
]
)
sd = np.hstack([sd, np.repeat(1 / np.sqrt(lam_mean), size)])

# TODO: regmod cannot recognize sparse array as prior, this shouldn't
# be necessary in the future
Expand Down
Loading