Skip to content

Commit

Permalink
additional tests of pivoting logic; bootstrapped effects for mixed mo…
Browse files Browse the repository at this point in the history
…dels (#75)

* additional tests of pivoting logic

* bootstrapped effects

* YAS

* docs compat bump

* drop explicit StatsModels 0.6 testing
  • Loading branch information
palday committed Jul 9, 2024
1 parent be25032 commit 7771901
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 24 deletions.
10 changes: 1 addition & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
- main
jobs:
test:
name: Julia ${{ matrix.version }} - StatsModels ${{ matrix.statsmodels }} - ${{ matrix.os }} - ${{ matrix.arch }}
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
Expand All @@ -21,9 +21,6 @@ jobs:
- '1'
- '1.6'
- 'nightly'
statsmodels:
- '0.6'
- '0.7'
os:
- ubuntu-latest
arch:
Expand All @@ -35,11 +32,6 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- name: "Install StatsModels"
shell: julia --color=yes --project {0}
run: |
using Pkg
Pkg.add(Pkg.PackageSpec(; name="StatsModels", version="${{ matrix.statsmodels }}"))
- name: Cache
uses: julia-actions/cache@v2
with:
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ docs/site/
# environment.
Manifest.toml
Manifest-v*.toml
coverage/lcov.info
*.cov
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Effects"
uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
authors = ["Beacon Biosignals, Inc."]
version = "1.2.0"
version = "1.3.0"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Expand Down
3 changes: 1 addition & 2 deletions docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
AlgebraOfGraphics = "0.4.5"
AlgebraOfGraphics = "0.6"
DataFrames = "1"
Documenter = "1.3"
GLM = "1.3"
StatsModels = "0.6.20"
julia = "1.6"
8 changes: 4 additions & 4 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
sort!(refgrid, [:age])
plt = data(refgrid) * mapping(:age, :weight; lower=:lower, upper=:upper, color=:sex) *
(visual(Lines) + visual(LinesFill))
plt = data(refgrid) * mapping(:age, :weight; color=:sex) *
(visual(Lines) + mapping(; lower=:lower, upper=:upper) * visual(LinesFill))
draw(plt)
```

Expand All @@ -149,8 +149,8 @@ refgrid_centered[!, :lower] = @. refgrid_centered.weight - 1.96 * refgrid_center
refgrid_centered[!, :upper] = @. refgrid_centered.weight + 1.96 * refgrid_centered.err
sort!(refgrid_centered, [:age])
plt = data(refgrid_centered) * mapping(:age, :weight; lower=:lower, upper=:upper, color=:sex) *
(visual(Lines) + visual(LinesFill))
plt = data(refgrid_centered) * mapping(:age, :weight; color=:sex) *
(visual(Lines) + mapping(; lower=:lower, upper=:upper) * visual(LinesFill))
draw(plt)
```

Expand Down
93 changes: 91 additions & 2 deletions ext/EffectsMixedModelsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ using Effects
using MixedModels
using StatsBase

using Effects: typify, _difference_method!, _responsename
using Effects: typify, _difference_method!,
_responsename, _model_link
using LinearAlgebra: diag
using StatsModels: modelcols
using GLM: Link
using GLM: GLM, Link

Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m)

Expand All @@ -29,4 +30,92 @@ function Effects.effects!(reference_grid::DataFrame, model::MixedModel;
return reference_grid
end

_invlink(model::MixedModel, invlink) = Base.Fix1(GLM.linkinv, _model_link(model, invlink))
_invlink(::MixedModel, ::typeof(identity)) = identity

"""
effects!(reference_grid::DataFrame, model::MixedModel, boot::MixedModelBootstrap;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
lower_col=:lower, upper_col=:upper, level=nothing)
Use the results of a bootstrap to compute empirical error estimates
(instead of using the variance-covariance matrix).
!!! warning
This method is experimental and may change in its defaults or
disappear entirely in a future release!
"""
function Effects.effects!(reference_grid::DataFrame, model::MixedModel,
boot::MixedModelBootstrap;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
lower_col=:lower, upper_col=:upper,
level=nothing)
# right now this is written for a RegressionModel and implicitly assumes
# the existence of an appropriate formula method
form = formula(model)
form_typical = typify(reference_grid, form, modelmatrix(model); typical=typical)
# we don't need to worry about pivoting / fixef rank deficiency here
# because the default -0.0 placeholder coefs still yield the correct predictions
# it's the NaNs in the vcov that create problems, but this method sidesteps
# the vcov computation, so we're all good!
X = modelcols(form_typical, reference_grid)
eff = X * coef(model)
# each row is a bootstrap replicate
# each column is a different row of X
# this seems like a weird way to store things, until you remember
# that we aggregate across replicates and thus want to take advantage
# of column major storage
boot_err = mapreduce(vcat, groupby(DataFrame(boot.β), :iter)) do gdf
β = gdf[!, ]
return (X * β)'
end

err = map(std, eachcol(boot_err))
_difference_method!(eff, err, model, invlink)
reference_grid[!, something(eff_col, _responsename(model))] = eff
reference_grid[!, err_col] = err

# logic here is slightly different than for other methods
# because we can compute empirical CIs instead of relying on a Wald
# approximation
if !isnothing(level)
lower_tail = (1 - level) / 2
upper_tail = 1 - lower_tail

ci = map(eachcol(boot_err)) do col
return quantile(col, [lower_tail, upper_tail])
end

invlink = _invlink(model, invlink)
reference_grid[!, lower_col] = invlink.(first.(ci))
reference_grid[!, upper_col] = invlink.(last.(ci))
end
return reference_grid
end

"""
effects(design::AbstractDict, model::MixedModel, boot::MixedModelBootstrap;
eff_col=nothing, err_col=:err, typical=mean,
lower_col=:lower, upper_col=:upper, invlink=identity,
level=nothing)
Use the results of a bootstrap to compute empirical error estimates
(instead of using the variance-covariance matrix).
!!! warning
This method is experimental and may change in its defaults or
disappear entirely in a future release!
"""
function Effects.effects(design::AbstractDict, model::MixedModel, boot::MixedModelBootstrap;
eff_col=nothing, err_col=:err, typical=mean,
lower_col=:lower, upper_col=:upper, invlink=identity,
level=nothing)
grid = expand_grid(design)
dv = something(eff_col, _responsename(model))
level = isnothing(level) ? 0.68 : level
effects!(grid, model, boot; eff_col=dv, err_col, typical, invlink, level, lower_col,
upper_col)
return grid
end

end # module
4 changes: 2 additions & 2 deletions src/regressionmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ determined from the model type.
!!! compat "Julia 1.9"
Automatic inverse link determination is implemented using package
extensions, which are available beginning in Julia 1.9.
An error is thrown if the inverse link cannot be determined. This will
always occur with Julia versions prior to 1.9, and will otherwise occur
when no extension has been loaded that specifies the link function for
Expand Down Expand Up @@ -143,7 +143,7 @@ _invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink,
_invlink_and_deriv(::typeof(identity), η) = (η, 1)
# this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link())
# for now, this is private API, but we should see how this goes and whether we can make it public API
# so local extensions (instead of Package-Extensions) are better supported
# so local extensions (instead of Package-Extensions) are better supported
_model_link(::RegressionModel, invlink::Function) = invlink
function _model_link(model::RegressionModel, ::AutoInvLink)
msg = string("cannot automatically determine inverse link for models ",
Expand Down
55 changes: 51 additions & 4 deletions test/mixedmodels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,32 @@ using Test

rng = StableRNG(42)
x = rand(rng, 100)
data = (x=x, x2=1.5 .* x, y=rand(rng, [0, 1], 100), z=repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1 | z)), data; progress=false)
a = rand(rng, 100)
data = (; x, x2=1.5 .* x, a, y=rand(rng, [0, 1], 100), g=repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ a + x + x2 + (1 | g)), data; progress=false)
dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
kept_coef = last(fixefnames(model))

# due to the vagaries of numerical linear algebra and pivoting, it's
# not completely deterministic which of x and x2 gets dropped, though it
# will tend to be x2
design = Dict(Symbol(kept_coef) => [0])
design = Dict(Symbol(kept_coef) => [1], :a => [2])
eff = effects(design, model)
@test dropped_coef names(eff)
@test kept_coef names(eff)
@test !isnan(only(eff.err))
@test all(!isnan, eff.err)
@test eff.y - eff.err eff.lower
@test eff.y + eff.err eff.upper

β = fixef(model)
# note that in the design above, we had a = 2
# β[1] = intercept
# β[2] = a, multiply by 2 because that's what we put in the design
# β[3] = whatever of x|x2 wasn't pivoted out, "multiply by 1" (ie do nothing)
# because we had keep_coef = 1 in the design
@test only(eff.y) β[1] + 2 * β[2] + β[3]

# there is one bit of weirdness -- the removed coefficients behave like any other
# variable in the "design" that is missing from the model.
design = Dict(Symbol(dropped_coef) => [0])
Expand All @@ -33,3 +42,41 @@ design = Dict(:q => [0])
eff_not_present = effects(design, model)

@test Matrix(eff_dropped) Matrix(eff_not_present)

# bootstrap
design = Dict(Symbol(kept_coef) => [1], :a => [2])
eff = effects(design, model)

boot = @suppress parametricbootstrap(StableRNG(666), 1000, model)
bootdf = unstack(DataFrame(boot.β), :coefname, )
# make sure that the bootstrap is correctly zeroing out
@test all(isequal(-0.0), bootdf[!, dropped_coef])

# now make sure the bootstrap gives approximately the same results
eff_boot = effects(design, model, boot)
@test eff_boot.y eff.y
@test eff_boot.err eff.err atol = 0.005

# test intervals and multiple rows in refgrid
design = Dict(Symbol(kept_coef) => 1:2, :a => [2])
eff95 = effects(design, model; level=0.95)
eff_boot95 = effects(design, model, boot; level=0.95)
@test eff_boot95.y eff95.y
@test eff_boot95.lower eff95.lower atol = 0.1
@test eff_boot95.upper eff95.upper atol = 0.1

glmm = @suppress fit(MixedModel,
@formula(use ~ 1 + age + abs2(age) + livch + urban + (1 | dist)),
MixedModels.dataset(:contra),
Bernoulli(); fast=true)

gboot = @suppress parametricbootstrap(StableRNG(345), 250, glmm)

design = Dict(:age => -1:1:1)
geff_boot = effects(design, glmm, gboot; eff_col="y", invlink=AutoInvLink(), level=0.95)
geff = effects(design, glmm; eff_col="y", invlink=AutoInvLink(), level=0.95)

@test geff_boot.y geff.y
@test geff_boot.err geff.err atol = 0.05
@test geff_boot.lower geff.lower atol = 0.05
@test geff_boot.upper geff.upper atol = 0.05

2 comments on commit 7771901

@palday
Copy link
Member Author

@palday palday commented on 7771901 Jul 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/110780

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.3.0 -m "<description of version>" 7771901d5263f5eba95ecdee50599c1ea97bc719
git push origin v1.3.0

Please sign in to comment.