Skip to content

Commit

Permalink
Merge 53e04e3 into b551a03
Browse files Browse the repository at this point in the history
  • Loading branch information
palday committed Aug 1, 2024
2 parents b551a03 + 53e04e3 commit 61dec6e
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 25 deletions.
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
MixedModels v4.25.3 Release Notes
==============================
- Fix a bug in the handling of rank deficiency in the `simulate[!]` code. This has important correctness implications for bootstrapping models with rank-deficient fixed effects (as can happen in the case of partial crossing of the fixed effects / missing cells). [#778]

MixedModels v4.25.2 Release Notes
==============================
- Use `public` keyword so that users don't see unnecessary docstring warnings on 1.11+. [#776]
- Fix accidental export of `dataset` and `datasets` and make them `public` instead. [#776]

MixedModels v4.25.1 Release Notes
==============================
- Use more sophisticated checks on property names in `restoreoptsum` to allow for optsums saved by pre-v4.25 versions to be used with this version and later. [#775]

MixedModels v4.25 Release Notes
==============================
Expand Down Expand Up @@ -549,3 +553,4 @@ Package dependencies
[#773]: https://github.com/JuliaStats/MixedModels.jl/issues/773
[#775]: https://github.com/JuliaStats/MixedModels.jl/issues/775
[#776]: https://github.com/JuliaStats/MixedModels.jl/issues/776
[#778]: https://github.com/JuliaStats/MixedModels.jl/issues/778
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
authors = ["Phillip Alday <me@phillipalday.com>", "Douglas Bates <dmbates@gmail.com>", "Jose Bayoan Santiago Calderon <jbs3hp@virginia.edu>"]
version = "4.25.2"
version = "4.25.3"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
15 changes: 9 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ end

"""
parametricbootstrap([rng::AbstractRNG], nsamp::Integer, m::MixedModel{T}, ftype=T;
β = coef(m), σ = m.σ, θ = m.θ, progress=true, optsum_overrides=(;))
β = fixef(m), σ = m.σ, θ = m.θ, progress=true, optsum_overrides=(;))
Perform `nsamp` parametric bootstrap replication fits of `m`, returning a `MixedModelBootstrap`.
Expand Down Expand Up @@ -214,7 +214,7 @@ function parametricbootstrap(
n::Integer,
morig::MixedModel{T},
ftype::Type{<:AbstractFloat}=T;
β::AbstractVector=coef(morig),
β::AbstractVector=fixef(morig),
σ=morig.σ,
θ::AbstractVector=morig.θ,
use_threads::Bool=false,
Expand All @@ -232,9 +232,13 @@ function parametricbootstrap(
if σ !== missing
σ = T(σ)
end
β, θ = convert(Vector{T}, β), convert(Vector{T}, θ)
βsc, θsc = similar(ftype.(β)), similar(ftype.(θ))
p, k = length(β), length(θ)
β = convert(Vector{T}, β)
θ = convert(Vector{T}, θ)
# scratch -- note that this is the length of the unpivoted coef vector
βsc = coef(morig)
θsc = zeros(ftype, length(θ))
p = length(βsc)
k = length(θsc)
m = deepcopy(morig)
for (key, val) in pairs(optsum_overrides)
setfield!(m.optsum, key, val)
Expand All @@ -251,7 +255,6 @@ function parametricbootstrap(
samp = replicate(n; progress) do
simulate!(rng, m; β, σ, θ)
refit!(m; progress=false)
# @info "" m.optsum.feval
(
objective=ftype.(m.objective),
σ=ismissing(m.σ) ? missing : ftype(m.σ),
Expand Down
30 changes: 14 additions & 16 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ function simulate(m::MixedModel, args...; kwargs...)
end

"""
simulate!(rng::AbstractRNG, m::MixedModel{T}; β=m.β, σ=m.σ, θ=T[])
simulate!(m::MixedModel; β=m.β, σ=m.σ, θ=m.θ)
simulate!(rng::AbstractRNG, m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[])
simulate!(m::MixedModel; β=fixef(m), σ=m.σ, θ=m.θ)
Overwrite the response (i.e. `m.trms[end]`) with a simulated response vector from model `m`.
Expand All @@ -32,15 +32,15 @@ This simulation includes sampling new values for the random effects.
which modify the model's response and return the entire modified model.
"""
function simulate!(
rng::AbstractRNG, m::LinearMixedModel{T}; β=coef(m), σ=m.σ, θ=T[]
rng::AbstractRNG, m::LinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]
) where {T}
# XXX should we add support for doing something with weights?
simulate!(rng, m.y, m; β, σ, θ)
return unfit!(m)
end

function simulate!(
rng::AbstractRNG, m::GeneralizedLinearMixedModel{T}; β=coef(m), σ=m.σ, θ=T[]
rng::AbstractRNG, m::GeneralizedLinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]
) where {T}
# note that these m.resp.y and m.LMM.y will later be synchronized in (re)fit!()
# but for now we use them as distinct scratch buffers to avoid allocations
Expand Down Expand Up @@ -85,7 +85,7 @@ function _rand(rng::AbstractRNG, d::Distribution, location, scale=NaN, n=1)
return rand(rng, dist) / n
end

function simulate!(m::MixedModel{T}; β=coef(m), σ=m.σ, θ=T[]) where {T}
function simulate!(m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]) where {T}
return simulate!(Random.GLOBAL_RNG, m; β, σ, θ)
end

Expand Down Expand Up @@ -121,7 +121,7 @@ function simulate!(
y::AbstractVector,
m::LinearMixedModel,
newdata::Tables.ColumnTable;
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
)
Expand All @@ -147,20 +147,18 @@ function simulate!(
end

function simulate!(
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=m.θ
) where {T}
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == rank(m) ||
throw(ArgumentError("You must specify all (non-singular) βs"))

β = convert(Vector{T}, β)
σ = T(σ)
θ = convert(Vector{T}, θ)
isempty(θ) || setθ!(m, θ)

if length(β) length(pivot(m))
padding = length(pivot(m)) - rank(m)
append!(β, fill(-0.0, padding))
invpermute!(β, pivot(m))
if length(β) == length(pivot(m))
β = view(view(β, pivot(m)), 1:rank(m))
end

# initialize y to standard normal
Expand All @@ -172,15 +170,15 @@ function simulate!(
end

# scale by σ and add fixed-effects contribution
return mul!(y, m.X, β, one(T), σ)
return mul!(y, fullrankx(m), β, one(T), σ)
end

function simulate!(
rng::AbstractRNG,
y::AbstractVector,
m::GeneralizedLinearMixedModel,
newdata::Tables.ColumnTable;
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
)
Expand Down Expand Up @@ -209,7 +207,7 @@ function simulate!(
rng::AbstractRNG,
y::AbstractVector,
m::GeneralizedLinearMixedModel{T};
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
) where {T}
Expand Down Expand Up @@ -250,7 +248,7 @@ function _simulate!(

if length(β) == length(pivot(m))
# unlike LMM, GLMM stores the truncated, pivoted vector directly
β = view(β, view(pivot(m), 1:(rank(m))))
β = view(view(β, pivot(m)), 1:rank(m))
end
fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
Expand Down
29 changes: 28 additions & 1 deletion test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ end
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

Expand All @@ -242,6 +242,33 @@ end
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end

@testset "partial crossing" begin
id = lpad.(string.(1:40), 2, "0")
B = ["b0", "b1", "b2"]
C = ["c0", "c1", "c2", "c3", "c4"]
df = DataFrame(reshape(collect(Iterators.product(B, C, id)), :), [:b, :c, :id])
df[!, :y] .= 0
filter!(df) do row
b = last(row.b)
c = last(row.c)
return b != c
end

m = LinearMixedModel(@formula(y ~ 1 + b * c + (1|id)), df)
β = 1:rank(m)
σ = 1
simulate!(StableRNG(628), m; β, σ)
fit!(m)

boot = parametricbootstrap(StableRNG(271828), 1000, m);
bootci = DataFrame(shortestcovint(boot))
filter!(:group => ismissing, bootci)
select!(bootci, :names => disallowmissing => :coef, :lower, :upper)
transform!(bootci, [:lower, :upper] => ByRow(middle) => :mean)

@test all(x -> isapprox(x[1], x[2]; atol=0.1), zip(coef(m), bootci.mean))
end
end
end

Expand Down

0 comments on commit 61dec6e

Please sign in to comment.