From e2247d8156093b21519a74b3edf874346c0b8a62 Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Mon, 12 Sep 2022 14:01:57 +0800 Subject: [PATCH 1/6] Improve doc --- src/anova.jl | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/anova.jl b/src/anova.jl index db4dbe4..3342c19 100644 --- a/src/anova.jl +++ b/src/anova.jl @@ -8,47 +8,52 @@ Analysis of variance. Return `AnovaResult{M, test, N}`. See [`AnovaResult`](@ref) for details. +# Arguments * `models`: model objects 1. `TableRegressionModel{<: LinearModel}` fitted by `GLM.lm` 2. `TableRegressionModel{<: GeneralizedLinearModel}` fitted by `GLM.glm` - If mutiple models are provided, they should be nested and the last one is the most saturated. + If mutiple models are provided, they should be nested and the last one is the most complex. * `test`: test statistics for goodness of fit. Available tests are [`LikelihoodRatioTest`](@ref) ([`LRT`](@ref)) and [`FTest`](@ref). The default is based on the model type. 1. `TableRegressionModel{<: LinearModel}`: `FTest`. 2. `TableRegressionModel{<: GeneralizedLinearModel}`: based on distribution function, see `canonicalgoodnessoffit`. -Other keyword arguments: +## Other keyword arguments * When one model is provided: 1. `type` specifies type of anova (1, 2 or 3). Default value is 1. * When multiple models are provided: 1. `check`: allows to check if models are nested. Defalut value is true. Some checkers are not implemented now. 2. `isnested`: true when models are checked as nested (manually or automatically). Defalut value is false. -Algorithm: +# Algorithm -For the ith model, devᵢ is defined as the sum of [squared deviance residuals (unit deviance)](https://en.wikipedia.org/wiki/Deviance_(statistics)). +The variable `dev` is a vector that the ith element is defined as the sum of [squared deviance residuals (unit deviance)](https://en.wikipedia.org/wiki/Deviance_(statistics)) of the ith model. It is equivalent to the residual sum of squares for ordinary linear regression. -* F-test: +The variable `Δdev` is the difference of the adjacent elements of `dev`, i.e. `Δdevᵢ = devᵢ₋₁ - devᵢ`. - The attribute `deviance` is Δdevᵢ = devᵢ₋₁ - devᵢ. +## F-test - F-statistic is then defined as Δdevᵢ/(squared dispersion × degree of freedom). +The attribute `deviance` of `AnovaResult` is `Δdev`. - For type I and III ANOVA, F-statistic is computed directly by the variAnce-covariance matrix(vcov) of the saturated model; the deviance is calculated backward. - 1. Type I: +F-statistic is then defined as `Δdev / (dispersion² × degree of freedom)`. - First, calculate f as the upper factor of Cholesky factorization of vcov⁻¹ * β. +For type I and III ANOVA, F-statistic is computed directly by the variance-covariance matrix (`vcov`) of the most complex model; the deviance is calculated backward. +1. Type I: - Then, for a factor that starts at ith row/column of vcov with n degree of freedom, the f-statistic is Σᵢⁱ⁺ⁿ⁻¹ fₖ²/n. - 2. Type III: + First, calculate `f` as the upper factor of Cholesky factorization of `vcov⁻¹ * β`. - For a factor occupying ith to jth row/column of vcov with n degree of freedom, f-statistic is (β[i:j]' * vcov[i:j, i:j]⁻¹ * β[i:j])/n. -* LRT: + Then, for a factor that starts from ith row/column of the model matrix with `n` degree of freedom, the f-statistic is `Σᵢⁱ⁺ⁿ⁻¹ fₖ² / n`. +2. Type III: - The attribute `deviance` is devᵢ. - - The likelihood ratio is defined as (devᵢ₋₁ - devᵢ)/squared dispersion. + For a factor occupying ith to jth row/column of the model matrix with `n` degree of freedom, the f-statistic is `β[i, ..., j]ᵀ * vcov[i, ..., j; i, ..., j]⁻¹ * β[i, ..., j] / n`. -For fitting new models and conducting anova at the same time, see [`anova_lm`](@ref) for `LinearModel`, [`anova_glm`](@ref) for `GeneralizedLinearModel`. +## LRT + +The attribute `deviance` of `AnovaResult` is `dev`. + +The likelihood ratio is defined as `Δdev / dispersion²`. + +!!! note + For fitting new models and conducting anova at the same time, see [`anova_lm`](@ref) for `LinearModel`, [`anova_glm`](@ref) for `GeneralizedLinearModel`. """ anova(::Val{:AnovaGLM}) From 78e8fdb5c9045276edbd7fce4697fce742318d87 Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Tue, 13 Sep 2022 11:34:44 +0800 Subject: [PATCH 2/6] Improve doc --- src/anova.jl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/anova.jl b/src/anova.jl index 3342c19..fce3ec7 100644 --- a/src/anova.jl +++ b/src/anova.jl @@ -26,29 +26,35 @@ Return `AnovaResult{M, test, N}`. See [`AnovaResult`](@ref) for details. # Algorithm -The variable `dev` is a vector that the ith element is defined as the sum of [squared deviance residuals (unit deviance)](https://en.wikipedia.org/wiki/Deviance_(statistics)) of the ith model. +Vectors of models and the corresponding base models: + +`model = (model₁, ..., modelₙ)` + +`basemodel = (basemodel₁, ..., basemodelₙ)` + +When `n + 1` models are given, `model = (model₂, ..., modelₙ₊₁), basemodel = (model₁, ..., modelₙ)`. +When one model is given, `n` is the number of factors except for the factors used in the simplest models. The `basemodel` depends on the type of ANOVA. + +The variable `dev` and `basedev` are vectors that the ith elements are the sum of [squared deviance residuals (unit deviance)](https://en.wikipedia.org/wiki/Deviance_(statistics)) of `modelᵢ` and `basemodelᵢ`. It is equivalent to the residual sum of squares for ordinary linear regression. -The variable `Δdev` is the difference of the adjacent elements of `dev`, i.e. `Δdevᵢ = devᵢ₋₁ - devᵢ`. -## F-test +The variable `Δdev` is the difference of `dev` and `basedev`, i.e. `Δdev = dev - basedev`. -The attribute `deviance` of `AnovaResult` is `Δdev`. +`dispersion` is the estimated dispersion (or scale) parameter for `modelₙ`'s distribution. -F-statistic is then defined as `Δdev / (dispersion² × degree of freedom)`. +For ordinary linear regression, +`dispersion² = residual sum of squares / degree of freedom of residuals` -For type I and III ANOVA, F-statistic is computed directly by the variance-covariance matrix (`vcov`) of the most complex model; the deviance is calculated backward. -1. Type I: +## F-test - First, calculate `f` as the upper factor of Cholesky factorization of `vcov⁻¹ * β`. +The attribute `deviance` of the returned object is `(Δdev₁, ..., Δdevₙ, NaN)`. - Then, for a factor that starts from ith row/column of the model matrix with `n` degree of freedom, the f-statistic is `Σᵢⁱ⁺ⁿ⁻¹ fₖ² / n`. -2. Type III: +F-value is a vector `F` such that `Fᵢ = Δdevᵢ / (dispersion² × dofᵢ)` where `dof` is difference of degree of freedom of each model and basemodel pairs. - For a factor occupying ith to jth row/column of the model matrix with `n` degree of freedom, the f-statistic is `β[i, ..., j]ᵀ * vcov[i, ..., j; i, ..., j]⁻¹ * β[i, ..., j] / n`. ## LRT -The attribute `deviance` of `AnovaResult` is `dev`. +The attribute `deviance` of the returned object is `dev`. The likelihood ratio is defined as `Δdev / dispersion²`. From f437c297b8172136f523d665907f0fbc6b12d552 Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Thu, 19 Jan 2023 02:46:21 +0800 Subject: [PATCH 3/6] Update to AnovaBase 0.7 --- Project.toml | 10 +-- src/AnovaGLM.jl | 9 +-- src/anova.jl | 204 +++++++++++++++++------------------------------ src/fit.jl | 115 +++++++------------------- src/io.jl | 81 ++++++++++--------- test/runtests.jl | 74 ++++++++--------- 6 files changed, 187 insertions(+), 306 deletions(-) diff --git a/Project.toml b/Project.toml index 5823b3c..224e817 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,10 @@ name = "AnovaGLM" uuid = "0a47a8e3-ec57-451e-bddb-e0be9d22772b" authors = ["Yu-Fong Peng "] -version = "0.1.4" +version = "0.2.0" [deps] AnovaBase = "946dddda-6a23-4b48-8e70-8e60d9b8d680" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -16,11 +15,10 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] -AnovaBase = "0.6.3" -DataFrames = "1" +AnovaBase = "0.7" Distributions = "0.23, 0.24, 0.25" -GLM = "1.5, 1.6, 1.7, 1.8" +GLM = "1.8" Reexport = "0.2, 1" StatsBase = "0.33" StatsModels = "0.6" -julia = "1.6, 1.7" +julia = "1.6, 1.7, 1.8" diff --git a/src/AnovaGLM.jl b/src/AnovaGLM.jl index 944f01f..933932c 100644 --- a/src/AnovaGLM.jl +++ b/src/AnovaGLM.jl @@ -1,9 +1,9 @@ module AnovaGLM -using Statistics, StatsBase, LinearAlgebra, Distributions, Reexport, Printf +using Statistics, StatsBase, LinearAlgebra, Distributions, Reexport, Printf, AnovaBase @reexport using GLM, AnovaBase import StatsBase: fit!, fit -import StatsModels: TableRegressionModel, vectorize, ModelFrame, ModelMatrix, response +import StatsModels: TableRegressionModel, vectorize, ModelFrame, ModelMatrix, response, asgn import GLM: glm, # Model LinPredModel, AbstractGLM, GeneralizedLinearModel, LinearModel, @@ -16,9 +16,8 @@ import GLM: glm, updateμ!, cholfactors, # other FP, BlasReal, Link, dispersion, deviance, dof, dof_residual, nobs -import AnovaBase: lrt_nested, ftest_nested, formula, anova, nestedmodels, _diff, _diffn, subformula, selectcoef, dof, dof_residual, deviance, nobs, coefnames, extract_contrasts -using DataFrames: DataFrame, ByRow, combine - +using AnovaBase: select_super_interaction, extract_contrasts, canonicalgoodnessoffit, subformula, predictors, dof_asgn, lrt_nested, ftest_nested +import AnovaBase: anova, nestedmodels, anovatable, prednames export anova_lm, anova_glm include("anova.jl") diff --git a/src/anova.jl b/src/anova.jl index fce3ec7..5fb62fd 100644 --- a/src/anova.jl +++ b/src/anova.jl @@ -1,15 +1,15 @@ # =========================================================================================== # Main API -@doc """ - anova(...; test::Type{<: GoodnessOfFit}, ) - anova(test::Type{<: GoodnessOfFit}, ...; ) +""" + anova(...; test::Type{<: GoodnessOfFit}, ) + anova(test::Type{<: GoodnessOfFit}, ...; ) Analysis of variance. Return `AnovaResult{M, test, N}`. See [`AnovaResult`](@ref) for details. # Arguments -* `models`: model objects +* `glmmodels`: model objects 1. `TableRegressionModel{<: LinearModel}` fitted by `GLM.lm` 2. `TableRegressionModel{<: GeneralizedLinearModel}` fitted by `GLM.glm` If mutiple models are provided, they should be nested and the last one is the most complex. @@ -24,157 +24,113 @@ Return `AnovaResult{M, test, N}`. See [`AnovaResult`](@ref) for details. 1. `check`: allows to check if models are nested. Defalut value is true. Some checkers are not implemented now. 2. `isnested`: true when models are checked as nested (manually or automatically). Defalut value is false. -# Algorithm - -Vectors of models and the corresponding base models: - -`model = (model₁, ..., modelₙ)` - -`basemodel = (basemodel₁, ..., basemodelₙ)` - -When `n + 1` models are given, `model = (model₂, ..., modelₙ₊₁), basemodel = (model₁, ..., modelₙ)`. -When one model is given, `n` is the number of factors except for the factors used in the simplest models. The `basemodel` depends on the type of ANOVA. - -The variable `dev` and `basedev` are vectors that the ith elements are the sum of [squared deviance residuals (unit deviance)](https://en.wikipedia.org/wiki/Deviance_(statistics)) of `modelᵢ` and `basemodelᵢ`. -It is equivalent to the residual sum of squares for ordinary linear regression. - -The variable `Δdev` is the difference of `dev` and `basedev`, i.e. `Δdev = dev - basedev`. - -`dispersion` is the estimated dispersion (or scale) parameter for `modelₙ`'s distribution. - -For ordinary linear regression, -`dispersion² = residual sum of squares / degree of freedom of residuals` - -## F-test - -The attribute `deviance` of the returned object is `(Δdev₁, ..., Δdevₙ, NaN)`. - -F-value is a vector `F` such that `Fᵢ = Δdevᵢ / (dispersion² × dofᵢ)` where `dof` is difference of degree of freedom of each model and basemodel pairs. - - -## LRT - -The attribute `deviance` of the returned object is `dev`. - -The likelihood ratio is defined as `Δdev / dispersion²`. - !!! note For fitting new models and conducting anova at the same time, see [`anova_lm`](@ref) for `LinearModel`, [`anova_glm`](@ref) for `GeneralizedLinearModel`. """ -anova(::Val{:AnovaGLM}) +anova(::Type{<: GoodnessOfFit}, ::Vararg{TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}) -anova(models::Vararg{TableRegressionModel{<: LinearModel, <: AbstractArray}, N}; - test::Type{T} = FTest, - kwargs...) where {N, T <: GoodnessOfFit} = +anova(models::Vararg{TableRegressionModel{<: LinearModel}}; + test::Type{<: GoodnessOfFit} = FTest, + kwargs...) = anova(test, models...; kwargs...) -anova(models::Vararg{TableRegressionModel{<: GeneralizedLinearModel, <: AbstractArray}, N}; - test::Type{T} = canonicalgoodnessoffit(models[1].model.rr.d), - kwargs...) where {N, T <: GoodnessOfFit} = +anova(models::Vararg{TableRegressionModel{<: GeneralizedLinearModel}}; + test::Type{<: GoodnessOfFit} = canonicalgoodnessoffit(models[1].model.rr.d), + kwargs...) = anova(test, models...; kwargs...) # ================================================================================================================== # ANOVA by F test # LinearModels +const TRM_LM = TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel{<: GLM.GlmResp{T, <: Normal, IdentityLink}}}} where T anova(::Type{FTest}, - trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel{<: GLM.GlmResp{T, <: Normal, IdentityLink}}}}; - type::Int = 1, - kwargs...) where T = _anova_vcov(trm; type, kwargs...) - -function _anova_vcov(trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}; - type::Int = 1, kwargs...) - type in [1, 2, 3] || throw(ArgumentError("Invalid type")) - - assign = trm.mm.assign - df = dof(assign) - filter!(>(0), df) - # May exist some floating point error from dof_residual - push!(df, round(Int, dof_residual(trm))) - df = tuple(df...) - if type in [1, 3] - # vcov methods - varβ = vcov(trm.model) - β = trm.model.pp.beta0 - if type == 1 - fs = abs2.(cholesky(Hermitian(inv(varβ))).U * β) - offset = first(assign) == 1 ? 0 : 1 - fstat = ntuple(last(assign) - offset) do fix - sum(fs[findall(==(fix + offset), assign)]) / df[fix] - end - else - # calculate block by block - offset = first(assign) == 1 ? 0 : 1 - fstat = ntuple(last(assign) - offset) do fix - select = findall(==(fix + offset), assign) - β[select]' * inv(varβ[select, select]) * β[select] / df[fix] - end + trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}; + type::Int = 1, kwargs...) = anova(FTest, FullModel(trm, type, isnullable(trm.model), true); kwargs...) + +function anova(::Type{FTest}, aovm::FullModel{<: TRM_LM}) + f = formula(aovm.model) + assign = asgn(predictors(aovm)) + fullasgn = asgn(f) + df = tuple(dof_asgn(assign)...) + varβ = vcov(aovm.model.model) + β = aovm.model.model.pp.beta0 + offset = first(assign) + last(fullasgn) - last(assign) - 1 + if aovm.type == 1 + fs = abs2.(cholesky(Hermitian(inv(varβ))).U * β) + fstat = ntuple(last(fullasgn) - offset) do fix + sum(fs[findall(==(fix + offset), fullasgn)]) / df[fix] + end + elseif aovm.type == 2 + fstat = ntuple(last(fullasgn) - offset) do fix + select1 = sort!(collect(select_super_interaction(f.rhs, fix + offset))) + select2 = setdiff(select1, fix + offset) + select1 = findall(in(select1), fullasgn) + select2 = findall(in(select2), fullasgn) + (β[select1]' * inv(varβ[select1, select1]) * β[select1] - β[select2]' * inv(varβ[select2, select2]) * β[select2]) / df[fix] end - σ² = dispersion(trm.model, true) - devs = (fstat .* σ²..., σ²) .* df else - # refit methods - devs = deviances(trm; type, kwargs...) - msr = devs ./ df - fstat = msr[1:end - 1] ./ dispersion(trm.model, true) + # calculate block by block + fstat = ntuple(last(fullasgn) - offset) do fix + select = findall(==(fix + offset), fullasgn) + β[select]' * inv(varβ[select, select]) * β[select] / df[fix] + end end - pvalue = (ccdf.(FDist.(df[1:end - 1], last(df)), abs.(fstat))..., NaN) - AnovaResult{FTest}(trm, type, df, devs, (fstat..., NaN), pvalue, NamedTuple()) + σ² = dispersion(aovm.model.model, true) + devs = @. fstat * σ² * df + dfr = round(Int, dof_residual(aovm.model)) + pvalue = @. ccdf(FDist(df, dfr), abs(fstat)) + AnovaResult{FTest}(aovm, df, devs, fstat, pvalue, NamedTuple()) end - function anova(::Type{FTest}, - trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}; - type::Int = 1, kwargs...) - type in [1, 2, 3] || throw(ArgumentError("Invalid type")) - - assign = trm.mm.assign - devs = deviances(trm; type, kwargs...) - df = dof(assign) - filter!(>(0), df) - # May exist some floating point error from dof_residual - push!(df, round(Int, dof_residual(trm))) - length(df) == length(devs) + 1 && popfirst!(df) - df = tuple(df...) + aovm::FullModel{<: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}; kwargs...) + devs = deviances(aovm; kwargs...) + assign = asgn(collect(predictors(aovm))) + #length(vdf) ≡ length(devs) + 1 && popfirst!(vdf) + df = tuple(dof_asgn(assign)...) msr = devs ./ df - fstat = msr[1:end - 1] ./ dispersion(trm.model, true) - pvalue = (ccdf.(FDist.(df[1:end - 1], last(df)), abs.(fstat))..., NaN) - AnovaResult{FTest}(trm, type, df, devs, (fstat..., NaN), pvalue, NamedTuple()) + fstat = msr ./ dispersion(aovm.model.model, true) + dfr = round(Int, dof_residual(aovm.model)) + pvalue = @. ccdf(FDist(df, dfr), abs(fstat)) + AnovaResult{FTest}(aovm, df, devs, fstat, pvalue, NamedTuple()) end # ---------------------------------------------------------------------------------------- # ANOVA for genaralized linear models # λ = -2ln(𝓛(̂θ₀)/𝓛(θ)) ~ χ²ₙ , n = difference of predictors +anova(::Type{LRT}, + trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}; + type::Int = 1, kwargs...) = anova(LRT, FullModel(trm, type, isnullable(trm.model), true); kwargs...) function anova(::Type{LRT}, - trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}) - Δdev = deviances(trm, type = 1) - df = dof(trm.mm.assign) - filter!(>(0), df) - isnullable(trm.model) || popfirst!(df) - df = tuple(df...) + aovm::FullModel{<: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}) + Δdev = deviances(aovm) + assign = asgn(collect(predictors(aovm))) + #isnullable(trm.model) || popfirst!(vdf) + df = tuple(dof_asgn(assign)...) # den = last(ss) / (nobs(trm) - dof(trm) + 1) # lrstat = ss[1:end - 1] ./ den - σ² = dispersion(trm.model, true) - lrstat = Δdev[1:end - 1] ./ σ² + σ² = dispersion(aovm.model.model, true) + lrstat = Δdev ./ σ² n = length(lrstat) - dev = collect(Δdev) + dev = push!(collect(Δdev), deviance(aovm.model)) i = n while i > 0 dev[i] += dev[i + 1] i -= 1 end - pval = ccdf.(Chisq.(df), abs.(lrstat)) - AnovaResult{LRT}(trm, 1, df, tuple(dev[2:end]...), lrstat, pval, NamedTuple()) + pval = @. ccdf(Chisq(df), abs(lrstat)) + AnovaResult{LRT}(aovm, df, tuple(dev[2:end]...), lrstat, pval, NamedTuple()) end # ================================================================================================================= # Nested models function anova(::Type{FTest}, - trms::Vararg{TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}; + trms::Vararg{M}; check::Bool = true, - isnested::Bool = false) + isnested::Bool = false) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}} df = dof.(trms) ord = sortperm(collect(df)) df = df[ord] @@ -183,20 +139,20 @@ function anova(::Type{FTest}, # May exist some floating point error from dof_residual # check comparable and nested check && @warn "Could not check whether models are nested: results may not be meaningful" - ftest_nested(trms, df, dfr, deviance.(trms), dispersion(last(trms).model, true)) + ftest_nested(NestedModels{M}(trms), df, dfr, deviance.(trms), dispersion(last(trms).model, true)) end function anova(::Type{LRT}, - trms::Vararg{<: TableRegressionModel}; + trms::Vararg{M}; check::Bool = true, - isnested::Bool = false) + isnested::Bool = false) where {M <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}} df = dof.(trms) ord = sortperm(collect(df)) trms = trms[ord] df = df[ord] # check comparable and nested check && @warn "Could not check whether models are nested: results may not be meaningful" - lrt_nested(trms, df, deviance.(trms), dispersion(last(trms).model, true)) + lrt_nested(NestedModels{M}(trms), df, deviance.(trms), dispersion(last(trms).model, true)) end @@ -268,16 +224,4 @@ function anova(test::Type{<: GoodnessOfFit}, ::Type{GeneralizedLinearModel}, X, kwargs...) trm = glm(X, y, d, l; kwargs...) anova(test, trm; type, kwargs... ) -end - - -""" - GLM.glm(f::FormulaTerm, df::DataFrame, d::Binomial, l::GLM.Link, args...; kwargs...) - -Automatically transform dependent variable into 0/1 for family `Binomial`. -""" -GLM.glm(f::FormulaTerm, df::DataFrame, d::Binomial, l::Link, args...; kwargs...) = - fit(GeneralizedLinearModel, f, - combine(df, :, f.lhs.sym => ByRow(==(last(unique(df[!, f.lhs.sym])))) => f.lhs.sym), - d, l, args...; kwargs...) - +end \ No newline at end of file diff --git a/src/fit.jl b/src/fit.jl index b522d1c..36e24de 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -1,89 +1,31 @@ # ========================================================================================================== # Backend funcion -function deviances(trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}, <: AbstractArray{T}}; - type::Int = 1, kwargs...) where {T <: BlasReal} - isa(trm.model.pp, DensePredChol) || throw( +function deviances(aovm::FullModel{<: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}}; kwargs...) + isa(aovm.model.model.pp, DensePredChol) || throw( ArgumentError("Methods for other PredChol types is not implemented; use model with DensesPredChol instead.")) - - assign, f = trm.mm.assign, formula(trm).rhs - # Determine null model by link function - ## model with 0 factors as null - ## if 0 factor is not allowed, - ### eg, ~ 1 + A + B + ... - ### ~ 1 as null - ### eg, ~ 0 + A + A & B - ### ~ A as null - # start value - start = isnullable(trm.model) ? 0 : first(assign) - err1 = ArgumentError("Invalid set of model specification for ANOVA; not enough variables provided.") - err2 = ArgumentError("Invalid set of model specification for ANOVA; try adding variables without zeros.") - if type == 1 - todel = union(start, assign) - # ~ 0 + A, ~ 1 - start > 0 && (length(todel) > 1 || throw(err1)) - devs = zeros(T, length(todel) + 1) - @inbounds for id in eachindex(todel) - devs[id] = deviance(trm, @view(todel[id + 1:end]); kwargs...) - end - devs = -diff(devs) - elseif type == 2 - # Problems with Fullrank promotion occur - ## ~ 0 + A (categorical) + B (categorical) - ## A is aliased with 1, but 1 is not in formula. - ## A is promoted to full rank. - ## B is also aliased with 1, but it is not promoted. - ## Doing type 2 and 3 anova cause problem - ## when deleting promoted categorical variables. - # Some warning or checks? - todel = unique(assign) - if start > 0 - # ~ 0 + A + A & B, all terms are related to A, ~ A as null - selectcoef(f, first(todel)) == Set(todel) && popfirst!(todel) - # ~ 0 + A, ~ 1 - if isempty(todel) - throw(err1) - # ~ 0 + 2 categorical - # Do not inspect InteractionTerm - elseif length(todel) == 2 - all(t -> isa(t, CategoricalTerm), f.terms[todel]) && throw(err2) - end - end - devs = zeros(T, length(todel) + 1) + trm = aovm.model + if aovm.type ≡ 1 + ids = union(first(aovm.pred_id) - 1, aovm.pred_id) + devs = -diff(map(i -> deviance(trm, @view(ids[i + 1:end]); kwargs...), eachindex(ids))) + elseif aovm.type ≡ 2 + devs = zeros(Float64, length(aovm.pred_id)) # cache fitted - dict_devs = Dict{Set{Int}, T}() - @inbounds for (id, del) in enumerate(todel) - delcoef = selectcoef(f, del) - dev1 = get(dict_devs, delcoef, (push!(dict_devs, delcoef => deviance(trm, delcoef; kwargs...)); dict_devs[delcoef])) + dict_devs = Dict{Set{Int}, Float64}() + f = formula(aovm.model).rhs + @inbounds for (id, del) in enumerate(aovm.pred_id) + delcoef = select_super_interaction(f, del) + dev1 = get!(dict_devs, delcoef, deviance(trm, delcoef; kwargs...)) delete!(delcoef, del) - dev2 = get(dict_devs, delcoef, (push!(dict_devs, delcoef => deviance(trm, delcoef; kwargs...)); dict_devs[delcoef])) + dev2 = get!(dict_devs, delcoef, deviance(trm, delcoef; kwargs...)) devs[id] = dev1 - dev2 end - devs[end] = deviance(trm, 0; kwargs...) else - todel = unique(assign) - if start > 0 - # ~ 0 + A, ~ 1 - if length(todel) <= 1 - throw(err1) - # ~ 0 + 2 categorical or - # ~ 1 + 1 categorical - # Do not inspect InteractionTerm - elseif length(todel) == 2 - if isa(f.terms[todel][1], Union{CategoricalTerm, InterceptTerm{true}}) - isa(f.terms[todel][2], CategoricalTerm) && throw(err2) - end - end - end - devs = zeros(T, length(todel) + 1) - @inbounds for (id, del) in enumerate(todel) - devs[id] = deviance(trm, del; kwargs...) - end devr = deviance(trm, 0; kwargs...) - devs .-= devr - devs[end] = devr + devs = map(del -> deviance(trm, del; kwargs...) - devr, aovm.pred_id) end # every method end with deviance(trm, 0; kwargs...), ie full model fit. + deviance(trm, 0; kwargs...) installbeta!(trm.model.pp) # ensure model unchanged tuple(devs...) end @@ -275,14 +217,14 @@ function linpred!(out, p::LinPred, X::Matrix, f::Real = 1.0) end # Create nestedmodels -@doc """ +""" nestedmodels(trm::TableRegressionModel{<: LinearModel}; null::Bool = true, ) nestedmodels(trm::TableRegressionModel{<: GeneralizedLinearModel}; null::Bool = true, ) nestedmodels(::Type{LinearModel}, formula, data; null::Bool = true, ) nestedmodels(::Type{GeneralizedLinearModel}, formula, data, distr::UnivariateDistribution, link::Link = canonicallink(d); null::Bool = true, ) -Generate nested models from a model or formula and data. +Generate nested nested models `NestedModels` from a model or formula and data. The null model will be a model with at least one factor (including intercept) if the link function does not allow factors to be 0 (factors in denominators) or the keyword argument `null` is false (default value is true). * `InverseLink` for `Gamma` @@ -290,11 +232,9 @@ The null model will be a model with at least one factor (including intercept) if * `LinearModel` fitted with `CholeskyPivoted` when `dropcollinear = true` Otherwise, it will be an empty model. """ -nestedmodels(::Val{:AnovaGLM}) - -function nestedmodels(trm::TableRegressionModel{<: LinearModel}; null::Bool = true, kwargs...) - null = null && isnullable(trm.model.pp.chol) - assign = unique(trm.mm.assign) +function nestedmodels(trm::M; null::Bool = true, kwargs...) where {M <: TableRegressionModel{<: LinearModel}} + null = null && isnullable(trm.model.pp.chol) || (@warn "Empty model is not allowed due to `CholeskyPivoted`"; false) + assign = unique(asgn(formula(trm))) pop!(assign) dropcollinear, range = null ? (false, union(0, assign)) : (true, assign) wts = trm.model.rr.wts @@ -304,11 +244,11 @@ function nestedmodels(trm::TableRegressionModel{<: LinearModel}; null::Bool = tr y = response(mf) TableRegressionModel(fit(trm.mf.model, mm.m, y; wts, dropcollinear, kwargs...), mf, mm) end - (trms..., trm) + NestedModels{M}(trms..., trm) end -function nestedmodels(trm::TableRegressionModel{<: GeneralizedLinearModel}; null::Bool = true, kwargs...) - null = null && isnullable(trm.model) +function nestedmodels(trm::M; null::Bool = true, kwargs...) where {M <: TableRegressionModel{<: GeneralizedLinearModel}} + null = null && isnullable(trm.model) && isnullable(trm.model.pp.chol) || (@warn "Empty model is not allowed due to `CholeskyPivoted`"; false) distr = trm.model.rr.d link = typeof(trm.model.rr).parameters[3]() wts = trm.model.rr.wts @@ -322,13 +262,12 @@ function nestedmodels(trm::TableRegressionModel{<: GeneralizedLinearModel}; null y = response(mf) TableRegressionModel(fit(trm.mf.model, mm.m, y, distr, link; wts, offset, kwargs...), mf, mm) end - (trms..., trm) + NestedModels{M}(trms..., trm) end - -nestedmodels(::Type{LinearModel}, formula, data; null::Bool = true, kwargs...) = +nestedmodels(::Type{LinearModel}, formula::FormulaTerm, data; null::Bool = true, kwargs...) = nestedmodels(lm(formula, data; kwargs...); null, kwargs...) -nestedmodels(::Type{GeneralizedLinearModel}, formula, data, +nestedmodels(::Type{GeneralizedLinearModel}, formula::FormulaTerm, data, distr::UnivariateDistribution, link::Link = canonicallink(distr); null::Bool = true, kwargs...) = nestedmodels(glm(formula, data, distr, link; kwargs...); null, kwargs...) diff --git a/src/io.jl b/src/io.jl index 443a29d..9ebd7c2 100644 --- a/src/io.jl +++ b/src/io.jl @@ -1,78 +1,83 @@ # ====================================================================================================== # IO -import AnovaBase: AnovaTable, anovatable -function coefnames(aov::AnovaResult{T, FTest}) where {T <: TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}} - v = coefnames(aov.model, Val(:anova)) - push!(v, "(Residuals)") - v -end - -coefnames(trm::TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}, anova::Val{:anova}) = - vectorize(coefnames(trm.mf.f.rhs, anova)) - # anovatable api -function anovatable(aov::AnovaResult{<: TableRegressionModel{<: LinearModel}, FTest}; kwargs...) - AnovaTable(hcat(vectorize.((dof(aov), deviance(aov), deviance(aov) ./ dof(aov), teststat(aov), pval(aov)))...), +function anovatable(aov::AnovaResult{<: FullModel{<: TableRegressionModel{<: LinearModel}, N}, FTest}; rownames = push!(prednames(aov), "(Residuals)") + ) where N + dfr = round(Int, dof_residual(aov.anovamodel.model)) + σ² = dispersion(aov.anovamodel.model.model, true) + AnovaTable([ + [dof(aov)..., dfr], + [deviance(aov)..., dfr * σ²], + [(deviance(aov) ./ dof(aov))..., σ²], + [teststat(aov)..., NaN], + [pval(aov)..., NaN] + ], ["DOF", "Exp.SS", "Mean Square", "F value","Pr(>|F|)"], - ["x$i" for i in eachindex(pval(aov))], 5, 4) + rownames, 5, 4) end -function anovatable(aov::AnovaResult{<: TableRegressionModel{<: GeneralizedLinearModel}, FTest}; kwargs...) - AnovaTable(hcat(vectorize.((dof(aov), deviance(aov), deviance(aov) ./ dof(aov), teststat(aov), pval(aov)))...), +function anovatable(aov::AnovaResult{<: FullModel{<: TableRegressionModel{<: GeneralizedLinearModel}, N}, FTest}; rownames = push!(prednames(aov), "(Residuals)")) where N + dfr = round(Int, dof_residual(aov.anovamodel.model.model)) + σ² = dispersion(aov.anovamodel.model.model, true) + AnovaTable([ + [dof(aov)..., dfr], + [deviance(aov)..., dfr * σ²], + [(deviance(aov) ./ dof(aov))..., σ²], + [teststat(aov)..., NaN], + [pval(aov)..., NaN] + ], ["DOF", "ΔDeviance", "Mean ΔDev", "F value","Pr(>|F|)"], - ["x$i" for i in eachindex(pval(aov))], 5, 4) + rownames, 5, 4) end -function anovatable(aov::AnovaResult{<: TableRegressionModel{<: LinearModel}, LRT}; kwargs...) +function anovatable(aov::AnovaResult{<: FullModel{<: TableRegressionModel{<: LinearModel}}, LRT}; rownames = prednames(aov)) AnovaTable(hcat(vectorize.((dof(aov), deviance(aov), teststat(aov), pval(aov)))...), ["DOF", "Res.SS", "χ²", "Pr(>|χ²|)"], - ["x$i" for i in eachindex(pval(aov))], 4, 3) + rownames, 4, 3) end -function anovatable(aov::AnovaResult{<: TableRegressionModel{<: GeneralizedLinearModel}, LRT}; kwargs...) +function anovatable(aov::AnovaResult{<: FullModel{<: TableRegressionModel{<: GeneralizedLinearModel}}, LRT}; rownames = prednames(aov)) AnovaTable(hcat(vectorize.((dof(aov), deviance(aov), teststat(aov), pval(aov)))...), ["DOF", "Deviance", "χ²", "Pr(>|χ²|)"], - ["x$i" for i in eachindex(pval(aov))], 4, 3) + rownames, 4, 3) end -function anovatable(aov::AnovaResult{<: Tuple, FTest}, - modeltype1::Type{<: TableRegressionModel{<: LinearModel}}, - modeltype2::Type{<: TableRegressionModel{<: LinearModel}}; - kwargs...) +function anovatable(aov::AnovaResult{NestedModels{<: TableRegressionModel{<: LinearModel}, N}, FTest}; + rownames = "x" .* string.(1:N)) where N - rs = r2.(aov.model) + rs = r2.(aov.anovamodel.model) Δrs = _diff(rs) - AnovaTable(hcat(vectorize.(( + AnovaTable([ dof(aov), [NaN, _diff(dof(aov))...], - dof_residual(aov), + round(Int, dof_residual(aov.anovamodel.model)), rs, [NaN, Δrs...], deviance(aov), [NaN, _diffn(deviance(aov))...], teststat(aov), pval(aov) - ))...), + ], ["DOF", "ΔDOF", "Res.DOF", "R²", "ΔR²", "Res.SS", "Exp.SS", "F value", "Pr(>|F|)"], - ["$i" for i in eachindex(pval(aov))], 9, 8) + rownames, 9, 8) end -function anovatable(aov::AnovaResult{<: Tuple, LRT}, - modeltype1::Type{<: TableRegressionModel{<: LinearModel}}, - modeltype2::Type{<: TableRegressionModel{<: LinearModel}}; - kwargs...) - rs = r2.(aov.model) +function anovatable(aov::AnovaResult{NestedModels{<: TableRegressionModel{<: LinearModel}, N}, LRT}; + rownames = "x" .* string.(1:N)) where N + + rs = r2.(aov.anovamodel.model) Δrs = _diff(rs) - AnovaTable(hcat(vectorize.(( + AnovaTable([ dof(aov), [NaN, _diff(dof(aov))...], - dof_residual(aov), + round(Int, dof_residual(aov.anovamodel.model)), rs, [NaN, Δrs...], deviance(aov), + [NaN, _diffn(deviance(aov))...], teststat(aov), pval(aov) - ))...), + ], ["DOF", "ΔDOF", "Res.DOF", "R²", "ΔR²", "Res.SS", "χ²", "Pr(>|χ²|)"], - ["$i" for i in eachindex(pval(aov))], 8, 7) + rownames, 8, 7) end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 8923c57..fb8bf77 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,7 +45,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} @testset "AnovaGLM.jl" begin @testset "LinearModel" begin @testset "Simple linear regression" begin - lm0, lm1, lm2, lm3, lm4 = nestedmodels(LinearModel, @formula(SepalLength ~ SepalWidth * Species), iris, dropcollinear = false) + lm0, lm1, lm2, lm3, lm4 = nestedmodels(LinearModel, @formula(SepalLength ~ SepalWidth * Species), iris, dropcollinear = false).model global aov3 = anova(lm4, type = 3) global aov2 = anova_lm(@formula(SepalLength ~ SepalWidth * Species), iris, type = 2) global aov1 = anova(lm4) @@ -61,29 +61,26 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} @test !(@test_error(test_show(aovlf))) @test anova_type(aov1) == 1 @test nobs(aov2) == ft.nobs - @test dof(aov3) == (1, 1, 2, 2, 144) - @test dof_residual(aov1) == 144 + @test dof(aov3) == (1, 1, 2, 2) + @test all(dof_residual(aov1) .== 144) @test isapprox(deviance(aovf)[2:end], ft.ssr) - @test isapprox(deviance(aov1)[1:end - 1], AnovaBase._diffn(deviance(aovf))) - @test isapprox(filter(!isnan, teststat(aov1)), filter(!isnan, teststat(aovf))) - @test isapprox(filter(!isnan, teststat(aov2)), (26485.300978452644, 56.637879295914324, 188.10911669921464, 0.4064420847481629)) - @test isapprox(filter(!isnan, teststat(aov1lr)), filter(!isnan, teststat(aovlr))) - @test isapprox(coef(lm4), coef(aov3.model)) - @test coefnames(lm4, Val(:anova)) == ["(Intercept)", "SepalWidth", "Species", "SepalWidth & Species"] - @test collect(coefnames(formula(lm1), Val(:anova))) == coefnames((formula(lm1).lhs, formula(lm1).rhs), Val(:anova)) - + @test isapprox(deviance(aov1), AnovaBase._diffn(deviance(aovf))) + @test isapprox(teststat(aov1), teststat(aovf)[2:end]) + @test isapprox(teststat(aov2), (26485.300978452644, 56.637879295914324, 188.10911669921464, 0.4064420847481629)) + @test isapprox(teststat(aov1lr), teststat(aovlr)[2:end]) + @test isapprox(coef(lm4), coef(aov3.anovamodel.model)) end @testset "Linear regression with frequency weights" begin wlm1, wlm2 = nestedmodels(LinearModel, @formula(Cost / Claims ~ Insured + Merit), insurance, - wts = insurance.Claims ./ sum(insurance.Claims) .* length(insurance.Claims)) + wts = insurance.Claims ./ sum(insurance.Claims) .* length(insurance.Claims)).model global aov = anova(wlm2, type = 2) global aovf = anova(wlm1, wlm2) @test !(@test_error test_show(aov)) @test !(@test_error test_show(aovf)) - @test nobs(aov) == sum(aov.model.model.rr.wts) - @test dof(aov) == (1, 1, 18) - @test isapprox(filter(!isnan, teststat(aov))[2:end], filter(!isnan, teststat(aovf))) + @test nobs(aov) == sum(aov.anovamodel.model.model.rr.wts) + @test dof(aov) == (1, 1) + @test isapprox(teststat(aov)[2:end], teststat(aovf)[2:end]) end end @@ -92,51 +89,50 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} global aov = anova_glm(FTest, @formula(Cost / Claims ~ 0 + Insured + Merit), insurance, Gamma(), wts = insurance.Claims ./ sum(insurance.Claims) .* length(insurance.Claims), type = 2) @test !(@test_error test_show(aov)) - @test @test_error anova_glm(@formula(Cost / Claims ~ Merit + Class), insurance, Gamma(), type = 3) - @test @test_error anova_glm(@formula(Cost / Claims ~ 1 + Class), insurance, Gamma(), type = 3) + #@test @test_error anova_glm(@formula(Cost / Claims ~ Merit + Class), insurance, Gamma(), type = 3) # should be ok without intercept + #@test @test_error anova_glm(@formula(Cost / Claims ~ 1 + Class), insurance, Gamma(), type = 3) # should be ok without intercept @test @test_error anova_glm(@formula(Cost / Claims ~ 0 + Class), insurance, Gamma(), type = 3) @test @test_error anova_glm(@formula(Cost / Claims ~ 0 + Class), insurance, Gamma(), type = 2) - @test nobs(aov) == nobs(aov.model) - @test dof(aov) == (1, 4, 15) - @test isapprox(filter(!isnan, deviance(aov)), AnovaGLM.deviances(aov.model, type = anova_type(aov))) - @test isapprox(filter(!isnan, teststat(aov)), (6.163653078060339, 2802.3252386290533)) - @test isapprox(filter(!isnan, pval(aov)), (0.025357816283854216, 2.3626325930920802e-21)) + @test nobs(aov) == nobs(aov.anovamodel.model) + @test dof(aov) == (1, 4) + @test isapprox(deviance(aov), AnovaGLM.deviances(FullModel(aov.anovamodel.model, anova_type(aov), false, false))) + @test isapprox(teststat(aov), (6.163653078060339, 2802.3252386290533)) + @test isapprox(pval(aov), (0.025357816283854216, 2.3626325930920802e-21)) end @testset "NegativeBinomial regression" begin global aov = anova_glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2.0), LogLink(), type = 3) @test !(@test_error test_show(aov)) - @test nobs(aov) == nobs(aov.model) - @test dof(aov) == (1, 1, 1, 3, 1, 139) + @test nobs(aov) == nobs(aov.anovamodel.model) + @test dof(aov) == (1, 1, 1, 3, 1) @test anova_test(aov) == FTest - @test isapprox(deviance(aov), AnovaGLM.deviances(aov.model, type = 3)) - @test isapprox(filter(!isnan, teststat(aov)), (227.97422313423752, 13.180680587112887, 0.2840882132754838, 4.037856229143672, 2.6138558930314595)) - @test isapprox(filter(!isnan, pval(aov)), (4.251334236308285e-31, 0.00039667906264309605, 0.5948852219093326, 0.008659894572621351, 0.10820118567663468)) - @test coefnames(aov.model, Val(:anova)) == ["(Intercept)", "Eth", "Sex", "Age", "Lrn"] + @test isapprox(deviance(aov), AnovaGLM.deviances(FullModel(aov.anovamodel.model, 3, true, true))) + @test isapprox(teststat(aov), (227.97422313423752, 13.180680587112887, 0.2840882132754838, 4.037856229143672, 2.6138558930314595)) + @test isapprox(pval(aov), (4.251334236308285e-31, 0.00039667906264309605, 0.5948852219093326, 0.008659894572621351, 0.10820118567663468)) end @testset "Poisson regression" begin - gmp = nestedmodels(GeneralizedLinearModel, @formula(num_awards ~ prog * math), sim, Poisson()) + gmp = nestedmodels(GeneralizedLinearModel, @formula(num_awards ~ prog * math), sim, Poisson()).model global aov = anova(gmp...) - lr = lrtest(gmp[2:end]...) + lr = lrtest(gmp...) @test !(@test_error test_show(aov)) @test first(nobs(aov)) == lr.nobs - @test dof(aov)[2:end] == lr.dof + @test dof(aov) == lr.dof @test anova_test(aov) == LRT - @test isapprox(deviance(aov)[2:end], lr.deviance) - @test isapprox(filter(!isnan, pval(aov))[2:end], filter(!isnan, lr.pval)) + @test isapprox(deviance(aov), lr.deviance) + @test isapprox(pval(aov)[2:end], lr.pval[2:end]) end @testset "Logit regression" begin gml = glm(@formula(AM ~ Cyl + HP + WT), mtcars, Binomial(), LogitLink()) global aov = anova(gml) - lr = lrtest(nestedmodels(gml)[2:end]...) + lr = lrtest(nestedmodels(gml).model...) @test !(@test_error test_show(aov)) @test nobs(aov) == lr.nobs @test dof(aov)[2:end] == AnovaBase._diff(lr.dof) @test isapprox(deviance(aov), lr.deviance) - @test isapprox(AnovaGLM.deviances(aov.model)[2:end], AnovaBase._diffn((deviance(aov)..., 0.0))) - @test isapprox(filter(!isnan, pval(aov))[2:end], filter(!isnan, lr.pval)) + @test isapprox(AnovaGLM.deviances(FullModel(aov.anovamodel.model, 1, true, true))[2:end], AnovaBase._diffn((deviance(aov)..., 0.0))[1:end - 1]) + @test isapprox(pval(aov)[2:end], lr.pval[2:end]) end @testset "Probit regression" begin @@ -157,9 +153,9 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} global aov = anova(gmi) @test !(@test_error test_show(aov)) @test nobs(aov) == nobs(gmi) - @test dof(aov) == (1, 2, 2, 144) - @test isapprox(filter(!isnan, teststat(aov)), (8.172100334461327, 217.34941014323272, 1.8933247444892272)) - @test isapprox(filter(!isnan, pval(aov)), (0.004885873691352542, 3.202701170052312e-44, 0.15429963193830074)) + @test dof(aov) == (1, 2, 2) + @test isapprox(teststat(aov), (8.172100334461327, 217.34941014323272, 1.8933247444892272)) + @test isapprox(pval(aov), (0.004885873691352542, 3.202701170052312e-44, 0.15429963193830074)) end end @testset "Miscellaneous" begin From f3b776aee164a638638ec958f958b054bc65e5f7 Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Thu, 19 Jan 2023 02:58:43 +0800 Subject: [PATCH 4/6] Fix floating point errors --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index fb8bf77..81710d6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -93,7 +93,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} #@test @test_error anova_glm(@formula(Cost / Claims ~ 1 + Class), insurance, Gamma(), type = 3) # should be ok without intercept @test @test_error anova_glm(@formula(Cost / Claims ~ 0 + Class), insurance, Gamma(), type = 3) @test @test_error anova_glm(@formula(Cost / Claims ~ 0 + Class), insurance, Gamma(), type = 2) - @test nobs(aov) == nobs(aov.anovamodel.model) + @test nobs(aov) == round(Int, nobs(aov.anovamodel.model)) @test dof(aov) == (1, 4) @test isapprox(deviance(aov), AnovaGLM.deviances(FullModel(aov.anovamodel.model, anova_type(aov), false, false))) @test isapprox(teststat(aov), (6.163653078060339, 2802.3252386290533)) @@ -103,7 +103,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} @testset "NegativeBinomial regression" begin global aov = anova_glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2.0), LogLink(), type = 3) @test !(@test_error test_show(aov)) - @test nobs(aov) == nobs(aov.anovamodel.model) + @test nobs(aov) == round(Int, nobs(aov.anovamodel.model)) @test dof(aov) == (1, 1, 1, 3, 1) @test anova_test(aov) == FTest @test isapprox(deviance(aov), AnovaGLM.deviances(FullModel(aov.anovamodel.model, 3, true, true))) @@ -152,7 +152,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} gmi = glm(@formula(SepalLength ~ SepalWidth * Species), iris, InverseGaussian()) global aov = anova(gmi) @test !(@test_error test_show(aov)) - @test nobs(aov) == nobs(gmi) + @test nobs(aov) == round(Int, nobs(gmi)) @test dof(aov) == (1, 2, 2) @test isapprox(teststat(aov), (8.172100334461327, 217.34941014323272, 1.8933247444892272)) @test isapprox(pval(aov), (0.004885873691352542, 3.202701170052312e-44, 0.15429963193830074)) From d524c38bda1af04d2769953fb6152214e953c1bf Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Thu, 19 Jan 2023 03:04:13 +0800 Subject: [PATCH 5/6] Fix another floating point error --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 81710d6..52951be 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -78,7 +78,7 @@ isapprox(x::NTuple{N, Float64}, y::NTuple{N, Float64}, atol::NTuple{N, Float64} global aovf = anova(wlm1, wlm2) @test !(@test_error test_show(aov)) @test !(@test_error test_show(aovf)) - @test nobs(aov) == sum(aov.anovamodel.model.model.rr.wts) + @test nobs(aov) == round(Int, sum(aov.anovamodel.model.model.rr.wts)) @test dof(aov) == (1, 1) @test isapprox(teststat(aov)[2:end], teststat(aovf)[2:end]) end From 6dfa18641125d77ce0a36a1272c0965186d2148b Mon Sep 17 00:00:00 2001 From: yufongpeng <54415349+yufongpeng@users.noreply.github.com> Date: Thu, 19 Jan 2023 17:01:13 +0800 Subject: [PATCH 6/6] Bug fix --- src/AnovaGLM.jl | 2 +- src/anova.jl | 6 +++--- src/io.jl | 13 ++++++------- test/runtests.jl | 1 - 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/AnovaGLM.jl b/src/AnovaGLM.jl index 933932c..8a8a272 100644 --- a/src/AnovaGLM.jl +++ b/src/AnovaGLM.jl @@ -16,7 +16,7 @@ import GLM: glm, updateμ!, cholfactors, # other FP, BlasReal, Link, dispersion, deviance, dof, dof_residual, nobs -using AnovaBase: select_super_interaction, extract_contrasts, canonicalgoodnessoffit, subformula, predictors, dof_asgn, lrt_nested, ftest_nested +using AnovaBase: select_super_interaction, extract_contrasts, canonicalgoodnessoffit, subformula, predictors, dof_asgn, lrt_nested, ftest_nested, _diff, _diffn import AnovaBase: anova, nestedmodels, anovatable, prednames export anova_lm, anova_glm diff --git a/src/anova.jl b/src/anova.jl index 5fb62fd..cff44e8 100644 --- a/src/anova.jl +++ b/src/anova.jl @@ -52,7 +52,7 @@ function anova(::Type{FTest}, aovm::FullModel{<: TRM_LM}) f = formula(aovm.model) assign = asgn(predictors(aovm)) fullasgn = asgn(f) - df = tuple(dof_asgn(assign)...) + df = dof_asgn(assign) varβ = vcov(aovm.model.model) β = aovm.model.model.pp.beta0 offset = first(assign) + last(fullasgn) - last(assign) - 1 @@ -88,7 +88,7 @@ function anova(::Type{FTest}, devs = deviances(aovm; kwargs...) assign = asgn(collect(predictors(aovm))) #length(vdf) ≡ length(devs) + 1 && popfirst!(vdf) - df = tuple(dof_asgn(assign)...) + df = dof_asgn(assign) msr = devs ./ df fstat = msr ./ dispersion(aovm.model.model, true) dfr = round(Int, dof_residual(aovm.model)) @@ -108,7 +108,7 @@ function anova(::Type{LRT}, Δdev = deviances(aovm) assign = asgn(collect(predictors(aovm))) #isnullable(trm.model) || popfirst!(vdf) - df = tuple(dof_asgn(assign)...) + df = dof_asgn(assign) # den = last(ss) / (nobs(trm) - dof(trm) + 1) # lrstat = ss[1:end - 1] ./ den σ² = dispersion(aovm.model.model, true) diff --git a/src/io.jl b/src/io.jl index 9ebd7c2..bc6554b 100644 --- a/src/io.jl +++ b/src/io.jl @@ -42,15 +42,15 @@ function anovatable(aov::AnovaResult{<: FullModel{<: TableRegressionModel{<: Gen rownames, 4, 3) end -function anovatable(aov::AnovaResult{NestedModels{<: TableRegressionModel{<: LinearModel}, N}, FTest}; - rownames = "x" .* string.(1:N)) where N +function anovatable(aov::AnovaResult{<: NestedModels{<: TableRegressionModel{<: LinearModel}, N}, FTest}; + rownames = string.(1:N)) where N rs = r2.(aov.anovamodel.model) Δrs = _diff(rs) AnovaTable([ dof(aov), [NaN, _diff(dof(aov))...], - round(Int, dof_residual(aov.anovamodel.model)), + repeat([round(Int, dof_residual(last(aov.anovamodel.model)))], N), rs, [NaN, Δrs...], deviance(aov), @@ -62,19 +62,18 @@ function anovatable(aov::AnovaResult{NestedModels{<: TableRegressionModel{<: Lin rownames, 9, 8) end -function anovatable(aov::AnovaResult{NestedModels{<: TableRegressionModel{<: LinearModel}, N}, LRT}; - rownames = "x" .* string.(1:N)) where N +function anovatable(aov::AnovaResult{<: NestedModels{<: TableRegressionModel{<: LinearModel}, N}, LRT}; + rownames = string.(1:N)) where N rs = r2.(aov.anovamodel.model) Δrs = _diff(rs) AnovaTable([ dof(aov), [NaN, _diff(dof(aov))...], - round(Int, dof_residual(aov.anovamodel.model)), + repeat([round(Int, dof_residual(last(aov.anovamodel.model)))], N), rs, [NaN, Δrs...], deviance(aov), - [NaN, _diffn(deviance(aov))...], teststat(aov), pval(aov) ], diff --git a/test/runtests.jl b/test/runtests.jl index 52951be..c93517b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,6 @@ macro test_error(x) $x false catch e - @error e true end end