Skip to content

Commit

Permalink
Merge pull request #4 from smartdata-analysis-and-statistics/Valentijn
Browse files Browse the repository at this point in the history
1. Fixed bug and added functionality to metapred and fixed tests.
2. Added tests for multiple generalizability and performance functions in metapred. This should prevent the fixed bug from reoccuring.
3. Fixed test that raised an error when plotting valmeta. Note that the statistical issue remains, just the test is fixed.
4. Split tests for ccalc and oecalc into three. Added tolerance level, as the estimates are not expected to be identical
  • Loading branch information
VMTdeJong committed Feb 19, 2024
2 parents 6b7b0b3 + 5bb1486 commit af02eea
Show file tree
Hide file tree
Showing 18 changed files with 228 additions and 61 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ S3method(ci,listofperf)
S3method(ci,mse)
S3method(ci,recal)
S3method(coef,metapred)
S3method(coef,mp.1st.fit)
S3method(coef,mp.cv.meta.fit)
S3method(coef,mp.perf)
S3method(coef,mp.stratified.fit)
Expand Down
84 changes: 61 additions & 23 deletions R/metapred.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@
#' only the first is used for model selection.
#' @param selFUN Function for selecting the best method. Default: lowest value for \code{genFUN}. Should be set to
#' "which.max" if high values for \code{genFUN} indicate a good model.
#' @param gen.of.perf For which performance measures should generalizability measures be computed? \code{"first"} (default) for
#' only the first. \code{"respective"} for matching the generalizability measure to the performance measure on the same location
#' in the list. \code{"factorial"} for applying all generalizability measures to all performance estimates.
#' @param ... To pass arguments to estFUN (e.g. family = "binomial"), or to other FUNctions.
#'
#' @return A list of class \code{metapred}, containing the final model in \code{global.model}, and the stepwise
Expand Down Expand Up @@ -211,7 +214,7 @@
metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest = FALSE, max.steps = 1000,
center = FALSE, recal.int = FALSE, cvFUN = NULL, cv.k = NULL, # tol = 0,
metaFUN = NULL, meta.method = NULL, predFUN = NULL, perfFUN = NULL, genFUN = NULL,
selFUN = "which.min",
selFUN = "which.min", gen.of.perf = "first",
...) {
call <- match.call()

Expand Down Expand Up @@ -259,10 +262,12 @@ metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest
stop("At least 1 cluster must be used for development.")

# Fitting
fit <- mp.fit(formula = formula, data = data, remaining.changes = updates, st.i = strata.i, st.u = strata.u, folds = folds,
fit <- mp.fit(formula = formula, data = data, remaining.changes = updates,
st.i = strata.i, st.u = strata.u, folds = folds,
recal.int = recal.int, retest = retest, max.steps = max.steps, tol = 0,
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN, perfFUN = perfFUN,
genFUN = genFUN, selFUN = selFUN, ...)
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method,
predFUN = predFUN, perfFUN = perfFUN,
genFUN = genFUN, selFUN = selFUN, gen.of.perf = gen.of.perf, ...)

# mp.args <- c(list(formula = formula, data = data, remaining.changes = updates, st.i = strata.i, st.u = strata.u, folds = folds,
# recal.int = recal.int, retest = retest, max.steps = max.steps, tol = 0,
Expand All @@ -279,8 +284,10 @@ metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest
formula.changes = getFormulaDiffAsChar(formula.final, formula),
# NOTE: formula.changes is currently unordered!
options = list(cv.k = cv.k, meta.method = meta.method, recal.int = recal.int,
center = center, max.steps = max.steps, retest = retest, two.stage = two.stage), # add: tol
FUN = list(cvFUN = cvFUN, predFUN = predFUN, perfFUN = get.function(perfFUN), metaFUN = metaFUN, genFUN = genFUN,
center = center, max.steps = max.steps, retest = retest,
two.stage = two.stage, gen.of.perf = gen.of.perf), # add: tol
FUN = list(cvFUN = cvFUN, predFUN = predFUN, perfFUN = get.functions(perfFUN),
metaFUN = metaFUN, genFUN = genFUN,
selFUN = selFUN, estFUN = estFUN, estFUN.name = estFUN.name)))
class(out) <- c("metapred")
return(out)
Expand Down Expand Up @@ -556,7 +563,7 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
retest = FALSE, max.steps = 3, tol = 0,
estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL,
perfFUN = mse, genFUN = abs.mean, selFUN = which.min,
two.stage = TRUE, ...) {
two.stage = TRUE, gen.of.perf = "first", ...) {
out <- steps <- list()

## Step 0
Expand All @@ -566,7 +573,8 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
st.i = st.i, st.u = st.u, folds = folds, recal.int = recal.int,
retest = FALSE, two.stage = two.stage,
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN,
perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, ...)
perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN,
gen.of.perf = gen.of.perf, ...)
steps[[getStepName(step.count)]][["step.count"]] <- step.count
out[["best.step"]] <- getStepName(step.count)
out[["stop.reason"]] <- "no changes were requested."
Expand Down Expand Up @@ -597,7 +605,8 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
st.i = st.i, st.u = st.u, folds = folds, recal.int = recal.int,
retest = retest, two.stage = two.stage,
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN,
perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, ...)
perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN,
gen.of.perf = gen.of.perf, ...)
steps[[getStepName(step.count)]][["step.count"]] <- step.count
## Model selection
# This step
Expand Down Expand Up @@ -699,7 +708,8 @@ mp.step.get.change <- function(step, ...)
mp.step <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.int = FALSE,
two.stage = TRUE, retest = FALSE,
estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL,
perfFUN = mse, genFUN = abs.mean, selFUN = which.min, ...) {
perfFUN = mse, genFUN = abs.mean, selFUN = which.min, gen.of.perf = "first",
...) {
cv <- out <- list()
out[["start.formula"]] <- formula

Expand All @@ -721,7 +731,8 @@ mp.step <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.i
cv[[name]] <- mp.cv(formula = new.formula, data = data, st.i = st.i, st.u = st.u,
folds = folds, recal.int = recal.int, two.stage = two.stage,
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method,
predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, change = change, ...)
predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN,
change = change, gen.of.perf = gen.of.perf, ...)
# Save changes
cv[[name]][["remaining.changes"]] <- if (retest) remaining.changes else remaining.changes[-fc]
# cv[[name]][["changed"]] <- change
Expand Down Expand Up @@ -855,12 +866,13 @@ summary.mp.global <- function(object, ...) {
# and a validated on val folds
mp.cv <- function(formula, data, st.i, st.u, folds, recal.int = FALSE, two.stage = TRUE,
estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL,
perfFUN = mse, genFUN = abs.mean, change = NULL, ...) {
perfFUN = mse, genFUN = abs.mean, change = NULL, gen.of.perf = "first", ...) {
out <- mp.cv.dev(formula = formula, data = data, st.i = st.i, st.u = st.u, folds = folds, two.stage = two.stage,
estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, change = change, ...)

out <- mp.cv.val(cv.dev = out, data = data, st.i = st.i, folds = folds, recal.int = recal.int, two.stage = two.stage,
estFUN = estFUN, predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, ...)
estFUN = estFUN, predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN,
gen.of.perf = gen.of.perf, ...)

class(out) <- c("mp.cv", class(out))
out
Expand Down Expand Up @@ -913,7 +925,7 @@ print.mp.cv <- function(x, ...) {
# Returns object of class mp.cv.val, which is a validated mp.cv.dev
mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage = TRUE,
estFUN = glm, predFUN = NULL, perfFUN = mse,
genFUN = abs.mean, plot = F, ...) {
genFUN = abs.mean, plot = F, gen.of.perf = "first", ...) {
dots <- list(...)
pfn <- if (is.character(perfFUN)) perfFUN else "Performance"
cv.dev[["perf.name"]] <- pfn # To be removed!??!!?
Expand Down Expand Up @@ -987,10 +999,15 @@ mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage =
cv.dev[["perf.all"]] <- perf.all # Future compatibility
cv.dev[["perf"]] <- perf.all[[1]] # Backwards compatibility

# Generalizibility
# Generalizability
if (!is.list(genFUN))
genFUN <- list(genFUN)

if (identical(gen.of.perf, "factorial")) {
which.perf <- rep(seq_along(perfFUN), each = length(genFUN))
genFUN <- rep(genFUN, times = length(perfFUN))
}

# Names of generalizability measures
if (identical(length(names(genFUN)), length(genFUN))) {
gen.names <- names(genFUN)
Expand All @@ -1003,8 +1020,10 @@ mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage =
gen.all <- rep(NA, length(genFUN))

for (fun.id in seq_along(genFUN)) { # Single brackets intended!
cv.dev.selection <- if (identical(gen.of.perf, "first")) 1 else
if (identical(gen.of.perf, "factorial")) which.perf[fun.id] else fun.id # add which_perf somehow
genfun <- match.fun(genFUN[[fun.id]])
args <- c(list(object = cv.dev[["perf"]],
args <- c(list(object = cv.dev[["perf.all"]][[cv.dev.selection]],
coef = coef(cv.dev[["stratified.fit"]]),
title = paste("Model change: ~", cv.dev[["changed"]]),
xlab = as.character(pfn)
Expand Down Expand Up @@ -1136,7 +1155,7 @@ fitted.mp.cv.dev <- function(object, data, folds, st.i, predFUN = NULL, two.stag
family.mp.cv.dev <- function(object, ...)
object$family

# Estimate a one-stage or non-stratified model on the develoment (!) strata.
# Estimate a one-stage or non-stratified model on the development (!) strata.
mp.1st.fit <- function(formula, data, st.i, st.u, folds, estFUN, ...) {
out <- list()

Expand Down Expand Up @@ -1206,6 +1225,13 @@ mp.cv.meta.fit <- function(stratified.fit, folds, metaFUN = urma, meta.method =
coef.mp.cv.meta.fit <- function(object, ...)
t(as.data.frame(lapply(object, `[[`, "coefficients"), check.names = FALSE))

#' @author Valentijn de Jong
#' @method coef mp.1st.fit
#' @export
coef.mp.1st.fit <- function(object, ...) {
coef.mp.cv.meta.fit(object, ...)
}

#' @author Valentijn de Jong
#' @method print mp.cv.meta.fit
#' @export
Expand All @@ -1220,6 +1246,7 @@ print.mp.cv.meta.fit <- function(x, ...) {
}
}


# Make new meta model (i.e. model fitted on multiple clusters) for ?? for metapred
# stratified.fit mp.stratified.fit
# metaFUN function for estimating meta-analytic models, e.g. urma (this file)
Expand Down Expand Up @@ -1536,6 +1563,10 @@ ci.mse <- function(object, conf = .95, ...) {
#' to \link{subset.metapred} such that the generalizability estimates of other steps/models may be
#' returned..
#'
#' @details
#' With named values or indices for parameter \code{genFUN} one or more estimates
#' of generalizability can be selected. Use \code{genFUN = 0} to select all.
#'
#' @export
gen <- function(object, ...)
UseMethod("gen")
Expand All @@ -1548,9 +1579,11 @@ gen.metapred <- function(object, genFUN = 1, ...)
gen(subset(object, ...), genFUN = genFUN, ...)

#' @export
gen.mp.cv.val <- function(object, genFUN = 1, ...)
object$gen.all[[genFUN]]

gen.mp.cv.val <- function(object, genFUN = 1, ...) {
if (is.numeric(genFUN) && genFUN == 0)
return(object$gen.all)
object$gen.all[genFUN]
}

#' Performance estimates
#'
Expand All @@ -1566,7 +1599,7 @@ gen.mp.cv.val <- function(object, genFUN = 1, ...)
#' @param object A model fit object, either a \link{metapred} or \code{subset(metapred)} object.
#' @param ... By default, the final model is selected. This parameter allows other arguments to be
#' passed to \link{subset.metapred} such that the performance estimates of other steps/models may be
#' returned..
#' returned. Use \code{perfFUN = 0} to select all.
#'
#' @export
perf <- function(object, ...)
Expand All @@ -1580,5 +1613,10 @@ perf.metapred <- function(object, perfFUN = 1, ...)
perf(subset(object, ...), perfFUN = perfFUN, ...)

#' @export
perf.mp.cv.val <- function(object, perfFUN = 1, ...)
object[["perf.all"]][[perfFUN]]
perf.mp.cv.val <- function(object, perfFUN = 1, ...) {
if (is.numeric(perfFUN) && perfFUN == 0)
return(object[["perf.all"]])
object[["perf.all"]][[perfFUN]]
}


8 changes: 1 addition & 7 deletions R/metapred_measures.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,11 @@ calibration.intercept <- cal.int <- function(p, y, estFUN, family, ...)
bin.cal.int <- function(p, y, ...)
pred.recal(p = p, y = y, estFUN = "glm", family = binomial, which = "intercept")

# Slope.only is a trick to make this functin work for metapred.
# Slope.only is a trick to make this function work for metapred.
# Slope.only should otherwise always be false! Also: this messes up the variances,
# making meta-analysis impossible!
# multiplicative slope!
calibration.slope <- cal.slope <- function(p, y, estFUN, family, slope.only = TRUE, ...) {
# refit <- pred.recal(p = p, y = y, estFUN = estFUN, family = family, which = "slope")
# if (slope.only) {
# refit[[1]] <- refit[[1]][[2]]
# }
# refit

refit <- pred.recal(p = p, y = y, estFUN = estFUN, family = family, which = "slope")
if (slope.only) {
refit$estimate <- refit[[1]] <- refit[[1]][2]
Expand Down
15 changes: 12 additions & 3 deletions R/metapred_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,10 @@ getPredictMethod <- function(fit, two.stage = TRUE, predFUN = NULL, ...) {
# f formula used for selecting relevant variables from newdata. Overrides object
# ... For compatibility only.
# Returns vector of predicted values.
predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", ...) {
predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", X = NULL, ...) {
if (is.null(b)) b <- coef(object)
if (is.null(f)) f <- formula(object)
X <- model.matrix(f2rhsf(stats::as.formula(f)), data = newdata)
if (is.null(X)) X <- model.matrix(f2rhsf(stats::as.formula(f)), data = newdata)

lp <- X %*% b

Expand All @@ -225,8 +225,12 @@ predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", .
return(lp)
}

predictglmer <- function(object, newdata, b = NULL, f = NULL, type = "response", ...)
predictglmer <- function(object, newdata, b = NULL, f = NULL, type = "response", ...) {
if (is.null(f)) f <- formula(object)

f <- lme4::nobars(f)
predictGLM(object = object, newdata = newdata, b = b, f = f, type = type, ...)
}

# Prediction function for logistf from the logisf package
# Args same as those of predictGLM()
Expand Down Expand Up @@ -449,6 +453,11 @@ get.function <- function(x, ...) {
return(get(as.character(x), mode = "function"))
}

# x list of functions or list of names of functions, or a combination of both
get.functions <- function(x, ...) {
lapply(x, get.function, ...)
}

# Convert factor to binary
# In case the factor has more than 2 levels, by default the same occurs as in
# glm: the first level is assumed to be the failure, and all others successes.
Expand Down
4 changes: 2 additions & 2 deletions R/plot_utils.r
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ forest.default <- function(theta,


# Add confidence interval of the summary estimate
p <- p + with(g2, geom_errorbar(data = g2, aes(ymin = ci.lower, ymax = ci.upper, x = order), width = 0.5, size=1.0))
p <- p + with(g2, geom_errorbar(data = g2, aes(ymin = ci.lower, ymax = ci.upper, x = order), width = 0.5, linewidth=1.0))

# Add summary estimate
p <- p + with(g2, geom_point(data = g2, aes(x = order, y = mean), shape=23, size=3, fill = col.diamond))
Expand All @@ -237,7 +237,7 @@ forest.default <- function(theta,

p <- p + with(g3, geom_errorbar(data = g3,
aes(ymin = pi.lower, ymax = pi.upper, x = order),
size = size.predint,
linewidth = size.predint,
width = 0.5,
color = col.predint,
linetype = predint.linetype))
Expand Down
6 changes: 0 additions & 6 deletions man/Tzoulaki.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,10 @@ Tzoulaki et al. (2009) reviewed studies that evaluated various candidate prognos
\item{\code{sign.AUCdiff}}{a boolean vector indicating whether the difference between \code{AUC.orig} and \code{AUC.modif} is below 0.05}
}
}
\details{
%% ~~ If necessary, more details than the __description__ above ~~
}
\source{
%% ~~ reference to a publication or URL from which the data were obtained ~~
Tzoulaki I, Liberopoulos G, Ioannidis JPA. Assessment of claims of improved prediction beyond the Framingham risk score. \emph{JAMA}. 2009 Dec 2;302(21):234552.

}
\references{
%% ~~ possibly secondary sources and usages ~~
}
\examples{
data(Tzoulaki)
Expand Down
4 changes: 4 additions & 0 deletions man/gen.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/metapred.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/perf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified tests/testthat/Rplots.pdf
Binary file not shown.
Loading

0 comments on commit af02eea

Please sign in to comment.