Merge pull request #4 from smartdata-analysis-and-statistics/Valentijn

1. Fixed bug and added functionality to metapred and fixed tests. 2. Added tests for multiple generalizability and performance functions in metapred. This should prevent the fixed bug from reoccuring. 3. Fixed test that raised an error when plotting valmeta. Note that the statistical issue remains, just the test is fixed. 4. Split tests for ccalc and oecalc into three. Added tolerance level, as the estimates are not expected to be identical
smartdata-analysis-and-statistics · Feb 19, 2024 · af02eea · af02eea
2 parents 6b7b0b3 + 5bb1486
commit af02eea
Show file tree

Hide file tree

Showing 18 changed files with 228 additions and 61 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,6 +7,7 @@ S3method(ci,listofperf)
 S3method(ci,mse)
 S3method(ci,recal)
 S3method(coef,metapred)
+S3method(coef,mp.1st.fit)
 S3method(coef,mp.cv.meta.fit)
 S3method(coef,mp.perf)
 S3method(coef,mp.stratified.fit)

diff --git a/R/metapred.R b/R/metapred.R
@@ -149,6 +149,9 @@
 #' only the first is used for model selection.
 #' @param selFUN Function for selecting the best method. Default: lowest value for \code{genFUN}. Should be set to
 #' "which.max" if high values for \code{genFUN} indicate a good model.
+#' @param gen.of.perf For which performance measures should generalizability measures be computed? \code{"first"} (default) for 
+#' only the first. \code{"respective"} for matching the generalizability measure to the performance measure on the same location
+#' in the list. \code{"factorial"} for applying all generalizability measures to all performance estimates.
 #' @param ... To pass arguments to estFUN (e.g. family = "binomial"), or to other FUNctions.
 #'
 #' @return A list of class \code{metapred}, containing the final model in \code{global.model}, and the stepwise
@@ -211,7 +214,7 @@
 metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest = FALSE, max.steps = 1000, 
                      center = FALSE, recal.int = FALSE, cvFUN = NULL, cv.k = NULL,  # tol = 0,
                      metaFUN = NULL, meta.method = NULL, predFUN = NULL, perfFUN = NULL, genFUN = NULL,
-                     selFUN = "which.min",
+                     selFUN = "which.min", gen.of.perf = "first",
                      ...) {
   call <- match.call()
 
@@ -259,10 +262,12 @@ metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest
     stop("At least 1 cluster must be used for development.")
 
   # Fitting
-  fit <- mp.fit(formula = formula, data = data, remaining.changes = updates, st.i = strata.i, st.u = strata.u, folds = folds,
+  fit <- mp.fit(formula = formula, data = data, remaining.changes = updates, 
+                st.i = strata.i, st.u = strata.u, folds = folds,
                 recal.int = recal.int, retest = retest, max.steps = max.steps, tol = 0,
-                estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN, perfFUN = perfFUN,
-                genFUN = genFUN, selFUN = selFUN, ...)
+                estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, 
+                predFUN = predFUN, perfFUN = perfFUN,
+                genFUN = genFUN, selFUN = selFUN, gen.of.perf = gen.of.perf, ...)
 
   # mp.args <- c(list(formula = formula, data = data, remaining.changes = updates, st.i = strata.i, st.u = strata.u, folds = folds,
   #                   recal.int = recal.int, retest = retest, max.steps = max.steps, tol = 0, 
@@ -279,8 +284,10 @@ metapred <- function(data, strata, formula, estFUN = "glm", scope = NULL, retest
                      formula.changes = getFormulaDiffAsChar(formula.final, formula), 
                      # NOTE: formula.changes is currently unordered!
                      options = list(cv.k = cv.k, meta.method = meta.method, recal.int = recal.int,
-                                    center = center, max.steps = max.steps, retest = retest, two.stage = two.stage), # add: tol
-                     FUN = list(cvFUN = cvFUN, predFUN = predFUN, perfFUN = get.function(perfFUN), metaFUN = metaFUN, genFUN = genFUN, 
+                                    center = center, max.steps = max.steps, retest = retest, 
+                                    two.stage = two.stage, gen.of.perf = gen.of.perf), # add: tol
+                     FUN = list(cvFUN = cvFUN, predFUN = predFUN, perfFUN = get.functions(perfFUN), 
+                                metaFUN = metaFUN, genFUN = genFUN, 
                                 selFUN = selFUN, estFUN = estFUN, estFUN.name = estFUN.name)))
   class(out) <- c("metapred")
   return(out)
@@ -556,7 +563,7 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
                    retest = FALSE, max.steps = 3, tol = 0,
                    estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL, 
                    perfFUN = mse, genFUN = abs.mean, selFUN = which.min,
-                   two.stage = TRUE, ...) {
+                   two.stage = TRUE, gen.of.perf = "first", ...) {
   out <- steps <- list()
 
   ## Step 0
@@ -566,7 +573,8 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
                                               st.i = st.i, st.u = st.u, folds = folds, recal.int = recal.int, 
                                               retest = FALSE, two.stage = two.stage,
                                               estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN, 
-                                              perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, ...)
+                                              perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, 
+                                              gen.of.perf = gen.of.perf, ...)
   steps[[getStepName(step.count)]][["step.count"]] <- step.count
   out[["best.step"]] <- getStepName(step.count)
   out[["stop.reason"]] <- "no changes were requested."
@@ -597,7 +605,8 @@ mp.fit <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.in
                                                   st.i = st.i, st.u = st.u, folds = folds, recal.int = recal.int,
                                                   retest = retest, two.stage = two.stage,
                                                   estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, predFUN = predFUN,
-                                                  perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, ...)
+                                                  perfFUN = perfFUN, genFUN = genFUN, selFUN = selFUN, 
+                                                  gen.of.perf = gen.of.perf, ...)
       steps[[getStepName(step.count)]][["step.count"]] <- step.count
       ## Model selection
       # This step
@@ -699,7 +708,8 @@ mp.step.get.change <- function(step, ...)
 mp.step <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.int = FALSE, 
                     two.stage = TRUE, retest = FALSE,
                     estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL, 
-                    perfFUN = mse, genFUN = abs.mean, selFUN = which.min, ...) {
+                    perfFUN = mse, genFUN = abs.mean, selFUN = which.min, gen.of.perf = "first", 
+                    ...) {
   cv <- out <- list()
   out[["start.formula"]] <- formula
 
@@ -721,7 +731,8 @@ mp.step <- function(formula, data, remaining.changes, st.i, st.u, folds, recal.i
     cv[[name]] <- mp.cv(formula = new.formula, data = data, st.i = st.i, st.u = st.u,
                         folds = folds, recal.int = recal.int, two.stage = two.stage,
                         estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method,
-                        predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, change = change, ...) 
+                        predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, 
+                        change = change, gen.of.perf = gen.of.perf, ...) 
     # Save changes
     cv[[name]][["remaining.changes"]] <- if (retest) remaining.changes else remaining.changes[-fc]
     # cv[[name]][["changed"]] <- change
@@ -855,12 +866,13 @@ summary.mp.global <- function(object, ...) {
 # and a validated on val folds 
 mp.cv <- function(formula, data, st.i, st.u, folds, recal.int = FALSE, two.stage = TRUE,
                   estFUN = glm, metaFUN = urma, meta.method = "DL", predFUN = NULL, 
-                  perfFUN = mse, genFUN = abs.mean, change = NULL, ...) {
+                  perfFUN = mse, genFUN = abs.mean, change = NULL, gen.of.perf = "first", ...) {
   out <- mp.cv.dev(formula = formula, data = data, st.i = st.i, st.u = st.u, folds = folds, two.stage = two.stage,
                    estFUN = estFUN, metaFUN = metaFUN, meta.method = meta.method, change = change, ...)
 
   out <- mp.cv.val(cv.dev = out, data = data, st.i = st.i, folds = folds, recal.int = recal.int, two.stage = two.stage,
-                   estFUN = estFUN, predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, ...)
+                   estFUN = estFUN, predFUN = predFUN, perfFUN = perfFUN, genFUN = genFUN, 
+                   gen.of.perf = gen.of.perf, ...)
 
   class(out) <- c("mp.cv", class(out))
   out
@@ -913,7 +925,7 @@ print.mp.cv <- function(x, ...) {
 # Returns object of class mp.cv.val, which is a validated mp.cv.dev
 mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage = TRUE, 
                       estFUN = glm, predFUN = NULL, perfFUN = mse, 
-                      genFUN = abs.mean, plot = F, ...) {
+                      genFUN = abs.mean, plot = F, gen.of.perf = "first", ...) {
   dots <- list(...)
   pfn <- if (is.character(perfFUN)) perfFUN else "Performance"
   cv.dev[["perf.name"]] <- pfn # To be removed!??!!?
@@ -987,10 +999,15 @@ mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage =
   cv.dev[["perf.all"]] <- perf.all   # Future compatibility  
   cv.dev[["perf"]] <- perf.all[[1]]  # Backwards compatibility
 
-  # Generalizibility
+  # Generalizability
   if (!is.list(genFUN)) 
     genFUN <- list(genFUN)
 
+  if (identical(gen.of.perf, "factorial")) {
+    which.perf <- rep(seq_along(perfFUN), each = length(genFUN))
+    genFUN <- rep(genFUN, times = length(perfFUN))
+  }
+
   # Names of generalizability measures
   if (identical(length(names(genFUN)), length(genFUN))) {
     gen.names <- names(genFUN)
@@ -1003,8 +1020,10 @@ mp.cv.val <- function(cv.dev, data, st.i, folds, recal.int = FALSE, two.stage =
   gen.all <- rep(NA, length(genFUN))
 
   for (fun.id in seq_along(genFUN)) { # Single brackets intended!
+    cv.dev.selection <- if (identical(gen.of.perf, "first")) 1 else 
+      if (identical(gen.of.perf, "factorial")) which.perf[fun.id] else fun.id # add which_perf somehow 
     genfun <- match.fun(genFUN[[fun.id]])
-    args <- c(list(object = cv.dev[["perf"]],
+    args <- c(list(object = cv.dev[["perf.all"]][[cv.dev.selection]],
                    coef = coef(cv.dev[["stratified.fit"]]),
                    title = paste("Model change: ~", cv.dev[["changed"]]),
                    xlab = as.character(pfn)
@@ -1136,7 +1155,7 @@ fitted.mp.cv.dev <- function(object, data, folds, st.i, predFUN = NULL, two.stag
 family.mp.cv.dev <- function(object, ...)
   object$family
 
-# Estimate a one-stage or non-stratified model on the develoment (!) strata.
+# Estimate a one-stage or non-stratified model on the development (!) strata.
 mp.1st.fit <- function(formula, data, st.i, st.u, folds, estFUN, ...) {
   out <- list()
 
@@ -1206,6 +1225,13 @@ mp.cv.meta.fit <- function(stratified.fit, folds, metaFUN = urma, meta.method =
 coef.mp.cv.meta.fit <- function(object, ...) 
   t(as.data.frame(lapply(object, `[[`, "coefficients"), check.names = FALSE))
 
+#' @author Valentijn de Jong
+#' @method coef   mp.1st.fit
+#' @export
+coef.mp.1st.fit <- function(object, ...) {
+  coef.mp.cv.meta.fit(object, ...)
+}
+
 #' @author Valentijn de Jong
 #' @method print   mp.cv.meta.fit
 #' @export
@@ -1220,6 +1246,7 @@ print.mp.cv.meta.fit <- function(x, ...) {
   }
 }
 
+
 # Make new meta model (i.e. model fitted on multiple clusters) for ?? for metapred
 # stratified.fit mp.stratified.fit
 # metaFUN function for estimating meta-analytic models, e.g. urma (this file)
@@ -1536,6 +1563,10 @@ ci.mse <- function(object, conf = .95, ...) {
 #' to \link{subset.metapred} such that the generalizability estimates of other steps/models may be
 #' returned.. 
 #' 
+#' @details
+#' With named values or indices for parameter \code{genFUN} one or more estimates 
+#' of generalizability can be selected. Use \code{genFUN = 0} to select all.
+#' 
 #' @export
 gen <- function(object, ...) 
   UseMethod("gen")
@@ -1548,9 +1579,11 @@ gen.metapred <- function(object, genFUN = 1, ...)
   gen(subset(object, ...), genFUN = genFUN, ...)
 
 #' @export
-gen.mp.cv.val <- function(object, genFUN = 1, ...)
-  object$gen.all[[genFUN]]
-
+gen.mp.cv.val <- function(object, genFUN = 1, ...) {
+  if (is.numeric(genFUN) && genFUN == 0)
+    return(object$gen.all)
+  object$gen.all[genFUN]
+}
 
 #' Performance estimates
 #' 
@@ -1566,7 +1599,7 @@ gen.mp.cv.val <- function(object, genFUN = 1, ...)
 #' @param object A model fit object, either a \link{metapred} or \code{subset(metapred)} object.
 #' @param ... By default, the final model is selected. This parameter allows other arguments to be
 #' passed to \link{subset.metapred} such that the performance estimates of other steps/models may be
-#' returned.. 
+#' returned. Use \code{perfFUN = 0} to select all.
 #' 
 #' @export
 perf <- function(object, ...) 
@@ -1580,5 +1613,10 @@ perf.metapred <- function(object, perfFUN = 1, ...)
   perf(subset(object, ...), perfFUN = perfFUN, ...)
 
 #' @export
-perf.mp.cv.val <- function(object, perfFUN = 1, ...)
-  object[["perf.all"]][[perfFUN]]
+perf.mp.cv.val <- function(object, perfFUN = 1, ...) {
+  if (is.numeric(perfFUN) && perfFUN == 0) 
+    return(object[["perf.all"]])
+    object[["perf.all"]][[perfFUN]]
+}
+
+
diff --git a/R/metapred_measures.R b/R/metapred_measures.R
@@ -71,17 +71,11 @@ calibration.intercept <- cal.int <- function(p, y, estFUN, family, ...)
 bin.cal.int <- function(p, y, ...)
   pred.recal(p = p, y = y, estFUN = "glm", family = binomial, which = "intercept")
 
-# Slope.only is a trick to make this functin work for metapred.
+# Slope.only is a trick to make this function work for metapred.
 # Slope.only should otherwise always be false! Also: this messes up the variances,
 # making meta-analysis impossible!
 # multiplicative slope!
 calibration.slope <- cal.slope <- function(p, y, estFUN, family, slope.only = TRUE, ...) {
-  # refit <- pred.recal(p = p, y = y, estFUN = estFUN, family = family, which = "slope")
-  # if (slope.only) {
-  #   refit[[1]] <- refit[[1]][[2]]
-  # }
-  # refit
-
   refit <- pred.recal(p = p, y = y, estFUN = estFUN, family = family, which = "slope")
   if (slope.only) {
     refit$estimate <- refit[[1]] <- refit[[1]][2]

diff --git a/R/metapred_utils.R b/R/metapred_utils.R
@@ -209,10 +209,10 @@ getPredictMethod <- function(fit, two.stage = TRUE, predFUN = NULL, ...) {
 # f formula used for selecting relevant variables from newdata. Overrides object
 # ... For compatibility only.
 # Returns vector of predicted values.
-predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", ...) {
+predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", X = NULL, ...) {
   if (is.null(b)) b <- coef(object)
   if (is.null(f)) f <- formula(object)
-  X <- model.matrix(f2rhsf(stats::as.formula(f)), data = newdata)
+  if (is.null(X)) X <- model.matrix(f2rhsf(stats::as.formula(f)), data = newdata)
 
   lp <- X %*% b
 
@@ -225,8 +225,12 @@ predictGLM <- function(object, newdata, b = NULL, f = NULL, type = "response", .
     return(lp)
 }
 
-predictglmer <- function(object, newdata, b = NULL, f = NULL, type = "response", ...)
+predictglmer <- function(object, newdata, b = NULL, f = NULL, type = "response", ...) {
+  if (is.null(f)) f <- formula(object)
+
+  f <- lme4::nobars(f)
   predictGLM(object = object, newdata = newdata, b = b, f = f, type = type, ...)
+}
 
 # Prediction function for logistf from the logisf package
 # Args same as those of predictGLM()
@@ -449,6 +453,11 @@ get.function <- function(x, ...) {
     return(get(as.character(x), mode = "function"))
 }
 
+# x list of functions or list of names of functions, or a combination of both
+get.functions <- function(x, ...) {
+  lapply(x, get.function, ...)
+}
+
 # Convert factor to binary
 # In case the factor has more than 2 levels, by default the same occurs as in 
 # glm: the first level is assumed to be the failure, and all others successes.

diff --git a/R/plot_utils.r b/R/plot_utils.r
@@ -219,7 +219,7 @@ forest.default <- function(theta,
 
 
     # Add confidence interval of the summary estimate
-    p <- p + with(g2, geom_errorbar(data = g2, aes(ymin = ci.lower, ymax = ci.upper, x = order), width = 0.5, size=1.0))
+    p <- p + with(g2, geom_errorbar(data = g2, aes(ymin = ci.lower, ymax = ci.upper, x = order), width = 0.5, linewidth=1.0))
 
     # Add summary estimate
     p <- p + with(g2, geom_point(data = g2, aes(x = order, y = mean), shape=23, size=3, fill = col.diamond))
@@ -237,7 +237,7 @@ forest.default <- function(theta,
 
       p <- p + with(g3, geom_errorbar(data = g3, 
                              aes(ymin = pi.lower, ymax = pi.upper, x = order), 
-                             size = size.predint, 
+                             linewidth = size.predint, 
                              width = 0.5,
                              color = col.predint,
                              linetype = predint.linetype))

diff --git a/man/Tzoulaki.Rd b/man/Tzoulaki.Rd
@@ -30,16 +30,10 @@ Tzoulaki et al. (2009) reviewed studies that evaluated various candidate prognos
     \item{\code{sign.AUCdiff}}{a boolean vector indicating whether the difference between \code{AUC.orig} and \code{AUC.modif} is below 0.05}
   }
 }
-\details{
-%%  ~~ If necessary, more details than the __description__ above ~~
-}
 \source{
 %%  ~~ reference to a publication or URL from which the data were obtained ~~
 Tzoulaki I, Liberopoulos G, Ioannidis JPA. Assessment of claims of improved prediction beyond the Framingham risk score.  \emph{JAMA}. 2009 Dec 2;302(21):2345–52. 
 
-}
-\references{
-%%  ~~ possibly secondary sources and usages ~~
 }
 \examples{
 data(Tzoulaki)

diff --git a/man/gen.Rd b/man/gen.Rd
diff --git a/man/metapred.Rd b/man/metapred.Rd
diff --git a/man/perf.Rd b/man/perf.Rd
diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf