From 23332edd876415ca9bbff6402e5f99c7cc661aea Mon Sep 17 00:00:00 2001 From: Matteo Delucchi Date: Sun, 30 Jun 2024 09:40:15 +0200 Subject: [PATCH] first implementation of proposed improved cache structure in #58. WIP, still errors. --- R/build_score_cache_mle.R | 597 +++++++++++++++++++------------------- 1 file changed, 292 insertions(+), 305 deletions(-) diff --git a/R/build_score_cache_mle.R b/R/build_score_cache_mle.R index 930116cd..0942dbac 100755 --- a/R/build_score_cache_mle.R +++ b/R/build_score_cache_mle.R @@ -27,36 +27,36 @@ forLoopContent <- if (verbose) { message("we have grouping (mixed-effects model) glmm") } - ### - # Build arguments for glmm.bic - ### - # current node's name - child <- mycache[["children"]][row.num] # child as integer - child.name <- colnames(mycache[["node.defn"]])[child] - # current node's parents names - parents.names <- names(which(mycache[["node.defn"]][row.num,] != 0)) - # current node's distribution - child.dist <- data.dists[child] # distribution <- data.dists[child] - # data.df including group.var=group.ids - data.df.grouping <- data.frame(data.df, grp = factor(group.ids)) - names(data.df.grouping) <- c(names(data.df), group.var) - - ### - # Beginn of GLMM - ### - # Build formula statement - if (length(parents.names) == 0){ - # no parent only random effect - model <- as.formula(paste(child.name, "~ 1 + (1|", group.var, ")")) - } else if (length(parents.names) > 0){ - model <- as.formula(paste(child.name, "~ (1|", group.var, ")+", paste(parents.names, collapse = "+"))) - } else { - stop("Cannot build model formula. Unknown predictor names of parent nodes in 'parents.name'.") - } + ### + # Build arguments for glmm.bic + ### + # current node's name + child <- mycache[["children"]][row.num] # child as integer + child.name <- colnames(mycache[["node.defn"]])[child] + # current node's parents names + parents.names <- names(which(mycache[["node.defn"]][row.num,] != 0)) + # current node's distribution + child.dist <- data.dists[child] # distribution <- data.dists[child] + # data.df including group.var=group.ids + data.df.grouping <- data.frame(data.df, grp = factor(group.ids)) + names(data.df.grouping) <- c(names(data.df), group.var) + + ### + # Beginn of GLMM + ### + # Build formula statement + if (length(parents.names) == 0){ + # no parent only random effect + model <- as.formula(paste(child.name, "~ 1 + (1|", group.var, ")")) + } else if (length(parents.names) > 0){ + model <- as.formula(paste(child.name, "~ (1|", group.var, ")+", paste(parents.names, collapse = "+"))) + } else { + stop("Cannot build model formula. 
Unknown predictor names of parent nodes in 'parents.name'.") + } - # Main part: Depending on child's distribution, call the appropriate modelling function - switch (as.character(child.dist), - gaussian = { + # Main part: Depending on child's distribution, call the appropriate modelling function + switch (as.character(child.dist), + gaussian = { tryCatch({ fit <- lme4::lmer(model, data = data.df.grouping) }, error=function(e)NULL) @@ -75,8 +75,8 @@ forLoopContent <- } # if fit is still NULL, try fixed effect only - }, - binomial = { + }, + binomial = { tryCatch({ fit <- lme4::glmer(model, data = data.df.grouping, family = "binomial") }, error=function(e)NULL) @@ -96,8 +96,8 @@ forLoopContent <- } # if fit is still NULL, do not modify model further as this would change the structure of the dag but return very low score (further down) - }, - poisson = { + }, + poisson = { tryCatch({ fit <- lme4::glmer(model, data = data.df.grouping, family = "poisson") }, error=function(e)NULL) @@ -117,54 +117,54 @@ forLoopContent <- } # if fit is still NULL, do not modify model further as this would change the structure of the dag but return very low score (further down) - }, - multinomial = { - if (length(parents.names) == 0){ - model_basic <- as.formula(paste(child.name, "~ 1")) - model_random <- as.formula(paste("~ 1|", group.var, sep = "")) - } else { - model_basic <- as.formula(paste(child.name, "~ ", paste(parents.names, collapse = "+"))) - model_random <- as.formula(paste("~ 1|", group.var, sep = "")) - } - if(verbose) message(paste("using mblogit with fixed term:", deparse1(model_basic), "and random term:", deparse1(model_random))) else NA - - if (control[["catcov.mblogit"]] == "free"){ - tryCatch({ - fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "free") - }, error=function(e) NULL) - } else if (control[["catcov.mblogit"]] == "diagonal"){ - tryCatch({ - fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "diagonal") + }, + multinomial = { + if (length(parents.names) == 0){ + model_basic <- as.formula(paste(child.name, "~ 1")) + model_random <- as.formula(paste("~ 1|", group.var, sep = "")) + } else { + model_basic <- as.formula(paste(child.name, "~ ", paste(parents.names, collapse = "+"))) + model_random <- as.formula(paste("~ 1|", group.var, sep = "")) + } + if(verbose) message(paste("using mblogit with fixed term:", deparse1(model_basic), "and random term:", deparse1(model_random))) else NA + + if (control[["catcov.mblogit"]] == "free"){ + tryCatch({ + fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "free") + }, error=function(e) NULL) + } else if (control[["catcov.mblogit"]] == "diagonal"){ + tryCatch({ + fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "diagonal") + # manipulate VarCov to bring in correct shape + fit_vcov <- matrix(0, nrow = length(fit$VarCov), ncol = length(fit$VarCov)) + diag(fit_vcov) <- unlist(fit$VarCov, use.names = FALSE) + # col and rownames + cn <- c() + rn <- c() + for(i in 1:length(fit$VarCov)){ + cn[i] <- colnames(fit$VarCov[[i]]) + rn[i] <- rownames(fit$VarCov[[i]]) + } + colnames(fit_vcov) <- cn + rownames(fit_vcov) <- rn + # replace + fit_vcov <- list(fit_vcov) + names(fit_vcov) <- group.var + fit$VarCov <- fit_vcov + }, error=function(e) NULL) + } else if (control[["catcov.mblogit"]] == "single"){ + stop("'catcov.mblogit' == 'single' is not yet 
implemented.") + fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "single") # manipulate VarCov to bring in correct shape - fit_vcov <- matrix(0, nrow = length(fit$VarCov), ncol = length(fit$VarCov)) - diag(fit_vcov) <- unlist(fit$VarCov, use.names = FALSE) - # col and rownames - cn <- c() - rn <- c() - for(i in 1:length(fit$VarCov)){ - cn[i] <- colnames(fit$VarCov[[i]]) - rn[i] <- rownames(fit$VarCov[[i]]) - } - colnames(fit_vcov) <- cn - rownames(fit_vcov) <- rn - # replace - fit_vcov <- list(fit_vcov) - names(fit_vcov) <- group.var - fit$VarCov <- fit_vcov - }, error=function(e) NULL) - } else if (control[["catcov.mblogit"]] == "single"){ - stop("'catcov.mblogit' == 'single' is not yet implemented.") - fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = "single") - # manipulate VarCov to bring in correct shape - } else { - stop("invalid 'catcov.mblogit' argument. Must be one of 'free', 'diagonal' or 'single'.") - } + } else { + stop("invalid 'catcov.mblogit' argument. Must be one of 'free', 'diagonal' or 'single'.") + } if (is.null(fit)){ # relax tolerances for change in parameter values and objective function. # tryCatch({fit <- mclogit::mblogit(formula = model_basic, random = model_random, data = data.df.grouping, catCov = control[["catcov.mblogit"]] - # control = mclogit::mclogit.control(epsilon = control[["epsilon"]], - # trace = control[["trace.mblogit"]])) + # control = mclogit::mclogit.control(epsilon = control[["epsilon"]], + # trace = control[["trace.mblogit"]])) # }, error=function(e)NULL) if (control[["catcov.mblogit"]] == "free"){ tryCatch({ @@ -205,109 +205,109 @@ forLoopContent <- } } # if fit is still NULL, do not modify model further as this would change the structure of the dag but return very low score (further down) - } - ) - ### - # End of GLMM - ### - # collect values to return - if(!is.null(fit)){ - fit_loglik <- logLik(fit) - fit_aic <- AIC(fit) - fit_bic <- BIC(fit) - # fit_mdl <- fit_bic + (1 + sum(mycache[["node.defn.multi"]][row.num, ]) - num.na) * log(ncol(data.df.grouping)) # num.na: number of "unused" variables determinded in removing columns if rank deficient - fit_mdl <- fit_bic + (attributes(logLik(fit))$df-1) * log(ncol(data.df.grouping)) # logLik returns object of degrees of freedom which corresponds to the models parameters +1 - - c(fit_loglik, fit_aic, fit_bic, fit_mdl) - } else if(is.null(fit)){ - # no convergence, singularity, rank-deficiency, return very low score - c(rep(-Inf, 4)) - } else { - stop("Unknown state of fit. I should never end up here.") - } - } else if (is.null(group.var)) { - # we have no grouping (do what was always done). 
- child <- mycache[["children"]][row.num] - distribution <- data.dists[child] - Y <- data.matrix(data.df[, child]) - - if (is.null(adj.vars)) { - if ("multinomial" %in% data.dists[as.logical(mycache$node.defn[row.num, ])]) { - X <- data.matrix(cbind(data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } + ) + ### + # End of GLMM + ### + # collect values to return + if(!is.null(fit)){ + fit_loglik <- logLik(fit) + fit_aic <- AIC(fit) + fit_bic <- BIC(fit) + # fit_mdl <- fit_bic + (1 + sum(mycache[["node.defn.multi"]][row.num, ]) - num.na) * log(ncol(data.df.grouping)) # num.na: number of "unused" variables determinded in removing columns if rank deficient + fit_mdl <- fit_bic + (attributes(logLik(fit))$df-1) * log(ncol(data.df.grouping)) # logLik returns object of degrees of freedom which corresponds to the models parameters +1 + + c(fit_loglik, fit_aic, fit_bic, fit_mdl) + } else if(is.null(fit)){ + # no convergence, singularity, rank-deficiency, return very low score + c(rep(-Inf, 4)) } else { - X <- data.matrix(cbind(rep(1, length(data.df[, 1])), data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + stop("Unknown state of fit. I should never end up here.") } - } else { - if ("multinomial" %in% data.dists[as.logical(mycache$node.defn.adj[row.num, ])]) { - X <- data.matrix(cbind(data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } else if (is.null(group.var)) { + # we have no grouping (do what was always done). + child <- mycache[["children"]][row.num] + distribution <- data.dists[child] + Y <- data.matrix(data.df[, child]) + + if (is.null(adj.vars)) { + if ("multinomial" %in% data.dists[as.logical(mycache$node.defn[row.num, ])]) { + X <- data.matrix(cbind(data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } else { + X <- data.matrix(cbind(rep(1, length(data.df[, 1])), data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } } else { - X <- data.matrix(cbind(rep(1, length(data.df[, 1])), data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + if ("multinomial" %in% data.dists[as.logical(mycache$node.defn.adj[row.num, ])]) { + X <- data.matrix(cbind(data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } else { + X <- data.matrix(cbind(rep(1, length(data.df[, 1])), data.df.multi[, as.logical(mycache[["node.defn.multi"]][row.num, ])])) + } } - } - ## Rank deficiency - num.na <- 0 + ## Rank deficiency + num.na <- 0 - R <- rank_cpp(X) - r <- ncol(X) - R_col <- R/r + R <- rank_cpp(X) + r <- ncol(X) + R_col <- R/r - if (R_col != 1 & as.character(distribution) == "binomial") { - Y1 <- if (is.factor(Y)) numeric(Y) else Y + if (R_col != 1 & as.character(distribution) == "binomial") { + Y1 <- if (is.factor(Y)) numeric(Y) else Y - while (rank_cpp(X)/ncol(X) != 1) { - X <- X[, -1] - num.na <- num.na + 1 - if (is.null(ncol(X))) - X <- as.matrix(X) - } + while (rank_cpp(X)/ncol(X) != 1) { + X <- X[, -1] + num.na <- num.na + 1 + if (is.null(ncol(X))) + X <- as.matrix(X) + } - tryCatch(fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]])) - # tryCatch(fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]), - # error = function(e) { - # while (rank_cpp(X)/ncol(X) != 1) { - # X <- X[, -1] - # num.na <- num.na + 1 - # if (is.null(ncol(X))) - # X <- as.matrix(X) - # } - # fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]) - 
# }, finally = fit) + tryCatch(fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]])) + # tryCatch(fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]), + # error = function(e) { + # while (rank_cpp(X)/ncol(X) != 1) { + # X <- X[, -1] + # num.na <- num.na + 1 + # if (is.null(ncol(X))) + # X <- as.matrix(X) + # } + # fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]) + # }, finally = fit) + } else { + switch(as.character(distribution), + binomial = { + Y1 <- if (is.factor(Y)) numeric(Y) else Y + fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]) + if (is.na(sum(fit[[1]]))) fit <- irls_binomial_cpp_fast_br(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]]) + }, gaussian = { + suppressWarnings(fit <- irls_gaussian_cpp_fast(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]])) + }, poisson = { + suppressWarnings(fit <- irls_poisson_cpp_fast(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]])) + }, multinomial = { + + Ymulti <- data.matrix(model.matrix(~-1 + data.df.lvl[, child])) + + p <- ncol(Ymulti) + mask <- c(rep(FALSE, r + 1L), rep(c(FALSE, rep(TRUE, r)), p - 1L)) + + tmp <- nnet.default(x = X, y = Ymulti, mask = mask, size = 0, + skip = TRUE, softmax = TRUE, rang = 0, trace = FALSE) + + fit <- NULL + fit$loglik <- -tmp$value + edf <- ifelse(length(tmp$lev) == 2L, 1, length(tmp$lev) - 1) * R + fit$aic <- 2 * tmp$value + 2 * edf + fit$bic <- 2 * tmp$value + edf * log(dim(data.df)[1]) + }) + } + c(fit$loglik, + fit$aic, + fit$bic, + fit$bic + (1 + sum(mycache[["node.defn.multi"]][row.num, ]) - num.na) * log(n)) } else { - switch(as.character(distribution), - binomial = { - Y1 <- if (is.factor(Y)) numeric(Y) else Y - fit <- irls_binomial_cpp_fast_br(A = X, b = Y1, maxit = control[["max.iters"]], tol = control[["tol"]]) - if (is.na(sum(fit[[1]]))) fit <- irls_binomial_cpp_fast_br(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]]) - }, gaussian = { - suppressWarnings(fit <- irls_gaussian_cpp_fast(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]])) - }, poisson = { - suppressWarnings(fit <- irls_poisson_cpp_fast(A = X, b = Y, maxit = control[["max.iters"]], tol = control[["tol"]])) - }, multinomial = { - - Ymulti <- data.matrix(model.matrix(~-1 + data.df.lvl[, child])) - - p <- ncol(Ymulti) - mask <- c(rep(FALSE, r + 1L), rep(c(FALSE, rep(TRUE, r)), p - 1L)) - - tmp <- nnet.default(x = X, y = Ymulti, mask = mask, size = 0, - skip = TRUE, softmax = TRUE, rang = 0, trace = FALSE) - - fit <- NULL - fit$loglik <- -tmp$value - edf <- ifelse(length(tmp$lev) == 2L, 1, length(tmp$lev) - 1) * R - fit$aic <- 2 * tmp$value + 2 * edf - fit$bic <- 2 * tmp$value + edf * log(dim(data.df)[1]) - }) + stop("Invalid `group.var`.") } - c(fit$loglik, - fit$aic, - fit$bic, - fit$bic + (1 + sum(mycache[["node.defn.multi"]][row.num, ]) - num.na) * log(n)) - } else { - stop("Invalid `group.var`.") - } -} # End of forLoopContent() + } # End of forLoopContent() #' \code{buildScoreCache.mle} and \code{buildScoreCache.bayes} are internal functions called by \code{buildScoreCache}. 
#' @@ -341,8 +341,8 @@ buildScoreCache.mle <- ## which.nodes if (!is.null(which.nodes)) { - data.df <- data.df[, which.nodes] - data.dists <- data.dists[which.nodes] + data.df <- data.df[, which.nodes] + data.dists <- data.dists[which.nodes] } ## number of variable: @@ -354,120 +354,107 @@ buildScoreCache.mle <- ## standardize gaussian variables to zero mean and sd=1 have at least one gaussian variable if (centre && !is.null(data.dists == "gaussian")) { - for (i in names(data.dists)[(data.dists == "gaussian")]) { - data.df[, i] <- (data.df[, i] - mean(data.df[, i]))/sd(data.df[, i]) - } + for (i in names(data.dists)[(data.dists == "gaussian")]) { + data.df[, i] <- (data.df[, i] - mean(data.df[, i]))/sd(data.df[, i]) + } } for (i in 1:nvars) { - if (data.dists[[i]] == "binomial" & !inherits(data.df[, i], "numeric")) { - data.df[, i] <- as.numeric(factor(data.df[, i])) - 1 - } - if (data.dists[[i]] == "multinomial") { - data.df[, i] <- factor(data.df[, i]) - } + if (data.dists[[i]] == "binomial" & !inherits(data.df[, i], "numeric")) { + data.df[, i] <- as.numeric(factor(data.df[, i])) - 1 + } + if (data.dists[[i]] == "multinomial") { + data.df[, i] <- factor(data.df[, i]) + } } # adjustment: storing of df if (!is.null(adj.vars)) { - data.df.adj <- data.df - data.df <- data.df[, -adj.vars] - nvars <- nvars - length(adj.vars) + data.df.adj <- data.df + data.df <- data.df[, -adj.vars] + nvars <- nvars - length(adj.vars) } ############################## Function to create the cache + # Create node specific max. parent vector + if (length(max.parents) == 1) { + max.parents <- rep(max.parents, nvars) + } - + # Start of the cache creation if (!is.null(defn.res)) { - max.parents <- max(apply(defn.res[["node.defn"]], 1, sum)) - + # If the cache is already computed then we just need to update the max.parents + max.parents <- max(apply(defn.res[["node.defn"]], 1, sum)) } else { - ## Computing the cache - fun.return <- function(x, n) { - # x: vector or single integer - # n: number of variables/columns in data.df - # returns: vector of size n-1 with 1 for all x and zeros otherwise - v <- rep(0, n - 1) - v[x] <- 1 - return(v) - } - - node.defn <- matrix(data = as.integer(0), nrow = 1L, ncol = nvars) - children <- 1 - - for (j in 1:nvars) { - if (j != 1) { - node.defn <- rbind(node.defn, matrix(data = as.integer(0), - nrow = 1L, ncol = nvars)) - children <- cbind(children, j) - } - # node.defn <- rbind(node.defn,matrix(data = 0,nrow = 1,ncol = n)) - - if(is.list(max.parents)){ - stop("ISSUE: `max.parents` as list is not yet implemented further down here. Try with a single numeric value as max.parents instead.") - if(!is.null(which.nodes)){ - stop("ISSUE: `max.parents` as list in combination with `which.nodes` is not yet implemented further down here. Try with single numeric as max.parents instead.") - } - } else if (is.numeric(max.parents) && length(max.parents)>1){ - if (length(unique(max.parents)) == 1){ - max.parents <- unique(max.parents) - } else { - stop("ISSUE: `max.parents` with node specific values that are not all the same, is not yet implemented further down here.") - } - } + ## Computing the cache + fun.return <- function(x, n) { + # x: vector or single integer + # n: number of variables/columns in data.df + # returns: vector of size n-1 with 1 for all x and zeros otherwise + v <- rep(0, n - 1) + v[x] <- 1 + return(v) + } - if(max.parents == nvars){ - max.parents <- max.parents-1 - warning(paste("`max.par` == no. of variables. I set it to (no. 
of variables - 1)=", max.parents)) #NOTE: This might cause differences to method="bayes"! - } + # Generate all possible bit patterns for n variables, with a maximum of m 1s + generateBitPatterns <- function(n, m){ + z <- rep(0, n) + do.call(rbind, lapply(0:m, function(i) t(apply(combn(1:n, i), 2, function(k){ + z[k] <- 1 + return(z) + }))) + ) + } - for (i in 1:(max.parents)) { - tmp <- t(combn(x = (nvars - 1), m = i, FUN = fun.return, n = nvars, simplify = TRUE)) - tmp <- t(apply(X = tmp, MARGIN = 1, FUN = function(x) append(x = x, values = 0, after = j - 1))) + # Function to generate all possible combinations of parents + filteredCombinations <- function(x, m, bannedParents, retainedParents){ + # These are the parents that cannot change + fixedParents <- bannedParents | retainedParents | (fun.return(x, length(x) + 2) + 1) %% 2 + + # These are the parents that can change + parentPossibleChoices <- which(fixedParents == 0) + numPossibleChoices <- length(parentPossibleChoices) + numRetainedParents <- sum(retainedParents) + + # Generate all possible combinations of parents, taking account of banned, retained and maximum number of parents + parentChoices <- generateBitPatterns(numPossibleChoices, min(m-numRetainedParents, numPossibleChoices)) == 1 + output <- t(apply(parentChoices, 1, function(pc){ + combinedRow <- 1L*(retainedParents | fun.return(parentPossibleChoices[pc], length(x) + 2)) + combinedRow + })) + output + } - node.defn <- rbind(node.defn, tmp) + children <- matrix(nrow = 1, ncol = 0) + node.defn.list <- list() - # children position - children <- cbind(children, t(rep(j, length(tmp[, 1])))) - } + for (j in 1:nvars){ + if(is.list(max.parents)){ + stop("ISSUE: `max.parents` as list is not yet implemented further down here. Try with a single numeric value as max.parents instead.") + if(!is.null(which.nodes)){ + stop("ISSUE: `max.parents` as list in combination with `which.nodes` is not yet implemented further down here. Try with single numeric as max.parents instead.") + } } - # children <- rowSums(node.defn) + # The parents that are banned and retained for node j + bannedParents <- dag.banned[j, ] + retainedParents <- dag.retained[j, ] + # All possible parents for node j, which is all nodes except j + parentChoice <- c(seq.int(from = 1, length.out = j-1), seq.int(from = j+1, length.out = nvars-1)) + # How many parents we are keeping for node j + numretainedParents <- sum(retainedParents) + # The maximum number of parents for node j + m <- max.parents[j] + + # Generate all possible combinations of parents for node j + tmp <- filteredCombinations(x = parentChoice, m = m, bannedParents = bannedParents, retainedParents = retainedParents) + # We need a sparse matrix here to deal with large numbers of variables, otherwise memory usage can become very high + tmp2 <- Matrix::Matrix(tmp, sparse = TRUE) + node.defn.list[[length(node.defn.list) + 1]] <- tmp2 + children <- cbind(children, t(rep(j, length(tmp2[, 1])))) + + node.defn <- do.call(rbind, node.defn.list) colnames(node.defn) <- colnames(data.df) - ## Coerce numeric matrix into integer matrix !!! 
- node.defn <- apply(node.defn, c(1, 2), function(x) { - (as.integer(x)) - }) - - children <- as.integer(children) - # node.defn_ <- node.defn - - ## DAG RETAIN/BANNED - for (i in 1:nvars) { - for (j in 1:nvars) { - - ## DAG RETAIN - if (dag.retained[i, j] != 0) { - tmp.indices <- which(children == i & node.defn[, j] == 0) - - if (length(tmp.indices) != 0) { - node.defn <- node.defn[-tmp.indices, ] - children <- children[-tmp.indices] - } - } - - ## DAG BANNED - if (dag.banned[i, j] != 0) { - tmp.indices <- which(children == i & node.defn[, j] == 1) - - if (length(tmp.indices) != 0) { - node.defn <- node.defn[-tmp.indices, ] - children <- children[-tmp.indices] - } - } - - } - } mycache <- list(children = as.integer(children), node.defn = (node.defn)) @@ -477,14 +464,14 @@ buildScoreCache.mle <- ###FIXME if (is.list(max.parents)) { - for (z in 1:nvars) { - tmp <- mycache[["node.defn"]][mycache[["children"]] == z, ] - if (is.null(dim(tmp))) stop("Increase parents for node ",z," (due to retain)") - - if (any(diff(unlist(max.parents)) !=0)) - stop("For method='mle', unique number of parents required") - mycache[["node.defn"]][mycache[["children"]] == z, ] <- tmp[rowSums(tmp) <= unlist(max.parents[z]), ] - } + for (z in 1:nvars) { + tmp <- mycache[["node.defn"]][mycache[["children"]] == z, ] + if (is.null(dim(tmp))) stop("Increase parents for node ",z," (due to retain)") + + if (any(diff(unlist(max.parents)) !=0)) + stop("For method='mle', unique number of parents required") + mycache[["node.defn"]][mycache[["children"]] == z, ] <- tmp[rowSums(tmp) <= unlist(max.parents[z]), ] + } } @@ -494,24 +481,24 @@ buildScoreCache.mle <- if (!is.null(adj.vars)) { - # mycache$node.defn.adj <- mycache$node.defn + # mycache$node.defn.adj <- mycache$node.defn - ## adding adjustment column set to zero mycache$node.defn.adj <- cbind(mycache$node.defn,matrix(data = 0,nrow = dim(mycache$node.defn)[1],ncol = length(adj.vars))) - mycache$node.defn <- cbind(mycache$node.defn, matrix(data = 0, nrow = dim(mycache$node.defn)[1], ncol = length(adj.vars))) + ## adding adjustment column set to zero mycache$node.defn.adj <- cbind(mycache$node.defn,matrix(data = 0,nrow = dim(mycache$node.defn)[1],ncol = length(adj.vars))) + mycache$node.defn <- cbind(mycache$node.defn, matrix(data = 0, nrow = dim(mycache$node.defn)[1], ncol = length(adj.vars))) - if (is.null(cor.vars)) { - cor.vars <- colnames(data.df) - } + if (is.null(cor.vars)) { + cor.vars <- colnames(data.df) + } - ## adjustment variables + ## adjustment variables - mycache$node.defn[mycache$children[match(cor.vars, colnames(data.df))], dim(data.df)[2]:dim(data.df.adj)] <- 1 + mycache$node.defn[mycache$children[match(cor.vars, colnames(data.df))], dim(data.df)[2]:dim(data.df.adj)] <- 1 - ## output - colnames(mycache$node.defn) <- c(colnames(data.df), adj.vars) + ## output + colnames(mycache$node.defn) <- c(colnames(data.df), adj.vars) - mycache$node.defn <- mycache$node.defn[, names(data.df.adj)] - data.df <- data.df.adj + mycache$node.defn <- mycache$node.defn[, names(data.df.adj)] + data.df <- data.df.adj } ##---------------------- @@ -522,18 +509,18 @@ buildScoreCache.mle <- repetition.multi <- vector(length = nvars) for (i in 1:nvars) { - if (data.dists[[i]] %in% c("binomial", "poisson", "gaussian")) { - repetition.multi[i] <- 1 - } else { - repetition.multi[i] <- nlevels(data.df.lvl[, i]) - } + if (data.dists[[i]] %in% c("binomial", "poisson", "gaussian")) { + repetition.multi[i] <- 1 + } else { + repetition.multi[i] <- nlevels(data.df.lvl[, i]) + } } if 
(!is.null(adj.vars)) { - mycache$node.defn.multi <- mycache$node.defn.adj[, rep(1:nvars, repetition.multi)] - data.df <- data.df.adj[, colnames(mycache$node.defn.adj)] + mycache$node.defn.multi <- mycache$node.defn.adj[, rep(1:nvars, repetition.multi)] + data.df <- data.df.adj[, colnames(mycache$node.defn.adj)] } else { - mycache$node.defn.multi <- mycache$node.defn[, rep(1:nvars, repetition.multi)] + mycache$node.defn.multi <- mycache$node.defn[, rep(1:nvars, repetition.multi)] } @@ -542,21 +529,21 @@ buildScoreCache.mle <- data.df.multi <- NULL for (i in 1:nvars) { - if (data.dists[[i]] %in% c("binomial", "poisson", "gaussian")) { - data.df.multi <- as.data.frame(cbind(data.df.multi, data.df[, i])) - colnames(data.df.multi)[length(colnames(data.df.multi))] <- colnames(data.df)[i] - } else { - tmp <- model.matrix(~-1 + factor(data.df.lvl[, i])) - colnames(tmp) <- paste0(names(data.df.lvl)[i], levels(factor(data.df.lvl[, i]))) - data.df.multi <- as.data.frame(cbind(data.df.multi, tmp)) - } + if (data.dists[[i]] %in% c("binomial", "poisson", "gaussian")) { + data.df.multi <- as.data.frame(cbind(data.df.multi, data.df[, i])) + colnames(data.df.multi)[length(colnames(data.df.multi))] <- colnames(data.df)[i] + } else { + tmp <- model.matrix(~-1 + factor(data.df.lvl[, i])) + colnames(tmp) <- paste0(names(data.df.lvl)[i], levels(factor(data.df.lvl[, i]))) + data.df.multi <- as.data.frame(cbind(data.df.multi, tmp)) + } } - } - if (dry.run) { + } + if (dry.run) { return(mycache) + } } - ## EOF cache creation row.num <- NULL # To avoid check comment: 'no visible binding for global variable out <- list() @@ -607,7 +594,7 @@ buildScoreCache.mle <- cl <- makeCluster(ncores, type = control[["cluster.type"]], rscript_args = "--no-environ" # only available for "FORK" - ) + ) } registerDoParallel(cl) @@ -678,5 +665,5 @@ buildScoreCache.mle <- out[["method"]] <- "mle" return(out) -} + }
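
Notes on the proposed cache construction (appended for review, not part of the patch):

The patch replaces the old combn()-plus-post-filtering cache construction with direct enumeration of admissible parent sets via generateBitPatterns() and filteredCombinations(), honouring banned and retained arcs up front. Below is a minimal standalone sketch of that idea, assuming 0/1 integer vectors for the banned/retained constraints; the helper names generate_bit_patterns() and enumerate_parent_sets() are illustrative only and are not part of the abn API.

    # All 0/1 patterns over n candidate slots with at most m ones.
    generate_bit_patterns <- function(n, m) {
      z <- rep(0L, n)
      do.call(rbind, lapply(0:m, function(i) {
        if (i == 0L) return(matrix(z, nrow = 1))
        t(apply(combn(n, i), 2, function(k) { z[k] <- 1L; z }))
      }))
    }

    # Enumerate parent sets for one child, respecting banned/retained parents.
    enumerate_parent_sets <- function(child, nvars, max.parents,
                                      banned = rep(0L, nvars),
                                      retained = rep(0L, nvars)) {
      fixed <- banned | retained              # slots that may not vary
      fixed[child] <- 1L                      # a node is never its own parent
      free <- which(!fixed)
      pats <- generate_bit_patterns(length(free),
                                    min(max.parents - sum(retained), length(free)))
      # start every row from the retained parents, then fill the free slots
      out <- matrix(rep(as.integer(retained), nrow(pats)),
                    nrow = nrow(pats), byrow = TRUE)
      out[, free] <- pats
      out
    }

    # Example: 4 nodes, child 2, at most 2 parents, node 4 banned as a parent of 2.
    enumerate_parent_sets(child = 2, nvars = 4, max.parents = 2,
                          banned = c(0L, 0L, 0L, 1L))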
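
Per node, the patch then stores the admissible parent sets as a sparse matrix (Matrix::Matrix(..., sparse = TRUE)) and row-binds them into a single node.defn with a parallel children index, to keep memory usage manageable for many variables. A rough sketch of that assembly, reusing the enumerate_parent_sets() helper sketched above (the patch additionally sets column names and takes the banned/retained rows from dag.banned/dag.retained):

    library(Matrix)

    nvars <- 4
    node.defn.list <- list()
    children <- integer(0)
    for (j in 1:nvars) {
      tmp <- enumerate_parent_sets(child = j, nvars = nvars, max.parents = 2)
      node.defn.list[[j]] <- Matrix::Matrix(tmp, sparse = TRUE)   # sparse per-node block
      children <- c(children, rep(j, nrow(tmp)))
    }
    node.defn <- do.call(rbind, node.defn.list)                   # one sparse cache matrix
    mycache <- list(children = as.integer(children), node.defn = node.defn)
    dim(mycache$node.defn)   # rows = candidate parent sets over all children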
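
For grouped data, the scoring loop builds a random-intercept formula per child, fits it with lme4, and returns log-likelihood, AIC, BIC and an MDL-style score, falling back to -Inf when the fit fails. A hedged sketch with made-up data; only the formula construction and score vector mirror the patch, the rest is illustrative:

    library(lme4)

    build_glmm_formula <- function(child.name, parents.names, group.var) {
      if (length(parents.names) == 0) {
        as.formula(paste(child.name, "~ 1 + (1|", group.var, ")"))
      } else {
        as.formula(paste(child.name, "~ (1|", group.var, ") +",
                         paste(parents.names, collapse = " + ")))
      }
    }

    set.seed(1)
    d <- data.frame(y = rnorm(60), x1 = rnorm(60), grp = factor(rep(1:6, each = 10)))
    f <- build_glmm_formula("y", "x1", "grp")     # y ~ (1|grp) + x1
    fit <- tryCatch(lme4::lmer(f, data = d), error = function(e) NULL)

    if (!is.null(fit)) {
      # MDL-style penalty as in the patch: BIC + (df - 1) * log(ncol(data))
      c(loglik = as.numeric(logLik(fit)),
        aic    = AIC(fit),
        bic    = BIC(fit),
        mdl    = BIC(fit) + (attr(logLik(fit), "df") - 1) * log(ncol(d)))
    } else {
      rep(-Inf, 4)   # no convergence / singularity: return a very low score
    }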