Merge pull request #27 from pawelqs/develop

Develop
pawelqs · Oct 13, 2023 · bd93223 · bd93223
2 parents 0d6a621 + 68ee349
commit bd93223
Show file tree

Hide file tree

Showing 29 changed files with 581 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ tests/testthat/Rplots.pdf
 inst/develop_file*
 
 inst/doc
+Rplots.pdf
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cevomod
 Title: Cancer Evolution Models
-Version: 2.2.0
+Version: 2.3.0
 Authors@R: 
     person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-4367-9821"))
@@ -31,7 +31,8 @@ Suggests:
     tidyverse,
     vdiffr,
     readthis,
-    rsample
+    rsample,
+    processx
 Config/testthat/edition: 3
 VignetteBuilder: knitr
 Imports: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -91,6 +91,7 @@ export(add_to_cevodata)
 export(annotate_mutation_contexts)
 export(annotate_normal_cn)
 export(as_cevo_snvs)
+export(build_clip_container)
 export(calc_Mf_1f)
 export(calc_SFS)
 export(calc_cumulative_tails)
@@ -115,6 +116,7 @@ export(fit_powerlaw_tail_fixed)
 export(fit_powerlaw_tail_optim)
 export(fit_subclones)
 export(fit_subclones_bmix)
+export(fit_subclones_clip)
 export(fit_subclones_mclust)
 export(fix_powerlaw_N_mutations)
 export(generate_neutral_snvs)
@@ -124,6 +126,7 @@ export(get_Mf_1f)
 export(get_SFS)
 export(get_SNVs_wider)
 export(get_cevomod_verbosity)
+export(get_containers_dir)
 export(get_frequency_measure_name)
 export(get_model_names)
 export(get_models)
@@ -174,11 +177,13 @@ export(scale_fill_cevomod)
 export(scale_fill_pnw)
 export(set_cancer_type)
 export(set_cevomod_verbosity)
+export(set_containers_dir)
 export(shuffle)
 export(split_by)
 export(stat_cumulative_tail)
 export(theme_ellie)
 export(to_clip)
+export(unite_mutation_id)
 export(use_purity)
 export(variant_classification_filter)
 import(dplyr)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,7 @@
 
+## cevomod 2.3.0
+* subclones can be fitted using [CliP](https://github.com/wwylab/CliP) [(Jiang et al., 2021)](https://www.biorxiv.org/content/10.1101/2021.03.31.437383v1) (`fit_subclones(method = "CliP")`). This option requires that the Apptainer is installed. CliP container image can be build with `build_clip_container()`
+
 ## cevomod 2.2.0
 * fit_powerlaw_tail_fixed() has a bootstrap option
 

diff --git a/R/cevo_snvs.R b/R/cevo_snvs.R
@@ -103,8 +103,13 @@ count_mutation_types <- function(snvs) {
 }
 
 
-unite_mutation_id <- function(snvs) {
-  unite(snvs, "mutation_id", "chrom":"alt", sep = "-")
+#' Unite many columns to create mutation_id column
+#' @param snvs SNVs
+#' @param sep Separator
+#' @param remove Remove united columns?
+#' @export
+unite_mutation_id <- function(snvs, sep = "-", remove = TRUE) {
+  unite(snvs, "mutation_id", "chrom":"alt", sep = sep, remove = remove)
 }
 
 

diff --git a/R/cevodata-export.R b/R/cevodata-export.R
@@ -120,17 +120,24 @@ save_clip_files <- function(clip_data, out_dir) {
   if (!dir.exists(out_dir)) {
     dir.create(out_dir)
   }
-  iwalk(
+
+  files <- imap(
     clip_data,
     function(x, sample_id) {
-      write_tsv(x$snvs, file.path(out_dir, str_c(sample_id, ".snv.tsv")))
-      write_tsv(x$cnvs, file.path(out_dir, str_c(sample_id, ".cnv.tsv")))
-      write_file(
-        as.character(x$purities),
-        file.path(out_dir, str_c(sample_id, ".purity.tsv"))
-      )
+      # sample_id2 <- str_replace(sample_id, " ", "_")
+      snvs_file <- file.path(out_dir, str_c(sample_id, ".snv.tsv"))
+      cnvs_file <- file.path(out_dir, str_c(sample_id, ".cnv.tsv"))
+      purity_file <- file.path(out_dir, str_c(sample_id, ".purity.tsv"))
+
+      write_tsv(x$snvs, snvs_file)
+      write_tsv(x$cnvs, cnvs_file)
+      write_file(as.character(x$purities), purity_file)
+
+      tibble(sample_id, snvs_file, cnvs_file, purity_file)
     }
   )
+
+  invisible(bind_rows(files))
 }
 
 
diff --git a/R/cevomod-persistent.R b/R/cevomod-persistent.R
@@ -0,0 +1,37 @@
+
+settings_dir <- tools::R_user_dir("cevomod", which = "config")
+settings_file <- file.path(settings_dir, "settings.rds")
+
+default_settings <- list(
+  containers_dir = NULL
+)
+
+
+get_settings <- function() {
+  if (file.exists(settings_file)) {
+    readRDS(settings_file)
+  } else {
+    default_settings
+  }
+}
+
+
+#' Get/Set the containers directory
+#' @param dir Path for containers
+#' @export
+set_containers_dir <- function(dir) {
+  settings <- get_settings()
+  settings$containers_dir <- dir
+
+  if(!dir.exists(settings_dir)) {
+    dir.create(settings_dir)
+  }
+  write_rds(settings, settings_file)
+}
+
+
+#' @rdname set_containers_dir
+#' @export
+get_containers_dir <- function() {
+  get_settings()$containers_dir
+}
diff --git a/R/containers.R b/R/containers.R
@@ -0,0 +1,43 @@
+#' Build the CliP Apptainer container
+#'
+#' CliP.sif is saved to
+#'   - out_dir, if provided
+#'   - if out_dir is NULL but the containers_dir was set using the [set_containers_dir()],
+#'     image will be saved to the set containers_dir.
+#'   - if out_dir is NULL and  the containers_dir was not set, image will
+#'     be saved to the current working dir.
+#' @param out_dir Path
+#' @param force Force build in image exists
+#' @export
+build_clip_container <- function(out_dir = NULL, force = FALSE) {
+  if (!is_apptainer_installed()) {
+    stop("Apptainer needs to be installed to build the containers")
+  }
+
+  if (is.null(out_dir) & !is.null(get_containers_dir())) {
+    out_dir <- get_containers_dir()
+  } else {
+    out_dir <- "."
+  }
+
+  sif_file <- file.path(out_dir, "CliP.sif") |>
+    str_replace("//", "/")
+
+  if (!file.exists(sif_file) | force) {
+    def_file <- system.file("CliP.def", package = "cevomod")
+    command <- str_c("apptainer build", sif_file, def_file, sep = " ")
+    system(command)
+  } else {
+    msg(sif_file, " exists. Set force = TRUE to overwrite it")
+  }
+}
+
+
+is_apptainer_installed <- function() {
+  rlang::check_installed("processx", reason = "to interact with system")
+  x <- processx::run("apptainer", args = "--version")
+  x$status == 0
+}
+
+
+
diff --git a/R/fit_powerlaw_tail_fixed.R b/R/fit_powerlaw_tail_fixed.R
@@ -22,6 +22,7 @@
 #'
 #' @param object SNVs tibble object
 #' @param rsq_treshold R-squared tresholds to keep model as neutral
+#' @param lm_length length of the linear model fits
 #' @param name name in the models' slot
 #' @param verbose verbose?
 #' @param ... other arguments
@@ -56,6 +57,7 @@ fit_powerlaw_tail_fixed <- function(object, ...) {
 #' @export
 fit_powerlaw_tail_fixed.cevodata <- function(object,
                                              rsq_treshold = 0.98,
+                                             lm_length = 0.05,
                                              name = "powerlaw_fixed",
                                              pct_left = 0.05, pct_right = 0.95,
                                              verbose = get_cevomod_verbosity(),
@@ -78,7 +80,7 @@ fit_powerlaw_tail_fixed.cevodata <- function(object,
     reframe(
       model = "powerlaw_fixed",
       component = "Neutral tail",
-      fit_optimal_lm(.data$data, rsq_treshold, pb)
+      fit_optimal_lm(.data$data, rsq_treshold, lm_length = lm_length, pb)
     )
   class(models) <- c("cevo_powerlaw_models", class(models))
 
@@ -89,7 +91,7 @@ fit_powerlaw_tail_fixed.cevodata <- function(object,
 }
 
 
-fit_optimal_lm <- function(data, rsq_treshold = 0.98, pb = NULL) {
+fit_optimal_lm <- function(data, rsq_treshold = 0.98, lm_length = 0.05, pb = NULL) {
   min_val <- min(data$f)
   max_val <- max(data$f)
   grid <- expand_grid(
@@ -98,7 +100,7 @@ fit_optimal_lm <- function(data, rsq_treshold = 0.98, pb = NULL) {
     ) |>
     mutate(length = .data$to - .data$from)
 
-  desired_length <- if ((max_val - min_val) >= 0.05) 0.05 else max(grid$length)
+  desired_length <- if ((max_val - min_val) >= lm_length) lm_length else max(grid$length)
   grid <- grid |>
     filter(near(.data$length, desired_length))
 

diff --git a/R/fit_powerlaw_tail_optim.R b/R/fit_powerlaw_tail_optim.R
@@ -203,6 +203,8 @@ fit_powerlaw_tail_optim.cevo_SFS_tbl <- function(object,
   data <- sfs |>
     left_join(bounds, by = "sample_id") |>
     filter(.data$f > .data$lower_bound, .data$f < .data$higher_bound) |>
+    mutate(length = n(), .by = "sample_id") |>
+    filter(length >= 3) |>
     select("sample_id", "f", "y") |>
     mutate(
       y = .data$y |>

diff --git a/R/fit_subclones.R b/R/fit_subclones.R
@@ -3,41 +3,72 @@
 #' Fit clonal and subclonal components of the model to the residuals of the
 #' power-law model
 #'
-#' @param object object
-#' @param N numbers of clones to for models
-#' @param powerlaw_model_name residual of which powerlaw model to use?
+#' @param object cevodata object
+#' @param N Vector of numbers of clones to for models
+#' @param powerlaw_model_name Residual of which powerlaw model to use?
 #'   powerlaw_fixed/powerlaw_optim
-#' @param snvs_name which snvs to to use?
-#' @param method clustering method to use: BMix and mclust are currently supported.
-#'   While mclust is a 3-4 times faster method, the BMix method is more accurate
-#'   and usually fast enough.
+#' @param snvs_name Which snvs to to use?
+#' @param cnvs_name Which cnvs to to use?
+#' @param method Clustering method to use. Currently supported methods:
+#'   - mclust - the fastest method, approximately 3-4 times faster than BMix,
+#'     but uses a gaussian mixture modelling
+#'   - BMix - is more accurate, considers subclones as binomial clusters,
+#'     slightly slower
+#'   - CliP - Clonal structure identification through penalizing pairwise
+#'     differences
 #' @param upper_f_limit ignore variants with f higher than
-#' @param verbose verbose?
+#' @param verbose Verbose?
 #' @name fit_subclones
 NULL
 
 
 
-#' @rdname fit_subclones
+#' @describeIn fit_subclones Provides a common interface for all other methods,
+#'   runs the selected method and passes all the required arguments down.
+#' @examples
+#' \dontrun{
+#' # Using BMix
+#' fit_subclones(test_data_fitted)
+#' # or
+#' fit_subclones_bmix(test_data_fitted)
+#'
+#' # Using mclust
+#' fit_subclones(test_data_fitted, method = "mclust")
+#' # or
+#' fit_subclones_mclust(test_data_fitted)
+#'
+#' # Using CliP
+#' set_containers_dir(selected_dir)
+#' build_clip_container()
+#' fit_subclones(test_data_fitted, method = "CliP")
+#' # or
+#' fit_subclones_clip(test_data_fitted)
+#' }
 #' @export
 fit_subclones <- function(object,
                           N = 1:3,
                           powerlaw_model_name = active_models(object),
                           snvs_name = default_SNVs(object),
+                          cnvs_name = default_CNVs(object),
                           method = "BMix",
                           upper_f_limit = 0.75,
+                          clip_sif = NULL,
+                          clip_input = file.path(tempdir(), "clip_input"),
+                          clip_output = file.path(tempdir(), "clip_output"),
                           verbose = get_cevomod_verbosity()) {
   if (method == "BMix") {
     object <- object |>
       fit_subclones_bmix(N, powerlaw_model_name, snvs_name, upper_f_limit, verbose)
   } else if (method == "mclust") {
     object <- object |>
       fit_subclones_mclust(N, powerlaw_model_name, snvs_name, upper_f_limit, verbose)
+  } else if (method == "CliP") {
+    object <- object |>
+      fit_subclones_clip(powerlaw_model_name, snvs_name, cnvs_name, upper_f_limit, verbose = verbose)
   } else {
-    stop("Currently supported methods are: BMix and mclust")
+    stop("Currently supported methods are: BMix, CliP, and mclust")
   }
 
-
   object
 }