Merge pull request #22 from pawelqs/develop

Develop
pawelqs · Jul 25, 2023 · cb2869c · cb2869c
2 parents 17e258f + 107e5c0
commit cb2869c
Show file tree

Hide file tree

Showing 11 changed files with 314 additions and 6 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cevomod
 Title: Cancer Evolution Models
-Version: 2.0.0
+Version: 2.1.0
 Authors@R: 
     person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-4367-9821"))
@@ -29,7 +29,8 @@ Suggests:
     shinyWidgets,
     testthat (>= 3.0.0),
     tidyverse,
-    vdiffr
+    vdiffr,
+    readthis
 Config/testthat/edition: 3
 VignetteBuilder: knitr
 Imports: 
@@ -59,6 +60,7 @@ Depends:
     R (>= 2.10)
 Remotes: 
     caravagnalab/mobster,
-    caravagnalab/BMix
+    caravagnalab/BMix,
+    pawelqs/readthis
 LazyData: true
 URL: https://pawelqs.github.io/cevomod/, https://github.com/pawelqs/cevomod
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,10 @@ S3method(add_CNV_data,cevodata)
 S3method(add_SNV_data,cevodata)
 S3method(add_patient_data,cevodata)
 S3method(add_sample_data,cevodata)
+S3method(add_to_cevodata,cevo_ASCAT)
+S3method(add_to_cevodata,cevo_FACETS)
+S3method(add_to_cevodata,cevo_Mutect)
+S3method(add_to_cevodata,cevo_Strelka)
 S3method(calc_Mf_1f,cevo_snvs)
 S3method(calc_Mf_1f,cevodata)
 S3method(calc_SFS,cevo_snvs)
@@ -76,8 +80,10 @@ export(SNVs_CNVs)
 export(active_models)
 export(add_CNV_data)
 export(add_SNV_data)
+export(add_data)
 export(add_patient_data)
 export(add_sample_data)
+export(add_to_cevodata)
 export(annotate_mutation_contexts)
 export(annotate_normal_cn)
 export(as_cevo_snvs)
@@ -169,6 +175,7 @@ export(split_by)
 export(stat_cumulative_tail)
 export(theme_ellie)
 export(to_clip)
+export(use_purity)
 export(variant_classification_filter)
 import(dplyr)
 import(forcats)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,8 @@
 
+## cevomod 2.1.0
+* cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function.
+
+
 ## cevomod 2.0.0
 * cevomod functions can now utilize VAF or CCF (Cancer Cell Fraction) as a measure
   of mutation frequency. CCF is calculated using the formula introduced in [Dentro et al. *Principles of Reconstructing the Subclonal Architecture of Cancers* (2015)](https://doi.org/10.1101/cshperspect.a026625)

diff --git a/R/cevodata-construction.R b/R/cevodata-construction.R
@@ -257,6 +257,33 @@ add_sample_data.cevodata <- function(object, data, ...) {
 }
 
 
+#' Choose purity measure
+#'
+#' <cevodata> metadata can contain purity measures in columns other than
+#' 'purity'. This function sets the 'purity' column to the values of the
+#' requested metadata column. If a 'purity' column already exists, its values
+#' are kept in the 'prev_purity' column.
+#'
+#' @param cd <cevodata> object
+#' @param name Name of the metadata column with chosen purity values
+#' @param verbose Verbose?
+#' @return `cd` with the updated 'purity' metadata column
+#' @export
+use_purity <- function(cd, name, verbose = get_cevomod_verbosity()) {
+  column_missing <- name %not in% names(cd$metadata)
+  if (column_missing) {
+    stop(
+      "`name` should be a name of the column in the metadata tibble, ",
+      "which should be used as purity measure"
+    )
+  }
+
+  msg("Using '", name, "' as default purity measure", verbose = verbose)
+
+  # Keep the values that are about to be overwritten
+  current_purity <- cd$metadata[["purity"]]
+  if (!is.null(current_purity)) {
+    cd$metadata$prev_purity <- current_purity
+  }
+
+  cd$metadata$purity <- cd$metadata[[name]]
+  cd
+}
+
+
 is_cevodata_singlepatient <- function(object) {
   n_patients <- count_patients(object)
   if (is.na(n_patients)) {

diff --git a/R/cevodata-readthis_integration.R b/R/cevodata-readthis_integration.R
@@ -0,0 +1,126 @@
+
+#' readthis integration
+#'
+#' @description
+#' [readthis](https://github.com/pawelqs/readthis) package may be used to easily
+#' read the data from some popular mutation callers into R environment. readthis
+#' functions can be supplied not only with single file paths, but also with
+#' lists of files or even paths to directories with files to be loaded, which
+#' is convenient since a cevodata object is meant to store the data from many
+#' samples.
+#'
+#' readthis functions return tibbles or lists of tibbles. These objects are
+#' usually instances of *cevo_<software_name>* S3 classes. cevomod implements
+#' methods that allow adding these types of data to the cevodata objects
+#' conveniently.
+#'
+#' @param cd <cevodata> object
+#' @param data Object read with readthis functions
+#' @param name Name for the data
+#' @param verbose Verbose?
+#' @param ... Other arguments
+#'
+#' @examplesIf requireNamespace("readthis", quietly = TRUE)
+#' # library(cevomod)
+#'
+#' ascat_dir <- system.file("extdata", "ASCAT", package = "readthis")
+#' ascat <- readthis::read_ascat_files(ascat_dir)
+#' cd <- init_cevodata("Test dataset") |>
+#'   add_data(ascat)
+#'
+#' @name readthis-integration
+NULL
+
+
+
+#' @describeIn readthis-integration add_data() function takes cevodata as the
+#'   first argument, so it is a preferred method for adding data in R pipelines.
+#'   Arguments passed in `...` (e.g. `name`, `verbose`) are forwarded to the
+#'   add_to_cevodata() method selected for the class of `data`.
+#' @export
+add_data <- function(cd, data, ...) {
+  # Forward `...` so that callers can override the method defaults
+  add_to_cevodata(data, cd, ...)
+}
+
+
+#' @describeIn readthis-integration add_to_cevodata() is an S3 generic
+#'   dispatching on the class of `data`. Its methods are called by the
+#'   add_data() function.
+#' @export
+add_to_cevodata <- function(data, cd, name, verbose, ...) {
+  UseMethod("add_to_cevodata", data)
+}
+
+
+# Method for ASCAT data: stores `data$cnvs` as CNV data under `name`, adds
+# `data$sample_statistics` (extended with an 'ascat_purity' column) to the
+# sample metadata and selects 'ascat_purity' as the purity measure.
+#' @export
+add_to_cevodata.cevo_ASCAT <- function(data, cd,
+                                       name = "ASCAT",
+                                       verbose = get_cevomod_verbosity(),
+                                       ...) {
+  # ascat_purity is computed as 1 - normal_contamination
+  sample_stats <- mutate(
+    data$sample_statistics,
+    ascat_purity = 1 - .data$normal_contamination
+  )
+
+  cd <- add_CNV_data(cd, data$cnvs, name = name)
+  cd <- add_sample_data(cd, sample_stats)
+  use_purity(cd, "ascat_purity", verbose = verbose)
+}
+
+
+# Method for FACETS data: the CNV table is stored under `name`, the 'Purity'
+# and 'Ploidy' values are added to the sample metadata as 'facets_purity' and
+# 'facets_ploidy', and 'facets_purity' is selected as the purity measure.
+#' @export
+add_to_cevodata.cevo_FACETS <- function(data, cd,
+                                        name = "FACETS",
+                                        verbose = get_cevomod_verbosity(),
+                                        ...) {
+  # Unique (sample_id, Purity, Ploidy) combinations form the sample metadata
+  sample_data <- data |>
+    select("sample_id", facets_purity = "Purity", facets_ploidy = "Ploidy") |>
+    unique()
+  # NOTE(review): the full table, including the 'Purity' and 'Ploidy' columns,
+  # is stored as CNV data. A copy without these columns used to be computed
+  # here but was never used, so it was removed; confirm whether it was meant
+  # to be passed to add_CNV_data() instead of `data`.
+  cd |>
+    add_CNV_data(data, name = name) |>
+    add_sample_data(sample_data) |>
+    use_purity("facets_purity", verbose = verbose)
+}
+
+
+# Method for Mutect data: stores the variants as SNV data under `name`. When
+# a 'patient_id' column is present, it is removed from the SNV table and the
+# unique patient_id/sample_id pairs are added to the sample metadata.
+#' @export
+add_to_cevodata.cevo_Mutect <- function(data, cd,
+                                        name = "Mutect",
+                                        verbose = get_cevomod_verbosity(),
+                                        ...) {
+  has_patient_ids <- "patient_id" %in% names(data)
+  if (!has_patient_ids) {
+    return(add_SNV_data(cd, data, name = name))
+  }
+
+  sample_map <- unique(select(data, "patient_id", "sample_id"))
+  snvs <- data
+  snvs$patient_id <- NULL
+
+  cd <- add_SNV_data(cd, snvs, name = name)
+  add_sample_data(cd, sample_map)
+}
+
+
+# Method for Strelka data: stores the variants as SNV data under `name`. When
+# a 'patient_id' column is present, it is removed from the SNV table and the
+# unique patient_id/sample_id pairs are added to the sample metadata.
+#' @export
+add_to_cevodata.cevo_Strelka <- function(data, cd,
+                                         name = "Strelka",
+                                         verbose = get_cevomod_verbosity(),
+                                         ...) {
+  has_patient_ids <- "patient_id" %in% names(data)
+  if (!has_patient_ids) {
+    return(add_SNV_data(cd, data, name = name))
+  }
+
+  sample_map <- unique(select(data, "patient_id", "sample_id"))
+  snvs <- data
+  snvs$patient_id <- NULL
+
+  cd <- add_SNV_data(cd, snvs, name = name)
+  add_sample_data(cd, sample_map)
+}
diff --git a/README.md b/README.md
@@ -26,9 +26,9 @@ devtools::install_github("pawelqs/cevomod")
 ```
 
 
-## Chnages in version 2.0.0
-
-Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples.
+## Last changes
+* **v2.1.0** - cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function.
+* **v2.0.0** - Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples.
 
 To see the previous changes in the package see the [Changelog](https://pawelqs.github.io/cevomod/news/index.html)
 

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -21,6 +21,7 @@ reference:
   - starts_with("add_")
   - starts_with("default_")
   - set_cancer_type
+  - use_purity
 - title: cevodata transformations
 - contents:
   - filter.cevodata

diff --git a/man/readthis-integration.Rd b/man/readthis-integration.Rd
diff --git a/man/use_purity.Rd b/man/use_purity.Rd
diff --git a/tests/testthat/test-cevodata-readthis_integration.R b/tests/testthat/test-cevodata-readthis_integration.R
@@ -0,0 +1,65 @@
+test_that("adding ASCAT data works", {
+  # readthis is only a suggested dependency: skip instead of failing without it
+  skip_if_not_installed("readthis")
+  ascat_dir <- system.file("extdata", "ASCAT", package = "readthis")
+  data <- readthis::read_ascat_files(ascat_dir, sample_id_pattern = "(?<=ASCAT\\/)[:alnum:]*(?=\\.)")
+  cd <- init_cevodata("Test dataset") |>
+    add_data(data)
+  expect_s3_class(cd, "cevodata")
+  expect_s3_class(CNVs(cd), "tbl")
+  expect_equal(cd$active_CNVs, "ASCAT")
+  expect_equal(dim(CNVs(cd)), c(20, 8))
+  expect_equal(cd$metadata$purity, c(0.99322, 0.99322))
+  expect_equal(cd$metadata$purity, cd$metadata$ascat_purity)
+})
+
+
+
+test_that("adding FACETS data works", {
+  # readthis is only a suggested dependency: skip instead of failing without it
+  skip_if_not_installed("readthis")
+  facets_dir <- system.file("extdata", "FACETS", package = "readthis")
+  data <- readthis::read_facets_cnvs(facets_dir)
+  cd <- init_cevodata("Test dataset") |>
+    add_data(data)
+  expect_s3_class(cd, "cevodata")
+  expect_s3_class(CNVs(cd), "tbl")
+  expect_equal(cd$active_CNVs, "FACETS")
+  expect_equal(dim(CNVs(cd)), c(128, 18))
+  expect_equal(cd$metadata$purity, c(0.3, 0.3))
+  expect_equal(cd$metadata$purity, cd$metadata$facets_purity)
+})
+
+
+
+test_that("adding Mutect2 data works", {
+  # readthis is only a suggested dependency: skip instead of failing without it
+  skip_if_not_installed("readthis")
+  path <- system.file("extdata", "Mutect", package = "readthis")
+  data <- readthis::read_mutect_snvs(
+    path,
+    patient_id_pattern = "(?<=Mutect\\/)[:alnum:]*(?=\\.)",
+    verbose = FALSE
+  )
+  cd <- init_cevodata("Test dataset") |>
+    add_data(data)
+  expect_s3_class(cd, "cevodata")
+  expect_s3_class(SNVs(cd), "tbl")
+  expect_equal(cd$active_SNVs, "Mutect")
+  expect_equal(dim(SNVs(cd)), c(16, 14))
+  expect_equal(cd$metadata$sample_id, c("S1_L1", "S1_P1", "S2_L1", "S2_P1"))
+  expect_equal(cd$metadata$patient_id, c("S1", "S1", "S2", "S2"))
+})
+
+
+
+test_that("adding Strelka data works", {
+  # readthis is only a suggested dependency: skip instead of failing without it
+  skip_if_not_installed("readthis")
+  path <- system.file("extdata", "Strelka", package = "readthis")
+  data <- readthis::read_strelka_somatic_snvs(
+    path,
+    patient_id_pattern = "(?<=Strelka\\/)[:alnum:]*(?=\\.)",
+    verbose = FALSE
+  ) |>
+    mutate(sample_id = str_c(patient_id, sample_id, sep = "_"))
+  cd <- init_cevodata("Test dataset") |>
+    add_data(data)
+  expect_s3_class(cd, "cevodata")
+  expect_s3_class(SNVs(cd), "tbl")
+  expect_equal(cd$active_SNVs, "Strelka")
+  expect_equal(dim(SNVs(cd)), c(18, 11))
+  expect_equal(cd$metadata$sample_id, c("S1_TUMOR", "S2_TUMOR"))
+})
diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd
@@ -58,6 +58,8 @@ cd
 
 `name` can be any string that is informative for the user.
 
+*To facilitate the use of cevomod with the data from popular variant callers such as Mutect2, Strelka2, ASCAT, or FACETS, we have implemented the* [readthis](https://pawelqs.github.io/readthis/index.html) *package. readthis functions are designed for bulk reading of many output variant files (they accept a path to a single file, a named vector of file paths, or a path to a directory containing many files). Data objects read with readthis functions can be added to the cevodata object with a single call of the general* `add_data()` *function. For more information see the* [readthis page](https://pawelqs.github.io/readthis/index.html).
+
 
 ## Variant Frequency Spectra