Skip to content

Commit

Permalink
Merge pull request #22 from pawelqs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
pawelqs committed Jul 25, 2023
2 parents 17e258f + 107e5c0 commit cb2869c
Show file tree
Hide file tree
Showing 11 changed files with 314 additions and 6 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cevomod
Title: Cancer Evolution Models
Version: 2.0.0
Version: 2.1.0
Authors@R:
person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4367-9821"))
Expand Down Expand Up @@ -29,7 +29,8 @@ Suggests:
shinyWidgets,
testthat (>= 3.0.0),
tidyverse,
vdiffr
vdiffr,
readthis
Config/testthat/edition: 3
VignetteBuilder: knitr
Imports:
Expand Down Expand Up @@ -59,6 +60,7 @@ Depends:
R (>= 2.10)
Remotes:
caravagnalab/mobster,
caravagnalab/BMix
caravagnalab/BMix,
pawelqs/readthis
LazyData: true
URL: https://pawelqs.github.io/cevomod/, https://github.com/pawelqs/cevomod
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ S3method(add_CNV_data,cevodata)
S3method(add_SNV_data,cevodata)
S3method(add_patient_data,cevodata)
S3method(add_sample_data,cevodata)
S3method(add_to_cevodata,cevo_ASCAT)
S3method(add_to_cevodata,cevo_FACETS)
S3method(add_to_cevodata,cevo_Mutect)
S3method(add_to_cevodata,cevo_Strelka)
S3method(calc_Mf_1f,cevo_snvs)
S3method(calc_Mf_1f,cevodata)
S3method(calc_SFS,cevo_snvs)
Expand Down Expand Up @@ -76,8 +80,10 @@ export(SNVs_CNVs)
export(active_models)
export(add_CNV_data)
export(add_SNV_data)
export(add_data)
export(add_patient_data)
export(add_sample_data)
export(add_to_cevodata)
export(annotate_mutation_contexts)
export(annotate_normal_cn)
export(as_cevo_snvs)
Expand Down Expand Up @@ -169,6 +175,7 @@ export(split_by)
export(stat_cumulative_tail)
export(theme_ellie)
export(to_clip)
export(use_purity)
export(variant_classification_filter)
import(dplyr)
import(forcats)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

## cevomod 2.1.0
* cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function.


## cevomod 2.0.0
* cevomod functions can now utilize VAF or CCF (Cancer Cell Fraction) as a measure
of mutation frequency. CCF is calculated using the formula introduced in [Dentro et al. *Principles of Reconstructing the Subclonal Architecture of Cancers* (2015)](https://doi.org/10.1101/cshperspect.a026625)
Expand Down
27 changes: 27 additions & 0 deletions R/cevodata-construction.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,33 @@ add_sample_data.cevodata <- function(object, data, ...) {
}


#' Choose purity measure
#'
#' <cevodata> metadata can store purity estimates in columns other than
#' 'purity'. This function copies the values of the requested metadata column
#' into the 'purity' column. If a 'purity' column already exists, its values
#' are first saved in the 'prev_purity' column.
#'
#' @param cd <cevodata> object
#' @param name Name of the metadata column with chosen purity values
#' @param verbose Verbose?
#' @return `cd` with the 'purity' metadata column replaced by column `name`
#' @export
use_purity <- function(cd, name, verbose = get_cevomod_verbosity()) {
  # Guard clause: the requested column must exist in the metadata
  if (name %not in% names(cd$metadata)) {
    stop(
      "`name` should be a name of the column in the metadata tibble, ",
      "which should be used as purity measure"
    )
  }

  msg("Using '", name, "' as default purity measure", verbose = verbose)

  # Keep the values that are about to be overwritten
  current_purity <- cd$metadata[["purity"]]
  if (!is.null(current_purity)) {
    cd$metadata$prev_purity <- cd$metadata$purity
  }

  cd$metadata$purity <- cd$metadata[[name]]
  cd
}


is_cevodata_singlepatient <- function(object) {
n_patients <- count_patients(object)
if (is.na(n_patients)) {
Expand Down
126 changes: 126 additions & 0 deletions R/cevodata-readthis_integration.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@

#' readthis integration
#'
#' @description
#' The [readthis](https://github.com/pawelqs/readthis) package can be used to
#' easily read data from some popular mutation callers into the R environment.
#' readthis functions accept not only single file paths, but also lists of
#' files or even paths to directories containing the files to be loaded (and
#' a cevodata object is meant to store the data from many samples!).
#'
#' readthis functions return tibbles or lists of tibbles. These objects are
#' usually instances of *cevo_<software_name>* S3 classes. cevomod implements
#' methods that allow these types of data to be added to cevodata objects
#' conveniently.
#'
#' @param cd <cevodata> object
#' @param data Object read with readthis functions
#' @param name Name for the data
#' @param verbose Verbose?
#' @param ... Other arguments
#'
#' @examples
#' # library(cevomod)
#'
#' ascat_dir <- system.file("extdata", "ASCAT", package = "readthis")
#' ascat <- readthis::read_ascat_files(ascat_dir)
#' cd <- init_cevodata("Test dataset") |>
#'   add_data(ascat)
#'
#' @name readthis-integration
NULL



#' @describeIn readthis-integration add_data() function takes cevodata as the
#'   first argument, so it is the preferred way of adding data in R pipelines.
#'   Additional arguments passed via `...` (e.g. `name`, `verbose`) are
#'   forwarded to the add_to_cevodata() method selected for `data`.
#' @export
add_data <- function(cd, data, ...) {
  # Forward `...` so callers can override `name`/`verbose` of the method;
  # previously these arguments were accepted but silently dropped.
  add_to_cevodata(data, cd, ...)
}


#' @describeIn readthis-integration add_to_cevodata() is an S3 generic that
#'   dispatches on the class of `data`. Its methods are the ones invoked by
#'   the add_data() function.
#' @export
add_to_cevodata <- function(data, cd, name, verbose, ...) {
  # Dispatch explicitly on `data`, the first argument
  UseMethod("add_to_cevodata", data)
}


#' @export
add_to_cevodata.cevo_ASCAT <- function(data, cd,
                                       name = "ASCAT",
                                       verbose = get_cevomod_verbosity(),
                                       ...) {
  # Purity is derived as 1 - normal_contamination from the sample statistics
  sample_stats <- mutate(
    data$sample_statistics,
    ascat_purity = 1 - .data$normal_contamination
  )

  # Register CNVs, then sample-level data, then make ASCAT purity the default
  with_cnvs <- add_CNV_data(cd, data$cnvs, name = name)
  with_samples <- add_sample_data(with_cnvs, sample_stats)
  use_purity(with_samples, "ascat_purity", verbose = verbose)
}


#' @export
add_to_cevodata.cevo_FACETS <- function(data, cd,
                                        name = "FACETS",
                                        verbose = get_cevomod_verbosity(),
                                        ...) {
  # Purity and ploidy are repeated on every segment row; collapse them to
  # one row per sample so they can be stored in the metadata.
  sample_data <- data |>
    select("sample_id", facets_purity = "Purity", facets_ploidy = "Ploidy") |>
    unique()

  # NOTE(review): the full table (still including the Purity and Ploidy
  # columns) is stored as the CNV data. A previously computed but unused
  # `cnvs` table without these columns was removed as dead code; confirm
  # whether the CNV table was meant to drop them.
  cd |>
    add_CNV_data(data, name = name) |>
    add_sample_data(sample_data) |>
    use_purity("facets_purity", verbose = verbose)
}


#' @export
add_to_cevodata.cevo_Mutect <- function(data, cd,
                                        name = "Mutect",
                                        verbose = get_cevomod_verbosity(),
                                        ...) {
  has_patient_ids <- "patient_id" %in% names(data)

  # Without patient IDs there is nothing to add to the sample metadata
  if (!has_patient_ids) {
    return(add_SNV_data(cd, data, name = name))
  }

  # Extract the patient/sample mapping before dropping patient_id from SNVs
  patient_samples <- unique(select(data, "patient_id", "sample_id"))
  data$patient_id <- NULL

  cd |>
    add_SNV_data(data, name = name) |>
    add_sample_data(patient_samples)
}


#' @export
add_to_cevodata.cevo_Strelka <- function(data, cd,
                                         name = "Strelka",
                                         verbose = get_cevomod_verbosity(),
                                         ...) {
  # SNVs without a patient_id column are added as they are
  if (!("patient_id" %in% names(data))) {
    return(add_SNV_data(cd, data, name = name))
  }

  # The patient/sample mapping goes to the metadata, not to the SNV table
  id_map <- data |>
    select("patient_id", "sample_id") |>
    unique()
  data$patient_id <- NULL

  with_snvs <- add_SNV_data(cd, data, name = name)
  add_sample_data(with_snvs, id_map)
}
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ devtools::install_github("pawelqs/cevomod")
```


## Chnages in version 2.0.0

Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples.
## Last changes
* **v2.1.0** - cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function.
* **v2.0.0** - Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples.

To see the previous changes in the package see the [Changelog](https://pawelqs.github.io/cevomod/news/index.html)

Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ reference:
- starts_with("add_")
- starts_with("default_")
- set_cancer_type
- use_purity
- title: cevodata transformations
- contents:
- filter.cevodata
Expand Down
54 changes: 54 additions & 0 deletions man/readthis-integration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/use_purity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 65 additions & 0 deletions tests/testthat/test-cevodata-readthis_integration.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
test_that("adding ASCAT data works", {
  # readthis is only a suggested dependency, so skip when it is unavailable
  skip_if_not_installed("readthis")

  ascat_dir <- system.file("extdata", "ASCAT", package = "readthis")
  data <- readthis::read_ascat_files(
    ascat_dir,
    sample_id_pattern = "(?<=ASCAT\\/)[:alnum:]*(?=\\.)"
  )
  cd <- init_cevodata("Test dataset") |>
    add_data(data)

  expect_s3_class(cd, "cevodata")
  expect_s3_class(CNVs(cd), "tbl")
  expect_equal(cd$active_CNVs, "ASCAT")
  expect_equal(dim(CNVs(cd)), c(20, 8))
  # ASCAT purity must become the default purity measure
  expect_equal(cd$metadata$purity, c(0.99322, 0.99322))
  expect_equal(cd$metadata$purity, cd$metadata$ascat_purity)
})



test_that("adding FACETS data works", {
  # readthis is only a suggested dependency, so skip when it is unavailable
  skip_if_not_installed("readthis")

  facets_dir <- system.file("extdata", "FACETS", package = "readthis")
  data <- readthis::read_facets_cnvs(facets_dir)
  cd <- init_cevodata("Test dataset") |>
    add_data(data)

  expect_s3_class(cd, "cevodata")
  expect_s3_class(CNVs(cd), "tbl")
  expect_equal(cd$active_CNVs, "FACETS")
  expect_equal(dim(CNVs(cd)), c(128, 18))
  # FACETS purity must become the default purity measure
  expect_equal(cd$metadata$purity, c(0.3, 0.3))
  expect_equal(cd$metadata$purity, cd$metadata$facets_purity)
})



test_that("adding Mutect2 data works", {
  # readthis is only a suggested dependency, so skip when it is unavailable
  skip_if_not_installed("readthis")

  path <- system.file("extdata", "Mutect", package = "readthis")
  data <- readthis::read_mutect_snvs(
    path,
    patient_id_pattern = "(?<=Mutect\\/)[:alnum:]*(?=\\.)",
    verbose = FALSE
  )
  cd <- init_cevodata("Test dataset") |>
    add_data(data)

  expect_s3_class(cd, "cevodata")
  expect_s3_class(SNVs(cd), "tbl")
  expect_equal(cd$active_SNVs, "Mutect")
  expect_equal(dim(SNVs(cd)), c(16, 14))
  # The patient/sample mapping must end up in the metadata
  expect_equal(cd$metadata$sample_id, c("S1_L1", "S1_P1", "S2_L1", "S2_P1"))
  expect_equal(cd$metadata$patient_id, c("S1", "S1", "S2", "S2"))
})



test_that("adding Strelka data works", {
  # readthis is only a suggested dependency, so skip when it is unavailable
  skip_if_not_installed("readthis")

  path <- system.file("extdata", "Strelka", package = "readthis")
  data <- readthis::read_strelka_somatic_snvs(
    path,
    patient_id_pattern = "(?<=Strelka\\/)[:alnum:]*(?=\\.)",
    verbose = FALSE
  ) |>
    mutate(sample_id = str_c(patient_id, sample_id, sep = "_"))
  cd <- init_cevodata("Test dataset") |>
    add_data(data)

  expect_s3_class(cd, "cevodata")
  expect_s3_class(SNVs(cd), "tbl")
  expect_equal(cd$active_SNVs, "Strelka")
  expect_equal(dim(SNVs(cd)), c(18, 11))
  expect_equal(cd$metadata$sample_id, c("S1_TUMOR", "S2_TUMOR"))
})
2 changes: 2 additions & 0 deletions vignettes/get_started.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ cd

`name` can be any string that is informative for the user.

*To facilitate the use of cevomod with data from popular variant callers such as Mutect2, Strelka2, ASCAT, or FACETS, we have implemented the* [readthis](https://pawelqs.github.io/readthis/index.html) *package. readthis functions are designed for bulk reading of many output variant files (they accept a path to a single file, a named vector of file paths, or a path to a directory containing many files). Data objects read with readthis functions can be added to the cevodata object with a single call of the general* `add_data()` *function. For more information see the* [readthis page](https://pawelqs.github.io/readthis/index.html).


## Variant Frequency Spectra

Expand Down

0 comments on commit cb2869c

Please sign in to comment.