Skip to content

Commit

Permalink
Merge pull request #27 from pawelqs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
pawelqs committed Oct 13, 2023
2 parents 0d6a621 + 68ee349 commit bd93223
Show file tree
Hide file tree
Showing 29 changed files with 581 additions and 54 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ tests/testthat/Rplots.pdf
inst/develop_file*

inst/doc
Rplots.pdf
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cevomod
Title: Cancer Evolution Models
Version: 2.2.0
Version: 2.3.0
Authors@R:
person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4367-9821"))
Expand Down Expand Up @@ -31,7 +31,8 @@ Suggests:
tidyverse,
vdiffr,
readthis,
rsample
rsample,
processx
Config/testthat/edition: 3
VignetteBuilder: knitr
Imports:
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export(add_to_cevodata)
export(annotate_mutation_contexts)
export(annotate_normal_cn)
export(as_cevo_snvs)
export(build_clip_container)
export(calc_Mf_1f)
export(calc_SFS)
export(calc_cumulative_tails)
Expand All @@ -115,6 +116,7 @@ export(fit_powerlaw_tail_fixed)
export(fit_powerlaw_tail_optim)
export(fit_subclones)
export(fit_subclones_bmix)
export(fit_subclones_clip)
export(fit_subclones_mclust)
export(fix_powerlaw_N_mutations)
export(generate_neutral_snvs)
Expand All @@ -124,6 +126,7 @@ export(get_Mf_1f)
export(get_SFS)
export(get_SNVs_wider)
export(get_cevomod_verbosity)
export(get_containers_dir)
export(get_frequency_measure_name)
export(get_model_names)
export(get_models)
Expand Down Expand Up @@ -174,11 +177,13 @@ export(scale_fill_cevomod)
export(scale_fill_pnw)
export(set_cancer_type)
export(set_cevomod_verbosity)
export(set_containers_dir)
export(shuffle)
export(split_by)
export(stat_cumulative_tail)
export(theme_ellie)
export(to_clip)
export(unite_mutation_id)
export(use_purity)
export(variant_classification_filter)
import(dplyr)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

## cevomod 2.3.0
* subclones can be fitted using [CliP](https://github.com/wwylab/CliP) [(Jiang et al., 2021)](https://www.biorxiv.org/content/10.1101/2021.03.31.437383v1) (`fit_subclones(method = "CliP")`). This option requires that the Apptainer is installed. CliP container image can be build with `build_clip_container()`

## cevomod 2.2.0
* fit_powerlaw_tail_fixed() has a bootstrap option

Expand Down
9 changes: 7 additions & 2 deletions R/cevo_snvs.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,13 @@ count_mutation_types <- function(snvs) {
}


unite_mutation_id <- function(snvs) {
unite(snvs, "mutation_id", "chrom":"alt", sep = "-")
#' Unite many columns to create mutation_id column
#' @param snvs SNVs
#' @param sep Separator
#' @param remove Remove united columns?
#' @export
unite_mutation_id <- function(snvs, sep = "-", remove = TRUE) {
unite(snvs, "mutation_id", "chrom":"alt", sep = sep, remove = remove)
}


Expand Down
21 changes: 14 additions & 7 deletions R/cevodata-export.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,24 @@ save_clip_files <- function(clip_data, out_dir) {
if (!dir.exists(out_dir)) {
dir.create(out_dir)
}
iwalk(

files <- imap(
clip_data,
function(x, sample_id) {
write_tsv(x$snvs, file.path(out_dir, str_c(sample_id, ".snv.tsv")))
write_tsv(x$cnvs, file.path(out_dir, str_c(sample_id, ".cnv.tsv")))
write_file(
as.character(x$purities),
file.path(out_dir, str_c(sample_id, ".purity.tsv"))
)
# sample_id2 <- str_replace(sample_id, " ", "_")
snvs_file <- file.path(out_dir, str_c(sample_id, ".snv.tsv"))
cnvs_file <- file.path(out_dir, str_c(sample_id, ".cnv.tsv"))
purity_file <- file.path(out_dir, str_c(sample_id, ".purity.tsv"))

write_tsv(x$snvs, snvs_file)
write_tsv(x$cnvs, cnvs_file)
write_file(as.character(x$purities), purity_file)

tibble(sample_id, snvs_file, cnvs_file, purity_file)
}
)

invisible(bind_rows(files))
}


37 changes: 37 additions & 0 deletions R/cevomod-persistent.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

settings_dir <- tools::R_user_dir("cevomod", which = "config")
settings_file <- file.path(settings_dir, "settings.rds")

default_settings <- list(
containers_dir = NULL
)


get_settings <- function() {
if (file.exists(settings_file)) {
readRDS(settings_file)
} else {
default_settings
}
}


#' Get/Set the containers directory
#' @param dir Path for containers
#' @export
set_containers_dir <- function(dir) {
settings <- get_settings()
settings$containers_dir <- dir

if(!dir.exists(settings_dir)) {
dir.create(settings_dir)
}
write_rds(settings, settings_file)
}


#' @rdname set_containers_dir
#' @export
get_containers_dir <- function() {
get_settings()$containers_dir
}
43 changes: 43 additions & 0 deletions R/containers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Build the CliP Apptainer container
#'
#' CliP.sif is saved to
#' - out_dir, if provided
#' - if out_dir is NULL but the containers_dir was set using the [set_containers_dir()],
#' image will be saved to the set containers_dir.
#' - if out_dir is NULL and the containers_dir was not set, image will
#' be saved to the current working dir.
#' @param out_dir Path
#' @param force Force build in image exists
#' @export
build_clip_container <- function(out_dir = NULL, force = FALSE) {
if (!is_apptainer_installed()) {
stop("Apptainer needs to be installed to build the containers")
}

if (is.null(out_dir) & !is.null(get_containers_dir())) {
out_dir <- get_containers_dir()
} else {
out_dir <- "."
}

sif_file <- file.path(out_dir, "CliP.sif") |>
str_replace("//", "/")

if (!file.exists(sif_file) | force) {
def_file <- system.file("CliP.def", package = "cevomod")
command <- str_c("apptainer build", sif_file, def_file, sep = " ")
system(command)
} else {
msg(sif_file, " exists. Set force = TRUE to overwrite it")
}
}


is_apptainer_installed <- function() {
rlang::check_installed("processx", reason = "to interact with system")
x <- processx::run("apptainer", args = "--version")
x$status == 0
}



8 changes: 5 additions & 3 deletions R/fit_powerlaw_tail_fixed.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#'
#' @param object SNVs tibble object
#' @param rsq_treshold R-squared tresholds to keep model as neutral
#' @param lm_length length of the linear model fits
#' @param name name in the models' slot
#' @param verbose verbose?
#' @param ... other arguments
Expand Down Expand Up @@ -56,6 +57,7 @@ fit_powerlaw_tail_fixed <- function(object, ...) {
#' @export
fit_powerlaw_tail_fixed.cevodata <- function(object,
rsq_treshold = 0.98,
lm_length = 0.05,
name = "powerlaw_fixed",
pct_left = 0.05, pct_right = 0.95,
verbose = get_cevomod_verbosity(),
Expand All @@ -78,7 +80,7 @@ fit_powerlaw_tail_fixed.cevodata <- function(object,
reframe(
model = "powerlaw_fixed",
component = "Neutral tail",
fit_optimal_lm(.data$data, rsq_treshold, pb)
fit_optimal_lm(.data$data, rsq_treshold, lm_length = lm_length, pb)
)
class(models) <- c("cevo_powerlaw_models", class(models))

Expand All @@ -89,7 +91,7 @@ fit_powerlaw_tail_fixed.cevodata <- function(object,
}


fit_optimal_lm <- function(data, rsq_treshold = 0.98, pb = NULL) {
fit_optimal_lm <- function(data, rsq_treshold = 0.98, lm_length = 0.05, pb = NULL) {
min_val <- min(data$f)
max_val <- max(data$f)
grid <- expand_grid(
Expand All @@ -98,7 +100,7 @@ fit_optimal_lm <- function(data, rsq_treshold = 0.98, pb = NULL) {
) |>
mutate(length = .data$to - .data$from)

desired_length <- if ((max_val - min_val) >= 0.05) 0.05 else max(grid$length)
desired_length <- if ((max_val - min_val) >= lm_length) lm_length else max(grid$length)
grid <- grid |>
filter(near(.data$length, desired_length))

Expand Down
2 changes: 2 additions & 0 deletions R/fit_powerlaw_tail_optim.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ fit_powerlaw_tail_optim.cevo_SFS_tbl <- function(object,
data <- sfs |>
left_join(bounds, by = "sample_id") |>
filter(.data$f > .data$lower_bound, .data$f < .data$higher_bound) |>
mutate(length = n(), .by = "sample_id") |>
filter(length >= 3) |>
select("sample_id", "f", "y") |>
mutate(
y = .data$y |>
Expand Down
53 changes: 42 additions & 11 deletions R/fit_subclones.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,72 @@
#' Fit clonal and subclonal components of the model to the residuals of the
#' power-law model
#'
#' @param object object
#' @param N numbers of clones to for models
#' @param powerlaw_model_name residual of which powerlaw model to use?
#' @param object cevodata object
#' @param N Vector of numbers of clones to for models
#' @param powerlaw_model_name Residual of which powerlaw model to use?
#' powerlaw_fixed/powerlaw_optim
#' @param snvs_name which snvs to to use?
#' @param method clustering method to use: BMix and mclust are currently supported.
#' While mclust is a 3-4 times faster method, the BMix method is more accurate
#' and usually fast enough.
#' @param snvs_name Which snvs to to use?
#' @param cnvs_name Which cnvs to to use?
#' @param method Clustering method to use. Currently supported methods:
#' - mclust - the fastest method, approximately 3-4 times faster than BMix,
#' but uses a gaussian mixture modelling
#' - BMix - is more accurate, considers subclones as binomial clusters,
#' slightly slower
#' - CliP - Clonal structure identification through penalizing pairwise
#' differences
#' @param upper_f_limit ignore variants with f higher than
#' @param verbose verbose?
#' @param verbose Verbose?
#' @name fit_subclones
NULL



#' @rdname fit_subclones
#' @describeIn fit_subclones Provides a common interface for all other methods,
#' runs the selected method and passes all the required arguments down.
#' @examples
#' \dontrun{
#' # Using BMix
#' fit_subclones(test_data_fitted)
#' # or
#' fit_subclones_bmix(test_data_fitted)
#'
#' # Using mclust
#' fit_subclones(test_data_fitted, method = "mclust")
#' # or
#' fit_subclones_mclust(test_data_fitted)
#'
#' # Using CliP
#' set_containers_dir(selected_dir)
#' build_clip_container()
#' fit_subclones(test_data_fitted, method = "CliP")
#' # or
#' fit_subclones_clip(test_data_fitted)
#' }
#' @export
fit_subclones <- function(object,
N = 1:3,
powerlaw_model_name = active_models(object),
snvs_name = default_SNVs(object),
cnvs_name = default_CNVs(object),
method = "BMix",
upper_f_limit = 0.75,
clip_sif = NULL,
clip_input = file.path(tempdir(), "clip_input"),
clip_output = file.path(tempdir(), "clip_output"),
verbose = get_cevomod_verbosity()) {
if (method == "BMix") {
object <- object |>
fit_subclones_bmix(N, powerlaw_model_name, snvs_name, upper_f_limit, verbose)
} else if (method == "mclust") {
object <- object |>
fit_subclones_mclust(N, powerlaw_model_name, snvs_name, upper_f_limit, verbose)
} else if (method == "CliP") {
object <- object |>
fit_subclones_clip(powerlaw_model_name, snvs_name, cnvs_name, upper_f_limit, verbose = verbose)
} else {
stop("Currently supported methods are: BMix and mclust")
stop("Currently supported methods are: BMix, CliP, and mclust")
}


object
}

Expand Down
Loading

0 comments on commit bd93223

Please sign in to comment.