
Commit

Merge pull request #453 from gagneurlab/dev
1.3.3
vyepez88 committed Apr 14, 2023
2 parents 8a6a490 + dec7b7c commit 594d7da
Showing 17 changed files with 69 additions and 29 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -12,7 +12,9 @@ The manuscript is available in [Nature Protocols](https://www.nature.com/article

## What's new

Version 1.3.0 introduces the option to use FRASER 2.0 which is an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary in the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml). Moreover, it allows to provide lists of genes to focus on and do the multiple testing correction instead of the usual transcriptome-wide approach. Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest).
Versions 1.3.3, 1.3.2 and 1.3.1 fix some bugs.
Version 1.3.0 introduces the option to use FRASER 2.0, an improved version of FRASER that uses the Intron Jaccard Index metric instead of percent spliced in and splicing efficiency to quantify and later call aberrant splicing. To run FRASER 2.0, modify the `FRASER_version` parameter in the aberrantSplicing dictionary of the config file and adapt the `quantileForFiltering` and `deltaPsiCutoff` parameters. See the [config template](https://github.com/gagneurlab/drop/blob/master/drop/template/config.yaml) for more details. When switching between FRASER versions, we recommend running DROP in a separate folder for each version. Moreover, DROP now allows users to provide lists of genes of interest and to perform the multiple testing correction on those genes instead of the usual transcriptome-wide approach. Refer to the [documentation](https://gagneurlab-drop.readthedocs.io/en/latest/prepare.html#limiting-fdr-correction-to-subsets-of-genes-of-interest).
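
For orientation, the relevant part of the `aberrantSplicing` section of the config could look roughly like the sketch below. The key names `FRASER_version`, `quantileForFiltering` and `deltaPsiCutoff` are the ones mentioned above; the values are illustrative placeholders, not recommendations, so check the config template for the defaults of each FRASER version.

```yaml
aberrantSplicing:
    # 'FRASER' uses percent spliced in and splicing efficiency; 'FRASER2' uses the Intron Jaccard Index
    FRASER_version: FRASER2
    # filtering and calling cutoffs; adapt these to the chosen FRASER version
    quantileForFiltering: 0.75    # illustrative value
    deltaPsiCutoff: 0.1           # illustrative value
```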

`Snakemake v.7.8` introduced changes whereby modified parameters can cause rules to be re-executed. More info [here](https://github.com/snakemake/snakemake/issues/1694). This affects DROP and causes certain rules in the AS and QC modules to be triggered even if they were already completed and there were no changes to the sample annotation or scripts. The workaround is to run DROP with the additional parameter `--rerun-triggers mtime`, e.g. `snakemake -n --rerun-triggers mtime` or `snakemake --cores 10 --rerun-triggers mtime`. We will investigate the affected rules in DROP to fix this.

@@ -31,7 +33,7 @@ mamba create -n drop_env -c conda-forge -c bioconda drop --override-channels

In case of mamba/conda troubles, we recommend using the fixed `DROP_<version>.yaml` installation file that we make available on our [public server](https://www.cmm.in.tum.de/public/paper/drop_analysis/). Download the file for the current version and use its full path in the following command to create the conda environment `drop_env`:
```
mamba env create -f DROP_1.3.2.yaml
mamba env create -f DROP_1.3.3.yaml
```

Test installation with demo project
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -23,7 +23,7 @@
author = 'Michaela Müller'

# The full version, including alpha/beta/rc tags
release_ = '1.3.2'
release_ = '1.3.3'



2 changes: 1 addition & 1 deletion docs/source/installation.rst
@@ -20,7 +20,7 @@ Install the latest version and use the full path in the following command to ins

.. code-block:: bash
mamba env create -f DROP_1.3.2.yaml
mamba env create -f DROP_1.3.3.yaml
Installation time: ~ 10min

3 changes: 2 additions & 1 deletion docs/source/prepare.rst
@@ -114,7 +114,8 @@ Aberrant splicing dictionary
These parameters are directly used by the ``aberrantSplicing`` snakemake command. Each group must have at least ``10``
samples. This module uses FRASER to detect aberrant splicing. We recently developed an improved version of FRASER that uses
the Intron Jaccard Index instead of percent spliced in and splicing efficiency to call aberrant splicing. To use this improved version,
set the ``FRASER_version`` parameter to 'FRASER2'.
set the ``FRASER_version`` parameter to 'FRASER2'. When switching between FRASER versions, we recommend running DROP in a
separate folder for each version.
To use external counts, refer to the ``Using External Counts`` section.
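
A minimal sketch of the corresponding entry in ``config.yaml`` (the value is only an example; see the config template for the full dictionary and the recommended defaults):

.. code-block:: yaml

    aberrantSplicing:
        FRASER_version: 'FRASER2'    # or 'FRASER' for the original implementation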

============================ ========= ===================================================================================================================================================================================================================== ======
2 changes: 1 addition & 1 deletion drop/__init__.py
@@ -4,5 +4,5 @@
from . import utils
from . import demo

__version__ = "1.3.2"
__version__ = "1.3.3"

2 changes: 1 addition & 1 deletion drop/cli.py
@@ -17,7 +17,7 @@

@click.group()
@click_log.simple_verbosity_option(logger)
@click.version_option('1.3.2',prog_name='drop')
@click.version_option('1.3.3',prog_name='drop')


def main():
@@ -12,8 +12,8 @@
#' - counting_done: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/counting.done" `'
#' output:
#' - theta: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/theta.h5"`'
#' - splice_metrics: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/{type}.h5", type=cfg.AS.getPsiTypeAssay(), allow_missing=True)`'
#' type: script
#'---

@@ -9,14 +9,14 @@
#' - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
#' - exCountIDs: '`sm lambda w: sa.getIDsByGroup(w.dataset, assay="SPLICE_COUNT")`'
#' input:
#' - theta: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/theta.h5"`'
#' - splice_metrics: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/{type}.h5", type=cfg.AS.getPsiTypeAssay(), allow_missing=True)`'
#' - exCounts: '`sm lambda w: cfg.AS.getExternalCounts(w.dataset, "k_j_counts")`'
#' output:
#' - fds: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/fds-object.RDS"`'
#' - done: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter.done" `'
#' - done: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter_{version}.done", version=cfg.AS.get("FRASER_version"), allow_missing=True)`'
#' threads: 3
#' type: script
#'---
@@ -97,4 +97,9 @@ if (params$filter == TRUE) {
seqlevels(fds) <- seqlevelsInUse(fds)
colData(fds)$sampleID <- as.character(colData(fds)$sampleID)
fds <- saveFraserDataSet(fds,dir = workingDir)

# remove previous filter.done files and create new one
outdir <- dirname(snakemake@output$done)
prevFilterFiles <- grep("filter(.*)done", list.files(outdir), value=TRUE)
unlink(file.path(outdir, prevFilterFiles))
file.create(snakemake@output$done)
4 changes: 2 additions & 2 deletions drop/modules/aberrant-splicing-pipeline/Counting/Summary.R
@@ -8,8 +8,8 @@
#' - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
#' - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
#' input:
#' - filter: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter.done" `'
#' - filter: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter_{version}.done", version=cfg.AS.get("FRASER_version"), allow_missing=True)`'
#' output:
#' - wBhtml: '`sm config["htmlOutputPath"] +
#' "/AberrantSplicing/{dataset}_countSummary.html"`'
@@ -9,11 +9,11 @@
#' - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
#' threads: 12
#' input:
#' - filter: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter.done" `'
#' - filter: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/filter_{version}.done", version=cfg.AS.get("FRASER_version"), allow_missing=True)`'
#' output:
#' - hyper: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/hyper.done" `'
#' - hyper: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/hyper_{version}.done", version=cfg.AS.get("FRASER_version"), allow_missing=True)`'
#' type: script
#'---

@@ -66,4 +66,9 @@ for(type in psiTypes){
fds <- saveFraserDataSet(fds)
}
fds <- saveFraserDataSet(fds)

# remove previous hyper.done files and create new one
outdir <- dirname(snakemake@output$hyper)
prevFilterFiles <- grep("hyper(.*)done", list.files(outdir), value=TRUE)
unlink(file.path(outdir, prevFilterFiles))
file.create(snakemake@output$hyper)
@@ -9,8 +9,8 @@
#' - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
#' threads: 20
#' input:
#' - hyper: '`sm cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/hyper.done" `'
#' - hyper: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/hyper_{version}.done", version=cfg.AS.get("FRASER_version"), allow_missing=True)`'
#' output:
#' - fdsout: '`sm expand(cfg.getProcessedDataDir() +
#' "/aberrant_splicing/datasets/savedObjects/{dataset}/predictedMeans_{type}.h5", type=cfg.AS.getPsiTypeAssay(), allow_missing=True)`'
@@ -41,3 +41,11 @@ for(type in psiTypes){
fds <- saveFraserDataSet(fds)
}

# remove .h5 files from previous runs with other FRASER version
fdsDir <- dirname(snakemake@output$fdsout[1])
for(type in psiTypesNotUsed){
predMeansFile <- file.path(fdsDir, paste0("predictedMeans_", type, ".h5"))
if(file.exists(predMeansFile)){
unlink(predMeansFile)
}
}
@@ -60,3 +60,11 @@ fds_input <- annotateIntronReferenceOverlap(fds_input, txdb)
# save fds
fds <- saveFraserDataSet(fds_input, dir=outputDir, name = paste(dataset, annotation, sep = '--'), rewrite = TRUE)

# remove .h5 files from previous runs with other FRASER version
fdsDir <- dirname(snakemake@output$fdsout[1])
for(type in psiTypesNotUsed){
predMeansFile <- file.path(fdsDir, paste0("predictedMeans_", type, ".h5"))
if(file.exists(predMeansFile)){
unlink(predMeansFile)
}
}
@@ -40,13 +40,11 @@ fraser_sample_ids <- snakemake@params$ids
subsets <- parse_subsets_for_FDR(snakemake@params$genes_to_test,
sampleIDs=fraser_sample_ids)

# Load Zscores data
# Load FRASER data
fds <- loadFraserDataSet(dir=workingDir, name=paste(dataset, annotation, sep = '--'))

# Calculate stats
for (type in psiTypes) {
# Zscores
fds <- calculateZscore(fds, type=type)
# Pvalues
fds <- calculatePvalues(fds, type=type)
# Adjust Pvalues
@@ -55,3 +53,14 @@ for (type in psiTypes) {

fds <- saveFraserDataSet(fds)

# remove .h5 files from previous runs with other FRASER version
fdsDir <- dirname(snakemake@output$fdsout[1])
pvalFiles <- grep("p(.*)BetaBinomial_(.*).h5",
list.files(fdsDir),
value=TRUE)
for(type in psiTypesNotUsed){
pvalFilesType <- grep(type, pvalFiles, value=TRUE)
for(pFile in pvalFilesType){
unlink(file.path(fdsDir, pFile))
}
}
6 changes: 4 additions & 2 deletions drop/modules/aberrant-splicing-pipeline/config.R
@@ -35,9 +35,11 @@ h5disableFileLocking()
# set psiTypes to run based on preference in config.yaml
cfg <- yaml::read_yaml("config.yaml")
if(cfg$aberrantSplicing$FRASER_version == "FRASER2"){
psiTypes <- c("jaccard")
pseudocount(0.1)
psiTypes <- c("jaccard")
psiTypesNotUsed <- c("psi5", "psi3", "theta")
} else{
psiTypes <- c("psi5", "psi3", "theta")
pseudocount(1)
psiTypes <- c("psi5", "psi3", "theta")
psiTypesNotUsed <- c("jaccard")
}
2 changes: 1 addition & 1 deletion drop/requirementsR.txt
@@ -1,7 +1,7 @@
package version ref
devtools
gagneurlab/OUTRIDER 1.17.2 HEAD
gagneurlab/FRASER 1.99.0 HEAD
gagneurlab/FRASER 1.99.1 HEAD
gagneurlab/tMAE 1.0.4 HEAD
VariantAnnotation
rmarkdown
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.3.2
current_version = 1.3.3
commit = True

[bumpversion:file:setup.py]
2 changes: 1 addition & 1 deletion setup.py
@@ -21,7 +21,7 @@

setuptools.setup(
name="drop",
version="1.3.2",
version="1.3.3",

author="Vicente A. Yépez, Michaela Müller, Nicholas H. Smith, Daniela Klaproth-Andrade, Luise Schuller, Ines Scheller, Christian Mertes <mertes@in.tum.de>, Julien Gagneur <gagneur@in.tum.de>",
author_email="yepez@in.tum.de",

