nf-core · nictru · Jun 10, 2024 · May 31, 2024 · May 31, 2024 · May 31, 2024
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -26,6 +26,13 @@
                 "exists": true,
                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+            },
+            "strandedness": {
+                "type": "string",
+                "enum": ["unstranded", "forward", "reverse", "auto"],
+                "default": "auto",
+                "errorMessage": "Strandedness must be one of 'unstranded', 'forward', 'reverse' or 'auto'",
+                "meta": ["strandedness"]
             }
         },
         "required": ["sample", "fastq_1"]

diff --git a/conf/modules.config b/conf/modules.config
@@ -28,36 +28,36 @@ process {
 
     // TRIMMING courtesy of nf-core/rnaseq
 
-if (!params.skip_trimming) {
-    process {
-        withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' {
-            ext.args   = {
-                [
-                    "--fastqc_args '-t ${task.cpus}' ",
-                    params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : ''
-                ].join(' ').trim()
-            }
-            publishDir = [
-                [
-                    path: { "${params.outdir}/quality_control/trimgalore/fastqc" },
-                    mode: params.publish_dir_mode,
-                    pattern: "*.{html,zip}"
-                ],
-                [
-                    path: { "${params.outdir}/quality_control/trimgalore" },
-                    mode: params.publish_dir_mode,
-                    pattern: "*.fq.gz",
-                    enabled: params.save_trimmed
-                ],
-                [
-                    path: { "${params.outdir}/quality_control/trimgalore" },
-                    mode: params.publish_dir_mode,
-                    pattern: "*.txt"
+    if (!params.skip_trimming) {
+        process {
+            withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' {
+                ext.args   = {
+                    [
+                        "--fastqc_args '-t ${task.cpus}' ",
+                        params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : ''
+                    ].join(' ').trim()
+                }
+                publishDir = [
+                    [
+                        path: { "${params.outdir}/quality_control/trimgalore/fastqc" },
+                        mode: params.publish_dir_mode,
+                        pattern: "*.{html,zip}"
+                    ],
+                    [
+                        path: { "${params.outdir}/quality_control/trimgalore" },
+                        mode: params.publish_dir_mode,
+                        pattern: "*.fq.gz",
+                        enabled: params.save_trimmed
+                    ],
+                    [
+                        path: { "${params.outdir}/quality_control/trimgalore" },
+                        mode: params.publish_dir_mode,
+                        pattern: "*.txt"
+                    ]
                 ]
-            ]
+            }
         }
     }
-}
 
     // PREPARE GENOME
     withName: CLEAN_FASTA {
@@ -143,6 +143,8 @@ if (!params.skip_trimming) {
         ]
     }
 
+    // circRNA
+
     withName: '.*:SEGEMEHL:INDEX' {
         publishDir = [
             path: { "${params.outdir}/references/index/segemehl" },
@@ -152,8 +154,6 @@ if (!params.skip_trimming) {
         ]
     }
 
-    // circRNA
-
     withName: '.*:SEGEMEHL:ALIGN' {
         ext.args = [ "",
             "-b",
@@ -295,13 +295,9 @@ if (!params.skip_trimming) {
     }
 
     withName: '.*:FIND_CIRC:ALIGN' {
-        ext.args = [ "",
-            "--very-sensitive",
-            "--mm",
-            "-D 20",
-            "--score-min=C,-15,0",
-            "-q"
-            ].join(' ').trim()
+        ext.args = { [ "--very-sensitive --mm -D 20 --score-min=C,-15,0 -q", // fixed alignment options, always passed
+            meta.strandedness == 'forward' ? '--norc' : meta.strandedness == 'reverse' ? '--nofw' : '' // strand flag only for explicitly stranded samples
+            ].join(' ').trim() } // trim() avoids a trailing space when no strand flag is added
         publishDir = [
             path: { "${params.outdir}/circrna_discovery/find_circ/intermediates/${meta.id}" },
             mode: params.publish_dir_mode,
@@ -331,6 +327,8 @@ if (!params.skip_trimming) {
     }
 
     withName: '.*:FIND_CIRC:MAIN' {
+        ext.args = { meta.strandedness == 'forward' ? '--norc' : // forward -> --norc
+                meta.strandedness == 'reverse' ? '--nofw' : '' } // reverse -> --nofw; missing/auto/unstranded -> no extra flag
         publishDir = [
             path: { "${params.outdir}/circrna_discovery/find_circ/intermediates/${meta.id}" },
             mode: params.publish_dir_mode,

diff --git a/docs/usage.md b/docs/usage.md
@@ -24,21 +24,22 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th
 A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
 
 ```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+sample,fastq_1,fastq_2,strandedness
+CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto
+CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,
+CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,
+TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,forward
+TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,reverse
+TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,unstranded
+TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,unstranded
 ```
 
-| Column    | Description                                                                                                                                                                            |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| Column         | Description                                                                                                                                                                             |
+| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`       | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`).  |
+| `fastq_1`      | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                              |
+| `fastq_2`      | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                              |
+| `strandedness` | Strandedness of the library. Options are `auto`, `unstranded`, `forward`, `reverse`. Default is `auto`. Make sure to use the same strandedness for each library/run of the same sample. |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 

diff --git a/modules/local/ciriquant/main.nf b/modules/local/ciriquant/main.nf
@@ -26,6 +26,8 @@ process CIRIQUANT {
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = '2.1.0'
+    def strandedness = meta.strandedness ?: 'auto' // a missing value is handled like 'auto'
+    def library_type = strandedness == 'auto' ? '' : "-l ${strandedness == 'unstranded' ? 0 : strandedness == 'forward' ? 1 : 2}" // auto: no -l flag; unstranded -> 0, forward -> 1, anything else -> 2
     """
     BWA=`which bwa`
     HISAT2=`which hisat2`
@@ -48,6 +50,7 @@ process CIRIQUANT {
         --no-gene \\
         -o ${prefix} \\
         -p ${prefix} \\
+        ${library_type} \\
         ${args}
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/dcc/main.nf b/modules/local/dcc/main.nf
@@ -22,13 +22,15 @@ process DCC {
     script:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
+    def strandedness = meta.strandedness ?: 'auto' // a missing value is handled like 'auto'
+    def strand_args = strandedness == 'auto' || strandedness == 'unstranded' ? '-N' : strandedness == 'forward' ? '' : '-ss' // auto/unstranded -> -N, forward -> no flag, anything else -> -ss. NOTE(review): DCC documents -ss for "secondstrand" libraries; confirm that 'reverse' (and not 'forward') is the case that needs it
     if(meta.single_end){
         """
         sed -i 's/^chr//g' $gtf
 
         mkdir ${prefix} && mv ${prefix}.Chimeric.out.junction ${prefix} && printf "${prefix}/${prefix}.Chimeric.out.junction" > samplesheet
 
-        DCC @samplesheet -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus}
+        DCC @samplesheet -D -an $gtf -F -M -Nr 1 1 -A $fasta $strand_args -T ${task.cpus}
 
         awk '{print \$6}' CircCoordinates >> strand
         paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt
@@ -46,7 +48,7 @@ process DCC {
         mkdir ${prefix}_mate1 && mv ${prefix}_mate1.Chimeric.out.junction ${prefix}_mate1 && printf "${prefix}_mate1/${prefix}_mate1.Chimeric.out.junction" > mate1file
         mkdir ${prefix}_mate2 && mv ${prefix}_mate2.Chimeric.out.junction ${prefix}_mate2 && printf "${prefix}_mate2/${prefix}_mate2.Chimeric.out.junction" > mate2file
 
-        DCC @samplesheet -mt1 @mate1file -mt2 @mate2file -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus}
+        DCC @samplesheet -mt1 @mate1file -mt2 @mate2file -D -an $gtf -Pi -F -M -Nr 1 1 -A $fasta $strand_args -T ${task.cpus}
 
         awk '{print \$6}' CircCoordinates >> strand
         paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt

diff --git a/modules/local/find_circ/find_circ/main.nf b/modules/local/find_circ/find_circ/main.nf
@@ -23,6 +23,9 @@ process FIND_CIRC {
 
     script:
     prefix = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args ?: ""   // extra options inserted into the aligner command before -U
+    def args2 = task.ext.args2 ?: "" // extra options passed to find_circ.py
+    def strand_arg = meta.strandedness in ['forward', 'reverse'] ? "--stranded" : "" // only explicitly stranded samples get --stranded
     def VERSION = '1.2'
     """
     INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"`
@@ -37,8 +40,9 @@ process FIND_CIRC {
         --score-min=C,-15,0 \\
         -q \\
         -x \$INDEX \\
+        $args \\
         -U $anchors | \\
-        find_circ.py  --genome=$fasta --prefix=${prefix} --stats=${prefix}.sites.log --reads=${prefix}.sites.reads > ${prefix}.sites.bed
+        find_circ.py --genome=$fasta $strand_arg $args2 --prefix=${prefix} --stats=${prefix}.sites.log --reads=${prefix}.sites.reads > ${prefix}.sites.bed
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/subworkflows/local/utils_nfcore_circrna_pipeline/main.nf b/subworkflows/local/utils_nfcore_circrna_pipeline/main.nf
@@ -170,6 +170,12 @@ def validateInputSamplesheet(input) {
         error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
     }
 
+    // Check that multiple runs of the same sample are of the same strandedness i.e. auto / unstranded / forward / reverse
+    def strandedness_ok = metas.collect{ it.strandedness }.unique().size() == 1
+    if (!strandedness_ok) {
+        error("Please check input samplesheet -> Multiple runs of a sample must be of the same strandedness: ${metas[0].id}")
+    }
+
     return [ metas[0], fastqs ]
 }
 //