diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a5843..b290e0901 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b78de6e65..72dda289a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,7 +18,16 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 6566cc5d0..d8a9ab914 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/circrna then the best place to ask is on the nf-core Slack [#circrna](https://nfcore.slack.com/channels/circrna) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the `debug` profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -116,4 +123,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index d02326640..42e95fffe 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/circrna _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8e964d358..9d44ed38f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,9 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/circ - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/circrna/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/circrna _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/circrna/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/circrna _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
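For context on the `${task.cpus}` correction above, here is a minimal sketch of a labelled process as described in the contribution docs; the process name, tool name, and flags (`EXAMPLE_TOOL`, `example_tool`, `--threads`, `--max-memory`) are hypothetical placeholders, not part of nf-core/circrna:

```nextflow
process EXAMPLE_TOOL {
    // CPU/memory/time defaults come from conf/base.config via this label
    label 'process_medium'

    input:
    path reads

    output:
    path 'out.bam'

    script:
    // task.cpus and task.memory resolve to the resources actually granted,
    // so the tool's settings always match the scheduler allocation
    """
    example_tool --threads ${task.cpus} --max-memory ${task.memory.toGiga()}G ${reads} > out.bam
    """
}
```

Because the resource values come from `conf/base.config` via the label, site-specific configuration profiles can override them without touching the process definition itself.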
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 96940e169..8f7ea0ebb 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,13 +8,13 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/circrna' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -22,13 +22,18 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/circrna/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/circrna/results-${{ github.sha }}" } - profiles: test_full,aws_tower - - uses: actions/upload-artifact@v3 + profiles: test_full + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 6677bb32b..cbea731e1 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,25 +5,29 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/circrna' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/circrna/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/circrna/results-test-${{ github.sha }}" } - profiles: test,aws_tower - - uses: actions/upload-artifact@v3 + profiles: test + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 10173ce45..777845e70 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,13 +13,13 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/circrna' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/circrna ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/circrna ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in 
GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 98016682d..69b4d2d9c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,19 +24,52 @@ jobs: strategy: matrix: NXF_VER: - "22.10.1" + - "23.04.0" - "latest-everything" - profile: - - "test" + steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Hash GitHub workspace + id: hash_workspace + run: | + echo "digest=$(echo RNA_3.10.1_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT + + - name: Cache test data + id: cache-testdata + uses: actions/cache@v3 + with: + path: test-datasets/ + key: ${{ steps.hash_workspace.outputs.digest }} + + - name: Check out test data + if: steps.cache-testdata.outputs.cache-hit != 'true' + uses: actions/checkout@v3 + with: + repository: nf-core/test-datasets + ref: circrna + path: test-datasets/ + + - name: Replace remote paths in samplesheets + run: | + for f in ${{ github.workspace }}/test-datasets/*.csv; do + sed -i "s=https://raw.githubusercontent.com/nf-core/test-datasets/circrna/=${{ github.workspace }}/test-datasets/=g" $f + echo "========== $f ============" + cat $f + echo "========================================" + done; + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --test_data_base ${{ github.workspace }}/test-datasets diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 000000000..0b6b1f272 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 000000000..2d20d6442 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ --revision ${{ env.REPO_BRANCH }} \ --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ --compress "none" \ --container-system 'singularity' \ --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ --container-cache-utilisation 'amend' \ --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 7e2c46b3d..765c84e84 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > 
contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "name=result::pass" >> $GITHUB_OUTPUT - else - echo "name=result::fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes + id: commit-and-push if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/circrna/actions/runs/${{ github.run_id }}) for more details. 
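The rewritten workflow above, and the linting workflow that follows, drive all checks through pre-commit, so a failing lint job can be reproduced locally before pushing. A minimal sketch, assuming a working Python environment:

```bash
# Install pre-commit and run the same checks as CI against every file
pip install pre-commit
pre-commit run --all-files

# Optionally install it as a git hook so the checks run on each commit
pre-commit install
```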
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 858d622ef..1fcafe880 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,34 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.7" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -99,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 396351862..40acc23f5 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,17 +11,17 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 000000000..03ecfcf72 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 84a65f08f..8800274c3 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc test_outdir/ +nf-* +!modules/* diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc8..d6113266e 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,19 @@ image: nfcore/gitpod:latest - +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81c..e0b85a77f 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,2 @@ repository_type: pipeline +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..4dc0f1dcd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + 
rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index d24a1785e..27df403bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [date] +## vdev - [date] Initial release of nf-core/circrna, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index 9b05020cd..7b0c25be0 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,73 +10,157 @@ ## Pipeline tools -- **bedtools** Quinlan, A.R. & Hall, I.M., (2010). BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics , 26(6), pp.841–842. Available at: [http://dx.doi.org/10.1093/bioinformatics/btq033](http://dx.doi.org/10.1093/bioinformatics/btq033). Download: [https://github.com/arq5x/bedtools2/releases](https://github.com/arq5x/bedtools2/) +- [BEDTools](https://pubmed.ncbi.nlm.nih.gov/20110278/) -- **Bowite** Langmead, B., Trapnell, C., Pop, M. et al., (2009). Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol 10, R25. Availabe at: [https://doi.org/10.1186/gb-2009-10-3-r25](https://doi.org/10.1186/gb-2009-10-3-r25). Download: [https://sourceforge.net/projects/bowtie-bio/](https://sourceforge.net/projects/bowtie-bio/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. -- **Bowtie2** Langmead, B. & Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. Available at: [10.1038/nmeth.1923](https:/dx.doi.org/10.1038/nmeth.1923). Download: [http://bowtie-bio.sourceforge.net/bowtie2/index.shtml](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) +- [Bowtie](https://doi.org/10.1186/gb-2009-10-3-r25) -- **bwa** Li, H., & Durbin, R. (2009). Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics , 25(14), 1754–1760. Available at: [https://doi.org/10.1093/bioinformatics/btp324](https://doi.org/10.1093/bioinformatics/btp324). Download: [http://bio-bwa.sourceforge.net/bwa.shtml](http://bio-bwa.sourceforge.net/bwa.shtml). + > Langmead, B., Trapnell, C., Pop, M. et al., 2009. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol 10, R25. doi: 10.1186/gb-2009-10-3-r25 -- **CIRCexplorer2** Zhang XO, Dong R, Zhang Y, Zhang JL, Luo Z, Zhang J, Chen LL, Yang L. (2016). Diverse alternative back-splicing and alternative splicing landscape of circular RNAs. Genome Res. 2016 Sep;26(9):1277-87. Available at: [https://doi.org/10.1101/gr.202895.115](https://doi.org/10.1101/gr.202895.115). Download: [https://circexplorer2.readthedocs.io/en/latest/tutorial/setup/](https://circexplorer2.readthedocs.io/en/latest/tutorial/setup/) +- [Bowtie2](https:/dx.doi.org/10.1038/nmeth.1923) -- **circRNA finder** Westholm, J.O., Lai, E.C., et al. (2016). Genome-wide Analysis of Drosophila Circular RNAs Reveals Their Structural and Sequence Properties and Age-Dependent Neural Accumulation Westholm et al. Cell Reports. 
Available at: [https://doi.org/10.1016/j.celrep.2014.10.062](https://doi.org/10.1016/j.celrep.2014.10.062). Download: [https://github.com/orzechoj/circRNA_finder](https://github.com/orzechoj/circRNA_finder) + > Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: 10.1038/nmeth.1923. -- **CIRIquant** Zhang, J., Chen, S., Yang, J. et al. (2020). Accurate quantification of circular RNAs identifies extensive circular isoform switching events. Nat Commun 11, 90. Available at: [https://doi.org/10.1038/s41467-019-13840-9](https://doi.org/10.1038/s41467-019-13840-9). Download: [https://github.com/bioinfo-biols/CIRIquant](https://github.com/bioinfo-biols/CIRIquant) +- [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) -- **DCC** Jun Cheng, Franziska Metge, Christoph Dieterich, (2016). Specific identification and quantification of circular RNAs from sequencing data, Bioinformatics, 32(7), 1094–1096. Available at: [https://doi.org/10.1093/bioinformatics/btv656](https://doi.org/10.1093/bioinformatics/btv656). Download: [https://github.com/dieterich-lab/DCC](https://github.com/dieterich-lab/DCC) + > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + +- [CIRCexplorer2](https://doi.org/10.1101/gr.202895.115) + + > Zhang XO, Dong R, Zhang Y, Zhang JL, Luo Z, Zhang J, Chen LL, Yang L. (2016). Diverse alternative back-splicing and alternative splicing landscape of circular RNAs. Genome Res. 2016 Sep;26(9):1277-87. + +- [circRNA finder](https://doi.org/10.1016/j.celrep.2014.10.062) + + > Westholm, J.O., Lai, E.C., et al. (2016). Genome-wide Analysis of Drosophila Circular RNAs Reveals Their Structural and Sequence Properties and Age-Dependent Neural Accumulation Westholm et al. Cell Reports. + +- [CIRIquant](https://doi.org/10.1038/s41467-019-13840-9) + + > Zhang, J., Chen, S., Yang, J. et al. (2020). Accurate quantification of circular RNAs identifies extensive circular isoform switching events. Nat Commun 11, 90. + +- [DCC](https://doi.org/10.1093/bioinformatics/btv656) + + > Jun Cheng, Franziska Metge, Christoph Dieterich, (2016). Specific identification and quantification of circular RNAs from sequencing data, Bioinformatics, 32(7), 1094–1096. - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -- **find circ** Memczak, S., Jens, M., Elefsinioti, A., Torti, F., Krueger, J., Rybak, A., Maier, L., Mackowiak, S. D., Gregersen, L. H., Munschauer, M., Loewer, A., Ziebold, U., Landthaler, M., Kocks, C., le Noble, F., & Rajewsky, N. (2013). Circular RNAs are a large class of animal RNAs with regulatory potency. Nature, 495(7441), 333–338. Available at: [https://doi.org/10.1038/nature11928](https://doi.org/10.1038/nature11928). Download: [https://github.com/marvin-jens/find_circ](https://github.com/marvin-jens/find_circ) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [find circ](https://doi.org/10.1038/nature11928) + + > Memczak, S., Jens, M., Elefsinioti, A., Torti, F., Krueger, J., Rybak, A., Maier, L., Mackowiak, S. D., Gregersen, L. H., Munschauer, M., Loewer, A., Ziebold, U., Landthaler, M., Kocks, C., le Noble, F., & Rajewsky, N. (2013). Circular RNAs are a large class of animal RNAs with regulatory potency. Nature, 495(7441), 333–338. 
+ +- [GATK](https://pubmed.ncbi.nlm.nih.gov/20644199/) + + > McKenna A, Hanna M, Banks E, et al.: The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010 Sep;20(9):1297-303. doi: 10.1101/gr.107524.110. Epub 2010 Jul 19. PubMed PMID: 20644199; PubMed Central PMCID: PMC2928508. + +- [HISAT2](https://pubmed.ncbi.nlm.nih.gov/31375807/) + + > Kim D, Paggi JM, Park C, Bennett C, Salzberg SL. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. 2019 Aug;37(8):907-915. doi: 10.1038/s41587-019-0201-4. Epub 2019 Aug 2. PubMed PMID: 31375807. + +- [MapSplice2](https://doi.org/10.1093/nar/gkq622) + + > Wang, K., Liu J., et al. (2010) MapSplice: Accurate mapping of RNA-seq reads for splice junction discovery, Nucleic Acids Research, 38(18), 178. + +- [miRanda](https://doi.org/10.1186/gb-2003-5-1-r1) + + > Enright, A.J., John, B., Gaul, U. et al. (2003). MicroRNA targets in Drosophila. Genome Biol 5, R1. + +- [find circ](https://doi.org/10.1038/nature11928) + + > Memczak, S., Jens, M., Elefsinioti, A., Torti, F., Krueger, J., Rybak, A., Maier, L., Mackowiak, S. D., Gregersen, L. H., Munschauer, M., Loewer, A., Ziebold, U., Landthaler, M., Kocks, C., le Noble, F., & Rajewsky, N. (2013). Circular RNAs are a large class of animal RNAs with regulatory potency. Nature, 495(7441), 333–338. + +- [GATK](https://pubmed.ncbi.nlm.nih.gov/20644199/) + + > McKenna A, Hanna M, Banks E, et al.: The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010 Sep;20(9):1297-303. doi: 10.1101/gr.107524.110. Epub 2010 Jul 19. PubMed PMID: 20644199; PubMed Central PMCID: PMC2928508. -- **HISAT2** Kim, D., Paggi, J.M., Park, C. et al. (2019). Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol 37, 907–915 (2019). Available at: [https://doi.org/10.1038/s41587-019-0201-4](https://doi.org/10.1038/s41587-019-0201-4). Download: [http://daehwankimlab.github.io/hisat2/download/](http://daehwankimlab.github.io/hisat2/download/) +- [HISAT2](https://pubmed.ncbi.nlm.nih.gov/31375807/) -- **MapSplice2** Wang, K., Liu J., et al. (2010) MapSplice: Accurate mapping of RNA-seq reads for splice junction discovery, Nucleic Acids Research, 38(18), 178. Avaialable at: [https://doi.org/10.1093/nar/gkq622](https://doi.org/10.1093/nar/gkq622). Download: [http://www.netlab.uky.edu/p/bioinfo/MapSplice2Download](http://www.netlab.uky.edu/p/bioinfo/MapSplice2Download) + > Kim D, Paggi JM, Park C, Bennett C, Salzberg SL. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. 2019 Aug;37(8):907-915. doi: 10.1038/s41587-019-0201-4. Epub 2019 Aug 2. PubMed PMID: 31375807. -- **miRanda** Enright, A.J., John, B., Gaul, U. et al. (2003). MicroRNA targets in Drosophila. Genome Biol 5, R1. Available at: [https://doi.org/10.1186/gb-2003-5-1-r1](https://doi.org/10.1186/gb-2003-5-1-r1). Download: [http://cbio.mskcc.org/miRNA2003/miranda.html](http://cbio.mskcc.org/miRNA2003/miranda.html). +- [MapSplice2](https://doi.org/10.1093/nar/gkq622) + + > Wang, K., Liu J., et al. (2010) MapSplice: Accurate mapping of RNA-seq reads for splice junction discovery, Nucleic Acids Research, 38(18), 178. + +- [miRanda](https://doi.org/10.1186/gb-2003-5-1-r1) + + > Enright, A.J., John, B., Gaul, U. 
et al. (2003). MicroRNA targets in Drosophila. Genome Biol 5, R1. - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -- **R**: R Core Team (2020). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. Download: [https://www.R-project.org/](https://www.R-project.org/). +- [R](https://www.R-project.org/) + + > R Core Team (2020). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + + - [biomaRt](https://doi.org/10.1038/nprot.2009.97) + + > Durinck S, Spellman PT, Birney E, Huber W. (2009). Mapping identifiers for the integration of genomic datasets with the R/Bioconductor package biomaRt. Nat Protoc. 4(8):1184-91. + + - [circlize](https://doi.org/10.1093/bioinformatics/btu393) + + > Zuguang Gu, Lei Gu, Roland Eils, Matthias Schlesner, Benedikt Brors (2014). circlize implements and enhances circular visualization in R , Bioinformatics, 30,(19) 2811–2812. + + - [DESeq2](https://doi.org/10.1186/s13059-014-0550-8) + + > Love, M.I., Huber, W. & Anders, S. (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol 15, 550. + + - [EnhancedVolcano](https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html) - - **biomaRt** Durinck S, Spellman PT, Birney E, Huber W. (2009). Mapping identifiers for the integration of genomic datasets with the R/Bioconductor package biomaRt. Nat Protoc. 4(8):1184-91. Available at: [https://doi.org/10.1038/nprot.2009.97](https://doi.org/10.1038/nprot.2009.97). Download: [https://bioconductor.org/packages/release/bioc/html/biomaRt.html](https://bioconductor.org/packages/release/bioc/html/biomaRt.html) + > Blighe K, Rana S, Lewis M (2020). EnhancedVolcano: Publication-ready volcano plots with enhanced colouring and labeling. - - **circlize** Zuguang Gu, Lei Gu, Roland Eils, Matthias Schlesner, Benedikt Brors (2014). circlize implements and enhances circular visualization in R , Bioinformatics, 30,(19) 2811–2812. Available at: [https://doi.org/10.1093/bioinformatics/btu393](https://doi.org/10.1093/bioinformatics/btu393). Download: [https://cran.r-project.org/web/packages/circlize/index.html](https://cran.r-project.org/web/packages/circlize/index.html) + - [ggplot2](https://ggplot2.tidyverse.org) - - **DESeq2** Love, M.I., Huber, W. & Anders, S. (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol 15, 550. Available at: [https://doi.org/10.1186/s13059-014-0550-8](https://doi.org/10.1186/s13059-014-0550-8). Download: [https://bioconductor.org/packages/release/bioc/html/DESeq2.html](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) + > Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4. - - **EnhancedVolcano** Blighe K, Rana S, Lewis M (2020). EnhancedVolcano: Publication-ready volcano plots with enhanced colouring and labeling. Download: [https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html](https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html) + - [ggpubr](https://rpkgs.datanovia.com/ggpubr/) - - **ggplot2** Wickham H (2016). 
ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, Download: [https://ggplot2.tidyverse.org](https://ggplot2.tidyverse.org). + > Kassambara A. (2020). ggpubr: 'ggplot2' Based Publication Ready Plots. - - **ggpubr** Kassambara A. (2020). ggpubr: 'ggplot2' Based Publication Ready Plots. Download: [https://rpkgs.datanovia.com/ggpubr/](https://rpkgs.datanovia.com/ggpubr/) + - [ihw](https://doi.org/10.1038/nmeth.3885) - - **ihw** Ignatiadis, N., Klaus, B., Zaugg, J. et al. (2016). Data-driven hypothesis weighting increases detection power in genome-scale multiple testing. Nat Methods 13, 577–580. Available at: [https://doi.org/10.1038/nmeth.3885](https://doi.org/10.1038/nmeth.3885). Download: [https://bioconductor.org/packages/release/bioc/html/IHW.html](https://bioconductor.org/packages/release/bioc/html/IHW.html) + > Ignatiadis, N., Klaus, B., Zaugg, J. et al. (2016). Data-driven hypothesis weighting increases detection power in genome-scale multiple testing. Nat Methods 13, 577–580. - - **PCAtools** Blighe K, Lun A (2020). PCAtools: PCAtools: Everything Principal Components Analysis. Download: [https://bioconductor.org/packages/release/bioc/html/PCAtools.html](https://bioconductor.org/packages/release/bioc/html/PCAtools.html) + - [PCAtools](https://bioconductor.org/packages/release/bioc/html/PCAtools.html) - - **pheatmap** Kolde, R. (2019) Pretty Heatmaps. Download: [https://cran.r-project.org/package=pheatmap](https://cran.r-project.org/package=pheatmap) + > Blighe K, Lun A (2020). PCAtools: PCAtools: Everything Principal Components Analysis. - - **pvclust** Suzuki R., Shimodaira H., (2006). Pvclust: an R package for assessing the uncertainty in hierarchical clustering, Bioinformatics, 22(12), 1540–1542. Available at: [https://doi.org/10.1093/bioinformatics/btl117](https://doi.org/10.1093/bioinformatics/btl117). Download: [https://cran.r-project.org/web/packages/pvclust/index.html](https://cran.r-project.org/web/packages/pvclust/index.html) + - [pheatmap](https://cran.r-project.org/package=pheatmap) -- **SAMtools** Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., … 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics , 25(16), 2078–2079. [https://doi.org/10.1093/bioinformatics/btp352](https://doi.org/10.1093/bioinformatics/btp352). Download: [http://www.htslib.org/](http://www.htslib.org/) + > Kolde, R. (2019) Pretty Heatmaps. -- **Segemehl** Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502. Available at: [https://doi.org/10.1371/journal.pcbi.1000502](https://doi.org/10.1371/journal.pcbi.1000502). Download: [https://www.bioinf.uni-leipzig.de/Software/segemehl/](https://www.bioinf.uni-leipzig.de/Software/segemehl/) + - [pvclust](https://doi.org/10.1093/bioinformatics/btl117) -- **STAR** Dobin, A., Davis, C. A., Schlesinger, F., Drenkow, J., Zaleski, C., Jha, S., Batut, P., Chaisson, M., & Gingeras, T. R. (2013). STAR: ultrafast universal RNA-seq aligner. Bioinformatics (Oxford, England), 29(1), 15–21. Available at: [https://doi.org/10.1093/bioinformatics/bts635](https://doi.org/10.1093/bioinformatics/bts635). Download: [https://github.com/alexdobin/STAR](https://github.com/alexdobin/STAR) + > Suzuki R., Shimodaira H., (2006). 
Pvclust: an R package for assessing the uncertainty in hierarchical clustering, Bioinformatics, 22(12), 1540–1542. -- **StringTie** Pertea, M., Pertea, G., Antonescu, C. et al. (2015). StringTie enables improved reconstruction of a transcriptome from RNA-seq reads. Nat Biotechnol 33, 290–295. Available at: [https://doi.org/10.1038/nbt.3122](https://doi.org/10.1038/nbt.3122). Download: [https://ccb.jhu.edu/software/stringtie/](https://ccb.jhu.edu/software/stringtie/) +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) -- **TargetScan** Agarwal V, Bell GW, Nam JW, Bartel DP. (2015). Predicting effective microRNA target sites in mammalian mRNAs. Elife, 4:e05005. Available at: [https://doi.org/10.7554/elife.05005](https://doi.org/10.7554/elife.05005). Download: [http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi](http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi) + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. -- **ViennaRNA** Lorenz, R., Bernhart, S.H., Höner zu Siederdissen, C. et al. (2011). ViennaRNA Package 2.0. Algorithms Mol Biol 6, 26. Available at: [https://doi.org/10.1186/1748-7188-6-26](https://doi.org/10.1186/1748-7188-6-26). Download: [https://www.tbi.univie.ac.at/RNA/#download](https://www.tbi.univie.ac.at/RNA/#download) +- [Segemehl](https://doi.org/10.1371/journal.pcbi.1000502) + + > Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [StringTie2](https://pubmed.ncbi.nlm.nih.gov/31842956/) + + > Kovaka S, Zimin AV, Pertea GM, Razaghi R, Salzberg SL, Pertea M. Transcriptome assembly from long-read RNA-seq alignments with StringTie2. Genome Biol. 2019 Dec 16;20(1):278. doi: 10.1186/s13059-019-1910-1. PubMed PMID: 31842956; PubMed Central PMCID: PMC6912988. + +- [TargetScan](https://doi.org/10.7554/elife.05005) + + > Agarwal V, Bell GW, Nam JW, Bartel DP. (2015). Predicting effective microRNA target sites in mammalian mRNAs. Elife, 4:e05005. + +- [ViennaRNA](https://doi.org/10.1186/1748-7188-6-26) + + > Lorenz, R., Bernhart, S.H., Höner zu Siederdissen, C. et al. (2011). ViennaRNA Package 2.0. Algorithms Mol Biol 6, 26. ## Test data References -Dong Cao (2021). An autoregulation loop in fust-1 for circular RNA regulation in Caenorhabditis elegans. Biorxiv. Available at: [https://doi.org/10.1101/2021.03.22.436400](https://doi.org/10.1101/2021.03.22.436400). +> Cao D. An autoregulation loop in fust-1 for circular RNA regulation in Caenorhabditis elegans. Genetics. 2021 Nov 5;219(3):iyab145. doi: 10.1093/genetics/iyab145. PMID: 34740247; PMCID: PMC8570788. ## Software packaging/containerisation tools @@ -94,5 +178,8 @@ Dong Cao (2021). 
An autoregulation loop in fust-1 for circular RNA regulation in - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f1..c089ec78c 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. 
-We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. 
This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. 
Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. 
-You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s).
+You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team.
+
+Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course.
+
+All reports will be handled with the utmost discretion and confidentiality.
+
+You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include:
+
+- Your contact information.
+- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct.
+- The behaviour that was in violation and the circumstances surrounding the incident.
+- The approximate time of the behaviour (if different than the time the report was made).
+- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing.
+- If there is a publicly available record (e.g. mailing list record, a screenshot).
+- Any additional information.
+
+After you file a report, one or more members of our Safety Team will contact you to follow up on your report.
+
+## Who will read and handle reports
+
+All reports will be read and handled by the members of the Safety Team at nf-core.
+
+If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups.
+
+To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+
+## Reviewing reports
+
+After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is an immediate threat to participants’ safety.
+
+The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, and decide on a course of action.
+
+In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information.

-Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course.

+Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report.

-All reports will be handled with utmost discretion and confidentially.
+## Confidentiality
+
+All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse.
+
+We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved.
+
+## Enforcement
+
+Actions taken by the nf-core Safety Team may include, but are not limited to:
+
+- Asking anyone to stop a behaviour.
+- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently.
+- Removing access to gather.town and Slack, either temporarily or permanently.
+- Communicating to all participants to reinforce our expectations for conduct and remind them of what is unacceptable behaviour; this may be public for practical reasons.
+- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident.
+- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently.
+- No action.

## Attribution and Acknowledgements

@@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially.

## Changelog

-### v1.0 - March 12th, 2021
+### v1.4 - February 8th, 2022
+
+- Included a new member of the Safety Team. Corrected a typographical error in the text.
+
+### v1.3 - December 10th, 2021
+
+- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+
+### v1.2 - November 12th, 2021
+
+- Removed information specific to reporting CoC violations at the Hackathon in October 2021.
+
+### v1.1 - October 14th, 2021
+
+- Updated with names of new Safety Officers and specific information for the hackathon in October 2021.
+
+### v1.0 - March 15th, 2021

- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.

diff --git a/README.md b/README.md
index 7ca6d3b52..a1e288e12 100644
--- a/README.md
+++ b/README.md
@@ -1,82 +1,143 @@
-# ![nf-core/circrna](docs/images/nf-core-circrna_logo_light.png#gh-light-mode-only) ![nf-core/circrna](docs/images/nf-core-circrna_logo_dark.png#gh-dark-mode-only)
+<h1>

+ + + nf-core/circrna + +

-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/circrna/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions CI Status](https://github.com/nf-core/circrna/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/circrna/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/circrna/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/circrna/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/circrna/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![GitHub Actions CI Status](https://github.com/nf-core/circrna/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/circrna/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/circrna/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/circrna/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/circrna/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/circrna) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/circrna) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23circrna-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/circrna)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23circrna-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/circrna)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/circrna** is a bioinformatics best-practice analysis pipeline for circRNA quantification, differential expression analysis and miRNA target prediction of RNA-Seq data. - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/circrna/results). - -## Pipeline summary - -1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) -3. circRNA quantification - 1. [`CIRIquant`](https://github.com/Kevinzjy/CIRIquant) - 2. [`STAR 2-Pass mode`](https://github.com/alexdobin/STAR) - 1. [`CIRCexplorer2`](https://circexplorer2.readthedocs.io/en/latest/) - 2. [`circRNA finder`](https://github.com/orzechoj/circRNA_finder) - 3. [`DCC`](https://github.com/dieterich-lab/DCC) - 3. [`find circ`](https://github.com/marvin-jens/find_circ) - 4. [`MapSplice`](http://www.netlab.uky.edu/p/bioinfo/MapSplice2) - 5. [`Segemehl`](https://www.bioinf.uni-leipzig.de/Software/segemehl/) -4. circRNA annotation -5. Export mature spliced length as FASTA file -6. Annotate parent gene, underlying transcripts. -7. circRNA count matrix -8. miRNA target prediction - 1. [`miRanda`](http://cbio.mskcc.org/miRNA2003/miranda.html) - 2. [`TargetScan`](http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi) - 3. Filter results, miRNAs must be called by both tools -9. Differential expression analysis [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) -10. Circular - Linear ratio tests ['CircTest'](https://github.com/dieterich-lab/CircTest) -11. MultiQC report [`MultiQC`](http://multiqc.info/) - -## Quick Start - -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) +**nf-core/circrna** is a bioinformatics pipeline to analyse total RNA sequencing data obtained from organisms with a reference genome and annotation. It takes a samplesheet and FASTQ files as input, performs quality control (QC), trimming, back-splice junction (BSJ) detection, annotation, quantification and miRNA target prediction of circular RNAs. -2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. +The pipeline is still under development, but the BSJ detection and quantification steps are already implemented and functional. The following features are planned to be implemented soon: -3. Download the pipeline and test it on a minimal dataset with a single command: +- Isoform-level circRNA detection and quantification +- circRNA-miRNA interaction analysis using [SPONGE](https://doi.org/10.1093/bioinformatics/btz314) and [spongEffects](https://doi.org/10.1093/bioinformatics/btad276) +- Improved downstream analyses - ```bash - nextflow run nf-core/circrna -profile test,YOURPROFILE --outdir - ``` +If you want to contribute, feel free to create an issue or pull request on the [GitHub repository](https://github.com/nf-core/circrna) or join the [Slack channel](https://nf-co.re/join/slack). - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. - - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +## Pipeline summary -4. Start running your own analysis! 
+![Metro Map](./docs/images/metro-map.png) + +- Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +- Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) +- BSJ detection + - [`CIRIquant`](https://github.com/Kevinzjy/CIRIquant) + - [`STAR 2-Pass mode`](https://github.com/alexdobin/STAR) + - [`CIRCexplorer2`](https://circexplorer2.readthedocs.io/en/latest/) + - [`circRNA finder`](https://github.com/orzechoj/circRNA_finder) + - [`DCC`](https://github.com/dieterich-lab/DCC) + - [`find circ`](https://github.com/marvin-jens/find_circ) + - [`MapSplice`](http://www.netlab.uky.edu/p/bioinfo/MapSplice2) + - [`Segemehl`](https://www.bioinf.uni-leipzig.de/Software/segemehl/) +- circRNA annotation + - Based on a GTF file + - Based on database files (if provided) +- Extract circRNA sequences and build circular transcriptome +- Merge circular transcriptome with linear transcriptome derived from provided GTF +- Quantification of combined circular and linear transcriptome + - [`psirc-quant`](https://github.com/Christina-hshi/psirc) +- miRNA binding affinity analysis (only if the `mature` parameter is provided) + - Normalizes miRNA expression (only if the `mirna_expression` parameter is provided) + - Binding site prediction + - [`miRanda`](http://cbio.mskcc.org/miRNA2003/miranda.html) + - [`TargetScan`](http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi) + - Perform majority vote on binding sites + - Compute correlations between miRNA and transcript expression levels (only if the `mirna_expression` parameter is provided) +- Statistical tests (only if the `phenotype` parameter is provided) + - [`CircTest`](https://github.com/dieterich-lab/CircTest) +- MultiQC report [`MultiQC`](http://multiqc.info/) + +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +First, prepare a samplesheet with your input data that looks as follows: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL,CONTROL_R1.fastq.gz,CONTROL_R2.fastq.gz +TREATMENT,TREATMENT_R1.fastq.gz,TREATMENT_R2.fastq.gz +``` + +Each row represents a fastq file (single-end) or a pair of fastq files (paired end). + +Now, you can run the pipeline using: + +```bash +nextflow run nf-core/circrna \ + -profile \ + --input samplesheet.csv \ + --outdir +``` + +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). + +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/circrna/usage) and the [parameter documentation](https://nf-co.re/circrna/parameters). + +## Pipeline output + +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/circrna/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/circrna/output). 
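+
+In addition to the required columns shown under Usage above, the input schema accepts an optional `strandedness` column per sample (one of `unstranded`, `forward`, `reverse` or `auto`; `auto` is assumed when the column is omitted), and the statistical tests read sample-to-condition assignments from a separate file passed via the `phenotype` parameter. A minimal sketch of both files, following the JSON schemas under `assets/` further down in this diff; the condition labels are purely illustrative:
+
+```csv title="samplesheet.csv"
+sample,fastq_1,fastq_2,strandedness
+CONTROL,CONTROL_R1.fastq.gz,CONTROL_R2.fastq.gz,forward
+TREATMENT,TREATMENT_R1.fastq.gz,TREATMENT_R2.fastq.gz,auto
+```
+
+```csv title="phenotype.csv"
+sample,condition
+CONTROL,control
+TREATMENT,treatment
+```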
-   ```bash
-   nextflow run nf-core/circrna --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/institute> --tool 'ciriquant' --module 'circrna_discovery,mirna_prediction,differential_expression' --bsj_reads 2
-   ```

-## Documentation
+## Credits

-The nf-core/circrna pipeline comes with documentation about the pipeline [usage](https://nf-co.re/circrna/usage), [parameters](https://nf-co.re/circrna/parameters) and [output](https://nf-co.re/circrna/output).
+nf-core/circrna was originally written by [Barry Digby](https://github.com/BarryDigby).
+It was later refactored, extended and improved by [Nico Trummer](https://github.com/nictru).

-## Credits
+We thank the following people for their extensive assistance in the development of this pipeline (in alphabetical order):

-nf-core/circrna was originally written by Barry Digby.
+- [Alexander Peltzer](https://github.com/apeltzer)
+- [Ben Whittle](https://github.com/bj-w)
+- [Kevin Menden](https://github.com/KevinMenden)
+- [Malte Weyrich](https://github.com/mweyrich28)
+- [Marieke Vromman](https://github.com/MariekeVromman)
+- [Maxime Garcia](https://github.com/maxulysse)
+- [Phil Ewels](https://github.com/ewels)

-We thank the following people for their extensive assistance in the development of this pipeline:
+## Acknowledgements
+
+![SFI](./docs/images/Genomics-Data-Science-original.png)

## Contributions and Support

@@ -87,7 +148,14 @@ For further information or help, don't hesitate to get in touch on the [Slack `#

## Citations

-
+
+
+> **nf-core/circrna: a portable workflow for the quantification, miRNA target prediction and differential expression analysis of circular RNAs.**
+>
+> Barry Digby, Stephen P. Finn, & Pilib Ó Broin
+>
+> [BMC Bioinformatics 24, 27 (2023)](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-022-05125-8)
+> doi: [10.1186/s12859-022-05125-8](https://doi.org/10.1186/s12859-022-05125-8)

An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.

diff --git a/assets/email_template.html b/assets/email_template.html
index 03b6ba6ed..ddd621265 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -12,7 +12,7 @@

-

nf-core/circrna v${version}

+

nf-core/circrna ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 65572a8f2..927b5b6a4 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/circrna v${version} + nf-core/circrna ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 32e39806b..3affe6b57 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/circrna Methods Description" section_href: "https://github.com/nf-core/circrna" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/circrna v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

Data was processed using nf-core/circrna v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+

${tool_citations}

References

    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ecd398c22..570f6c0dc 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/circrna + This report has been generated by the nf-core/circrna analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-circrna-methods-description": order: -1000 @@ -11,3 +11,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-circrna_logo_light.png b/assets/nf-core-circrna_logo_light.png index b1316cff2..85a200b32 100644 Binary files a/assets/nf-core-circrna_logo_light.png and b/assets/nf-core-circrna_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7b..8d222a80e 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fastq_1,fastq_2,strandedness +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,forward +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,auto diff --git a/assets/schema_annotation.json b/assets/schema_annotation.json new file mode 100644 index 000000000..4158373d5 --- /dev/null +++ b/assets/schema_annotation.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/circrna/master/assets/schema_annotation.json", + "title": "nf-core/circrna pipeline - params.annotation schema", + "description": "Schema for the file provided with params.annotation", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Annotation file name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "file": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bed$", + "errorMessage": "Annotation file must be provided and must be a BED file" + }, + "min_overlap": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.9, + "errorMessage": "Minimum overlap must be a number between 0 and 1", + "meta": ["min_overlap"] + } + }, + "required": ["name", "file"] + } +} diff --git a/assets/schema_input.json b/assets/schema_input.json index 41716366c..7e9257a06 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,25 +10,29 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + 
"format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "strandedness": { + "type": "string", + "enum": ["unstranded", "forward", "reverse", "auto"], + "default": "auto", + "errorMessage": "Strandedness must be one of 'unstranded', 'forward', 'reverse' or 'auto'", + "meta": ["strandedness"] } }, "required": ["sample", "fastq_1"] diff --git a/assets/schema_phenotype.json b/assets/schema_phenotype.json new file mode 100644 index 000000000..bdeeaef82 --- /dev/null +++ b/assets/schema_phenotype.json @@ -0,0 +1,23 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/circrna/master/assets/schema_phenotype.json", + "title": "nf-core/circrna pipeline - params.phenotype schema", + "description": "Schema for the file provided with params.phenotype", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "condition": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Condition name must be provided and cannot contain spaces" + } + }, + "required": ["sample", "condition"] + } +} diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f27..aa9256200 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/circrna ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/DEA.R b/bin/DEA.R deleted file mode 100755 index 5e1658f70..000000000 --- a/bin/DEA.R +++ /dev/null @@ -1,656 +0,0 @@ -#!/usr/bin/env Rscript - -# Automated differential expression analysis script for nf-core/circrna -# Relies on Stirngtie prepde.py outputs -# Would be fun to adapt to STAR + Kallisto for own use in future. 
- -get_args <- function(){ - - argp <- arg_parser( - description="this script is designed to automate DESeq2 analysis for circRNA nextflow script", - hide.opts=TRUE) - - argp <- add_argument( - parser=argp, - arg="gene_counts", - short="g", - help="gene_count_matrix.csv produced by prepDE.py downstream of Stringtie quant with -e flag", - default="gene_count_matrix.csv") - - argp <- add_argument( - parser=argp, - arg="phenotype", - short="p", - help="file containing sample metadata information to guide the design", - default="phenotype.csv") - - argp <- add_argument( - parser=argp, - arg="circRNA", - short="c", - help="circRNA counts matrix", - default="circRNA_matrix.txt") - - argp <- add_argument( - parser=argp, - arg="species", - short="s", - help="species ID", - default="hsa") - - argp <- add_argument( - parser=argp, - arg="map", - short="m", - help="ensDB", - "default"="ensemblDatabase.txt") - - argv <- parse_args( - parser=argp, - argv = commandArgs(trailingOnly = TRUE)) - - return(argv) - -} - -giveError <- function(message){ - cat(paste("\n", message, sep="")) - quit() -} - -usage <- function(){giveError("USAGE: DEA.R ")} - - -stage_data <- function(gene_counts, phenotype, circRNA, species, map){ - - inputdata <- list() - - dbmap <- read.table(map, sep="\t", header=T, quote="", stringsAsFactors=FALSE) - gene_mat <- read.csv(gene_counts, row.names="gene_id", check.names=F) - circ <- read.table(circRNA, sep ="\t", header = T, stringsAsFactors=FALSE) - - # Merge circRNA genomic loci to ID - circ$circ <- with(circ, paste0(Chr, sep=":", Start, sep="-", Stop, sep=":", Strand)) - rownames(circ) <- circ$circ - circ <- subset(circ, select=-c(Chr, Start, Stop, Strand, circ)) - - # R converts '-' to '.' in colnames here and results in failures. - # If you need to make this 'smarter' check that colnames contains '.', - # compare gsub to colnames(gene_mat), then apply. - colnames(circ) <- gsub("\\.", "-", colnames(circ)) - - ## add pseudocount of 1 - gene_mat <- gene_mat + 1 - circ <- circ + 1 - - inputdata$pheno <- checkinputdata(phenotype) - cols <- rownames(inputdata$pheno) - - if(identical(rownames(inputdata$pheno), colnames(gene_mat))){ - circ <- circ[, cols,] - }else{ - giveError(c("Samples in phenotype file do not match sequencing sample names.\n", - "Please check that phenotype samples match gene_count_matrix.csv headers.\n", - "*Make sure they are sorted in alphabetical order:", - "tail -n +2 phenotype.txt | sort -k1,1\n\n")) - } - - inputdata$gene <- gene_mat - inputdata$circ <- circ - inputdata$design <- makedesign(inputdata$pheno) - inputdata$species <- species - inputdata$map <- dbmap - - inputdata$gene <- ens2symbol(inputdata$gene, inputdata) - - save.image(file='inputdata.RData') - - return(inputdata) -} - - -checkinputdata <- function(phenotype){ - - # Stage phenotype file - pheno <- read.csv(phenotype, row.names=1, header = T, stringsAsFactors = T) - - # Check if there are at least 3 replicates (DESeq2 fails if < 3) - if(min(table(pheno$condition)) >= 3){ - print("Suitable sample size for DE analysis") - }else{ - giveError("Not enough samples per condition to perform DE analysis!") - } - - # Rename sex to gender.. - if("sex" %in% names(pheno)){ - print("Renaming sex to gender in phenotype file") - rename <- gsub("sex", "gender", names(pheno)) - names(pheno) <- rename - } - - # Check gender is only male, female, unknown - if ("gender" %in% names(pheno)) { - if (! 
all(unique(pheno$gender) %in% c("m", "f", "u"))) { - giveError("SAMPLEINFO ERROR:\nOnly the values m [male], f [female] and u [unknown] are supported in field .\n") - } - } - - ## check if all columns are factors. If numeric, convert to factor. - factor_cols <- sapply(pheno, is.factor) - if(all(factor_cols) == TRUE){ - print("All columns in phenotype are factors and suitable for analysis.") - }else{ - numeric_cols <- sapply(pheno, is.numeric) - names <- colnames(pheno)[numeric_cols] - print(paste0("Column(s) ", names, " is numeric. Converting to factor.")) - pheno[numeric_cols] <- as.data.frame(lapply(pheno[numeric_cols], factor)) - final_check <- sapply(pheno, is.factor) - if(all(final_check) == TRUE){ - print("Finished coverting to factor") - }else{ - giveError("Error in converting to factors. See checkinputdata function.") - } - } - - return(pheno) - -} - - - -makedesign <- function(phenotype){ - - # Covariates i.e explanatory variables. - covariates <- names(phenotype)[which(!names(phenotype) %in% c("condition"))] - design <- formula(paste("~", paste(c(covariates, "condition"), sep="", collapse=" + "))) - return(design) - -} - - - - -ens2symbol <- function(mat, inputdata){ - - ## designed to work on input gene_count_matrix.csv file - ## everything else downstream no longer needs to be converted - - ## figure out if working with ENS, or ENS IDs - - mat <- as.data.frame(mat) - map <- inputdata$map - species <- inputdata$species - - if(all(grepl(pattern="^ENSG", rownames(mat)))){ - filter = "ensembl_gene_id" - if(all(grepl(pattern=".", rownames(mat)))){ - filter = "ensembl_gene_id_version" - } - }else{ - filter = "external_gene_name" - } - - if(filter == "external_gene_name"){ - print("Using external gene name as gene symbol") - }else{ - print("Setting up Mart to convert ENS IDs to gene symbols") - ## set up Mart - mart_call <- as.character(subset(map$command, map$species == species)) - print("ENS2SYMBOL") - mart <- eval(str2expression(mart_call)) - } - - ## now go about converting ENS2SYMBOL - if(filter == "ensembl_gene_id"){ - - mat$ensembl_gene_id <- rownames(mat) - info <- getBM(attributes=c("ensembl_gene_id","external_gene_name"), - filters = c("ensembl_gene_id"), - values = mat$ensembl_gene_id, - mart = mart, - useCache=FALSE) - - tmp <- merge(mat, info, by="ensembl_gene_id") - tmp$external_gene_name <- make.names(tmp$external_gene_name, unique = T) - rownames(tmp) <- tmp$external_gene_name - tmp <- subset(tmp, select=-c(ensembl_gene_id, external_gene_name)) - - mat <- tmp - print("input mat ensembl gene id detected and converted") - return(mat) - }else if(filter == "ensembl_gene_id_version"){ - - mat$ensembl_gene_id_version <- rownames(mat) - info <- getBM(attributes=c("ensembl_gene_id_version","external_gene_name"), - filters = c("ensembl_gene_id_version"), - values = mat$ensembl_gene_id_version, - mart = mart, - useCache=FALSE) - - tmp <- merge(mat, info, by="ensembl_gene_id_version") - tmp$external_gene_name <- make.names(tmp$external_gene_name, unique = T) - rownames(tmp) <- tmp$external_gene_name - tmp <- subset(tmp, select=-c(ensembl_gene_id_version, external_gene_name)) - - mat <- tmp - print("input mat ensembl gene id version detected and converted") - return(mat) - }else{ - print("NO change made to input mat ") - return(mat) - } - -} - -get_upregulated <- function(df){ - - key <- intersect(rownames(df)[which(df$log2FoldChange>=1)], rownames(df)[which(df$pvalue<=0.05)]) - results <- as.data.frame((df)[which(rownames(df) %in% key),]) - return(results) - -} - 
-get_downregulated <- function(df){ - - key <- intersect(rownames(df)[which(df$log2FoldChange<=-1)], rownames(df)[which(df$pvalue<=0.05)]) - results <- as.data.frame((df)[which(rownames(df) %in% key),]) - return(results) - -} - -# Data type provided at end of script to activate RNA-Seq / circRNA analysis. -DESeq2 <- function(inputdata, data_type){ - - if(data_type == "RNA-Seq"){ - outdir <- "RNA-Seq/" - - dds <- DESeqDataSetFromMatrix( - countData=inputdata$gene, - colData=inputdata$pheno, - design = inputdata$design) - - levels <- as.character(unique(inputdata$pheno$condition)) - for(level in levels){ - reference <- level - contrasts <- levels[levels != paste0(reference)] - dds$condition <- relevel(dds$condition, ref = paste0(reference)) - dds <- DESeq(dds, quiet=TRUE) - - DESeq2_plots(dds, outdir) - - for(var in contrasts){ - contrast <- paste(var, "vs", reference, sep="_") - DEG <- getDESeqDEAbyContrast(dds, contrast, reference, var, outdir, inputdata) - } - - save.image(file='RNA-Seq.RData') - - } - }else if(data_type == "circRNA"){ - outdir <- "circRNA/" - - ## use gene sizeFactors - tmp <- DESeqDataSetFromMatrix( - countData=inputdata$gene, - colData=inputdata$pheno, - design = inputdata$design) - tmp <- DESeq(tmp, quiet=TRUE) - - sizefactors <- estimateSizeFactorsForMatrix(counts(tmp)) - rm(tmp) - - dds <- DESeqDataSetFromMatrix( - countData=inputdata$circ, - colData=inputdata$pheno, - design = inputdata$design) - - levels <- as.character(unique(inputdata$pheno$condition)) - for(level in levels){ - reference <- level - contrasts <- levels[levels != paste0(reference)] - dds$condition <- relevel(dds$condition, ref = paste0(reference)) - dds <- DESeq(dds, quiet=TRUE) - sizeFactors(dds) <- sizefactors - - DESeq2_plots(dds, outdir) - - for(var in contrasts){ - contrast <- paste(var, "vs", reference, sep="_") - DEG <- getDESeqDEAbyContrast(dds, contrast, reference, var, outdir) - } - - save.image(file='circRNA.RData') - } - }else{ - giveError("Data type not provided correctly, check end of script") - } - return(DEG) -} - - -getDESeqDEAbyContrast <- function(dds, contrast, reference, var, outdir, inputdata) { - - res <- results(dds, filterFun=ihw, alpha=0.05, contrast=c("condition", var, reference)) - cat('\n\nSummary data from DESeq2 for ', contrast, ':', sep="") - summary(res) - - ma_plot(res, contrast, outdir) - - up_regulated <- get_upregulated(res) - down_regulated <- get_downregulated(res) - - de_up <- rownames(up_regulated) - de_down <- rownames(down_regulated) - de <- c(de_up, de_down) - cts <- counts(dds, normalized=T) - - # attempt boxplots here - if(outdir == "circRNA/"){ - make_boxplots(de, cts, contrast) - } - - log2 <- log2(cts +1) - global_heatmap(de, log2, contrast, outdir) - - if(outdir == "RNA-Seq/"){ - up_regulated <- tibble::rownames_to_column(up_regulated, "ID") - down_regulated <- tibble::rownames_to_column(down_regulated, "ID") - }else{ - up_regulated <- tibble::rownames_to_column(up_regulated, "ID") - down_regulated <- tibble::rownames_to_column(down_regulated, "ID") - } - - dir <- paste(outdir, contrast, sep="") - dir.create(dir) - write.table(up_regulated, file.path(dir, paste("DESeq2", contrast, "up_regulated_differential_expression.txt", sep="_")), sep="\t", row.names=F, quote=F) - write.table(down_regulated, file.path(dir, paste("DESeq2", contrast, "down_regulated_differential_expression.txt", sep="_")), sep="\t", row.names=F, quote=F) - - res_df <- as.data.frame(res) - - #if(outdir == "RNA-Seq/"){ - # ann_res <- ens2symbol(res_df, inputdata) - #}else{ - # 
ann_res <- res_df - #} - - volcano_plot(res_df, contrast, outdir) - - pdf(file.path(dir, paste("DESeq2", contrast, "fold_change_distribution.pdf", sep="_")), width=8, height=8) - hist(res$log2FoldChange, breaks=50, col="seagreen", xlab=paste("(Fold change)", contrast, sep=" "), main="Distribution of differential expression fold change") - abline(v=c(-1,1), col="black", lwd=2, lty=2) - legend("topright", "Fold change <-1 and >1", lwd=2, lty=2) - dev.off() - - pdf(file.path(dir, paste("DESeq2", contrast, "pvalue_distribution.pdf", sep="_")), width=8, height=8) - hist(res$pvalue, breaks=50, col="seagreen", xlab=paste("P-Value (Fold change)", contrast, sep=" "), main="Distribution of P-Values") - abline(v=c(0.05),col="black",lwd=2,lty=2) - legend("topright", "P-Value <0.05",lwd=2,lty=2) - dev.off() - - pdf(file.path(dir, paste("DESeq2", contrast, "Adj_pvalue_distribution.pdf", sep="_")), width=8, height=8) - hist(res$padj, breaks=50, col="seagreen", xlab=paste("P-Adj (Fold change)", contrast, sep=" "), main="Distribution of AdjP-Values") - abline(v=c(0.05),col="black",lwd=2,lty=2) - legend("top", "P-Adj <0.05",lwd=2,lty=2) - dev.off() -} - - -DESeq2_plots <- function(dds, outdir){ - - dir.create("DESeq2_QC") - dir.create(paste("DESeq2_QC/", outdir, sep="")) - dir=paste("DESeq2_QC/", outdir, sep="") - - pdf(file.path(dir, "DESeq2_dispersion.pdf"), width=8, height=8) - plotDispEsts(dds) - dev.off() - - counts <- counts(dds, normalized=T) - - if(outdir == "RNA-Seq/"){ - counts <- ens2symbol(counts, inputdata) - log2 <- log2(counts + 1) - }else{ - counts <- as.data.frame(counts) - log2 <- log2(counts + 1) - } - - write_counts <- tibble::rownames_to_column(counts, "ID") - write_log2 <- tibble::rownames_to_column(log2, "ID") - - write.table(write_counts, file.path(outdir, "DESeq2_normalized_counts.txt"), sep="\t", quote=F, row.names = F) - write.table(write_log2, file.path(outdir, "DESeq2_log2_transformed_counts.txt"), sep="\t", quote=F, row.names = F) - - sample_to_sample_heatmap(log2, outdir) - sample_to_sample_dendogram(log2, outdir) - PCA_plot(log2, outdir) - -} - - -ma_plot <- function(res, contrast, outdir){ - dir <- paste(outdir, contrast, sep="") - dir.create(dir) - pdf(file.path(dir, paste("DESeq2", contrast, "MA_plot.pdf", sep="_")), width=8, height=8) - plotMA(res) - dev.off() - -} - -sample_to_sample_heatmap <- function(log2, outdir){ - - dir.create("DESeq2_QC") - dir.create(paste("DESeq2_QC/", outdir, sep="")) - dir=paste("DESeq2_QC/", outdir, sep="") - - sampleDists <- dist(t(log2)) - sampleDistMatrix <- as.matrix(sampleDists) - pdf(file.path(dir, "DESeq2_sample_heatmap.pdf"), width=8, height=8) - pheatmap(mat=sampleDistMatrix, - clustering_distance_rows=sampleDists, - clustering_distance_cols=sampleDists, - col=colorRampPalette( rev(brewer.pal(9, "Blues")) )(255), - fontsize_row=8) - dev.off() - -} - - - -sample_to_sample_dendogram <- function(log2, outdir){ - - dir.create("DESeq2_QC") - dir.create(paste("DESeq2_QC/", outdir, sep="")) - dir=paste("DESeq2_QC/", outdir, sep="") - - d=t(log2) - d=dist(d) - hc=hclust(d, method="complete") - print("test hclust") - print(head(d)) - pdf(file.path(dir, "DESeq2_sample_dendogram.pdf")) - plot(hc) - dev.off() - -} - - -PCA_plot <- function(log2, outdir){ - - p <- pca(log2, metadata=inputdata$pheno) - - for(exp_var in names(inputdata$pheno)){ - dir.create("DESeq2_QC") - dir=paste("DESeq2_QC/", outdir, sep="") - pdf(file.path(dir, paste("DESeq2", exp_var, "PCA.pdf", sep="_"))) - biplot <- biplot(p, - colby=paste(exp_var), - hline=0, - vline=0, - 
legendPosition="right", - legendLabSize=12, - legendIconSize=8, - lab = TRUE, - labSize = 0.0, - drawConnectors=FALSE, - title="PCA bi-plot", - subtitle="PC1 vs. PC2") - plot(biplot) - dev.off() - } -} - - -volcano_plot <- function(res, contrast, outdir){ - - res <- na.omit(res) - - min_width <- min(res$log2FoldChange) - max_width <- max(res$log2FoldChange) - symmetric_plot <- max(max_width, abs(min_width)) - min_width <- symmetric_plot * -1 - max_width <- symmetric_plot - max_height <- -log10(min(res[res$pvalue>0, 5])) - - up <- subset(res, res$log2FoldChange > 1 & res$pvalue <= 0.05) - up <- up[order(-up$log2FoldChange),] - up_list <- head(rownames(up), n=10L) - - down <- subset(res, res$log2FoldChange < 1 & res$pvalue <= 0.05) - down <- down[order(down$log2FoldChange),] - down_list <- head(rownames(down), n=10L) - - plot_top_20 <- c(up_list, down_list) - - dir <- paste(outdir, contrast, sep="") - dir.create(dir) - pdf(file.path(dir, paste("DESeq2", contrast, "volcano_plot.pdf", sep="_"))) - p <- EnhancedVolcano(res, - lab=rownames(res), - x="log2FoldChange", - y="pvalue", - selectLab=FALSE, - drawConnectors=FALSE, - FCcutoff=1.0, - pCutoff=0.05, - title="Volcano Plot", - subtitle=paste(contrast), - #legendVisible=F, - caption = paste0('Total Genes = ', nrow(res)), - xlim=c(min_width, max_width), - ylim=c(0, max_height), - pointSize = 1.5) - plot(p) - dev.off() - -} - - -global_heatmap <- function(de, log2, contrast, outdir){ - - # Split contrast e.g normal_vs_tumor -> "normal", "tumor" - levels <- unlist(strsplit(contrast, "_vs_")) - pheno <- inputdata$pheno - # subset phenotype file for contrast samples - pheno_subset <- subset(pheno, pheno$condition %in% levels) - # check it worked? - #print(pheno_subset) - # subset log2 counts for contrast samples - mat <- log2[,rownames(pheno_subset)] - # subset de genes/circRNAs - mat <- mat[de,] - # Perform scaling and centering on DE expr data - mat <- t(mat) - mat <- scale(mat, center=T) - mat <- t(mat) - - dir <- paste(outdir, contrast, sep="") - dir.create(dir) - pdf(file.path(dir, paste("DESeq2", contrast, "heatmap.pdf", sep="_"))) - pheatmap(mat, - annotation_col=pheno_subset, - color=greenred(75), - cluster_rows = T, - show_rownames = F) - dev.off() - -} - - - -make_boxplots <- function(de, cts, contrast){ - - pheno <- inputdata$pheno - levels <- unlist(strsplit(contrast, "_vs_")) - pheno_subset <- subset(pheno, pheno$condition %in% levels) - # subset counts for levels of interest - counts <- cts[,rownames(pheno_subset)] - # subset for de genes - counts <- as.data.frame(counts[de,]) - dir.create("boxplots") - dir.create(paste("boxplots/", contrast, sep="")) - dir=paste("boxplots/", contrast, sep="") - for( i in 1:nrow(counts)){ - circ_id <- rownames(counts[i,]); - mat <- as.data.frame(t(counts[i,])); - mat <- cbind(mat, pheno_subset); - names <- c("counts", "condition"); - colnames(mat) <- names; - - p1 <- ggboxplot(mat, x="condition", y="counts", - fill="condition", palette = "npg", - title = paste(circ_id), - ylab = "normalized counts", xlab="", - add = c("dotplot"), - add.params = list(size=0.5, jitter=0.1), - legend = "none", - bxp.errorbar = T, - bxp.errorbar.width = 0.2, width=0.3, - ggtheme = theme_classic()) + - rotate_x_text(angle = 0) + - theme(plot.title = element_text(face = "bold", size=16, hjust = 0.5)) + - theme(axis.text.x = element_text( colour = "black", size=14)) + - theme(axis.title.y = element_text(size=14, face = "italic")) + - theme(axis.title.x = element_blank()) + - theme(axis.text.y = element_text(color = "black", 
size=10)) - - pdf(file.path(dir, paste(circ_id, "boxplot.pdf", sep="_"))) - plot(p1) - dev.off() - } -} - - -options(error=function()traceback(2)) -suppressPackageStartupMessages(library("argparser")) -#suppressPackageStartupMessages(library("BiocParallel")) -suppressPackageStartupMessages(library("biomaRt")) -suppressPackageStartupMessages(library("DESeq2")) -suppressPackageStartupMessages(library("dplyr")) -#suppressPackageStartupMessages(library("edgeR")) -suppressPackageStartupMessages(library("EnhancedVolcano")) -#suppressPackageStartupMessages(library("EnsDb.Hsapiens.v86")) -#suppressPackageStartupMessages(library("genefilter")) #for rowVars -suppressPackageStartupMessages(library("ggplot2")) -suppressPackageStartupMessages(library("ggpubr")) -#suppressPackageStartupMessages(library("ggrepel")) -#suppressPackageStartupMessages(library("ggfortify")) -suppressPackageStartupMessages(library("gplots")) -suppressPackageStartupMessages(library("IHW")) -#suppressPackageStartupMessages(library("limma")) -#suppressPackageStartupMessages(library("parallel")) -suppressPackageStartupMessages(library("PCAtools")) -suppressPackageStartupMessages(library("pheatmap")) -suppressPackageStartupMessages(library("RColorBrewer")) -#suppressPackageStartupMessages(library("readr")) -#suppressPackageStartupMessages(library("Rsubread")) -#suppressPackageStartupMessages(library("tximport")) -#suppressPackageStartupMessages(library("VennDiagram")) - -arg <- get_args() - -inputdata <- stage_data(arg$gene_counts, arg$phenotype, arg$circRNA, arg$species, arg$map) -dir.create("RNA-Seq") -dir.create("circRNA") -x <- DESeq2(inputdata, "RNA-Seq") -y <- DESeq2(inputdata, "circRNA") diff --git a/bin/ID_to_BED.sh b/bin/ID_to_BED.sh deleted file mode 100755 index 432ef3b8a..000000000 --- a/bin/ID_to_BED.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -file=$1 - -while IFS='' read -r line; do - - name=$(echo $line) - chr=$(echo $line | cut -d: -f1) - start=$(echo $line | cut -d- -f1 | cut -d: -f2) - stop=$(echo $line | cut -d- -f2 | cut -d: -f1) - sign=$(echo $line | cut -d: -f3) - - echo -e "$chr\t$start\t$stop\t$name\t0\t$sign" >> ${name}.bed - -done < $file diff --git a/bin/annotate_outputs.sh b/bin/annotate_outputs.sh deleted file mode 100755 index 92abc21b0..000000000 --- a/bin/annotate_outputs.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env bash - -## -## Expected Input: -## Chr Start Stop name count strand -## name= chr":"start"-"stop":"strand - -echo "====================================================================================" -echo "[nf-core/circrna]: circRNA annotation script " -echo "[nf-core/circrna]: Author: Barry Digby " -echo "[nf-core/circrna]: Institution: National University of Ireland, Galway " -echo "====================================================================================" - -mkdir -p bed12 - -EB=$1 - -while IFS='' read -r line; do - - name=$(echo $line | awk '{print $4}') - count=$(echo $line | awk '{print $5}') - touch ${name}.bed - echo "$line" >> ${name}.bed_tmp - sed 's/[\t]*$//' ${name}.bed_tmp > ${name}.bed && rm ${name}.bed_tmp - - bedtools intersect -a filt.gtf -b ${name}.bed -s -f 1.00 > ${name}.gtf - - start=$(echo $line | awk '{print $2}') - stop=$(echo $line | awk '{print $3}') - - echo "[nf-core/circrna]: Starting analysis for: $name" - - # is the gtf file NOT (-s) empty? i.e did it overlap biotypes? - if [[ -s ${name}.gtf ]]; - then - - echo "[nf-core/circrna]: $name overlaps features in GTF file" - echo "[nf-core/circrna]: Inspecting Genes..." 
- - gene_id=$(awk -F'gene_id ' '{print $2}' ${name}.gtf | \ - awk -F';' '{print $1}' | sed 's/"//g' | sort -u | paste -s -d, -) - - tx_id=$(awk -F'transcript_id ' '{print $2}' ${name}.gtf | \ - awk -F';' '{print $1}' | sed 's/"//g' | sort -u | paste -s -d, -) - - echo "[nf-core/circrna]: Overlapping Gene IDs: $gene_id" - echo "[nf-core/circrna]: Converting to BED12" - - gtfToGenePred ${name}.gtf ${name}.genepred - genePredToBed ${name}.genepred ${name}_predtobed.bed - - # Attempting perfect exon boundary overlaps - echo "[nf-core/circrna]: Attempting to fit circRNA to gene exon boundaries" - awk -v OFS="\t" -v start="$start" -v stop="$stop" \ - '{if($2==start && $3==stop) print $0}' ${name}_predtobed.bed | \ - sort -rnk10 | head -n 1 > ${name}.bed12.bed - - # Resulting file not empty? i.e perfectly overlapped with exon boundaries? - if [[ -s ${name}.bed12.bed ]]; - then - echo "[nf-core/circrna]: ${name} fits gene exons, is a circRNA" - type="circRNA" - else - - echo "[nf-core/circrna]: circRNA overlaps exons, but not boundaries" - echo "[nf-core/circrna]: Investigating if EIciRNA or acceptable to take underlying transcript" - echo "[nf-core/circrna]: Retrying with longest underlying transcript" - - awk -v OFS="\t" '{$13 = $3 - $2; print}' ${name}_predtobed.bed | \ - sort -rnk13 | cut -f13 --complement | head -n 1 > ${name}.bed12.bed_tmp - - tx_len=$(awk -v OFS="\t" '{$13 = $3 - $2; print}' ${name}_predtobed.bed | \ - sort -rnk13 | awk '{print $13}' | head -n 1) - - circ_len=$(awk -v OFS="\t" '{$7 = $3 - $2; print}' ${name}.bed | awk '{print $7}') - - echo "[nf-core/circrna]: Best transcript length: $tx_len" - echo "[nf-core/circrna]: $name length: $circ_len" - - difference=$(($circ_len - $tx_len)) - - if [[ $difference -gt $EB ]]; - then - - echo "[nf-core/circrna]: Transcript exon boundaries more than ${EB}bp off $name" - echo "[nf-core/circrna]: Treating as EIciRNA" - - type="EIciRNA" - block_count=1 - block_size=$(($stop-$start)) - rgb="0,0,0" - block_start=0 - awk -v OFS="\t" -v thick=$start -v rgb=$rgb -v count=$block_count -v start=$block_start -v size=$block_size \ - '{print $0, thick, thick, rgb, count, size, start}' ${name}.bed > ${name}.bed12.bed - rm ${name}.bed12.bed_tmp - else - - echo "[nf-core/circrna]: Transcript exon boundaries within ${EB}bp ${name}" - echo "[nf-core/circrna]: Treating ${name} as circRNA." - type="circRNA" - mv ${name}.bed12.bed_tmp ${name}.bed12.bed - fi - fi - else - - echo "[nf-core/circrna]: $name returned no GTF overlaps." 
- echo "[nf-core/circrna]: Treating as an intronic circRNA" - - gene_id="NA" - tx_id="NA" - type="ciRNA" - block_count=1 - block_size=$(($stop-$start)) - rgb="0,0,0" - block_start=0 - awk -v OFS="\t" -v thick=$start -v rgb=$rgb -v count=$block_count -v start=$block_start -v size=$block_size \ - '{print $0, thick, thick, rgb, count, size, start}' ${name}.bed > ${name}.bed12.bed - fi - # add type, geneid tx_id and count - awk -v type="$type" -v gene="$gene_id" -v tx="$tx_id" -v count="$count" 'BEGIN{FS=OFS="\t"}{$5=count;$13=type;$14=gene;$15=tx}1' ${name}.bed12.bed > ${name}.bed12.bed_tmp - awk -v OFS="\t" -v name=$name '{$4 = name; print}' ${name}.bed12.bed_tmp > ${name}.bed12.bed_tmp1 - - rm ${name}.bed12.bed - rm ${name}.bed12.bed_tmp - mv ${name}.bed12.bed_tmp1 ${name}.bed12.bed - echo "[nf-core/circrna]: cleaning up intermediate files" - rm -f ${name}.gtf - rm -f ${name}.genepred - rm -f ${name}_predtobed.bed - rm -f ${name}.bed - - cp ${name}.bed12.bed bed12/ - rm -rf ${name}.bed12.bed - echo "====================================================================================" - echo "====================================================================================" - -done < circs.bed - -# remove the trailing commas that appear on end of Exon Size, Exon Starts (col 11, 12) -cat bed12/*.bed12.bed > master_bed12.bed.tmp - -awk 'BEGIN{FS=OFS="\t"} {gsub(/,$/,"",$11);gsub(/,$/,"",$12)} 1' master_bed12.bed.tmp > master_bed12.bed && rm master_bed12.bed.tmp - -echo " Thank you for using nf-core/circrna! - Barry " -echo "====================================================================================" -echo "====================================================================================" diff --git a/bin/backsplice_gen.sh b/bin/backsplice_gen.sh deleted file mode 100755 index 2801c52bf..000000000 --- a/bin/backsplice_gen.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -## .fa = backsplice .fasta = canonical to publish - -file_prefix=$(basename $1 .fa) - -# Split multi-fasta file into single records -mkdir canonical_seqs && mv $1 canonical_seqs/ -ls -d canonical_seqs/*.fa | while read -r line; do -cat $line | awk '{if (substr($0, 1, 1)==">") {filename=(substr($0,2) ".fa")} print $0 > filename}' -done - -# add backsplice site to individual fasta files -for file in *.fa; do - fn=$(basename $file .fa) - cat $file | grep ">" > header.txt - cat $file | grep -v ">" | cut -c1-20 > 20bp.txt - cat $file | grep -v ">" > seqs.txt - paste seqs.txt 20bp.txt | sed -e 's/\t//' > seqs_20bp.txt - paste header.txt seqs_20bp.txt | sed -e 's/\t/\n/' > ${fn}.fasta - rm header.txt 20bp.txt seqs.txt seqs_20bp.txt -done - -# rm single fasta entries from while read line -rm *.fa -# merge backsplice fastas into one for miRNA pred -cat *.fasta > ${file_prefix}.fa -# remove intermediate for loop fasta file -rm *.fasta -# move canonical seqs back to publish to outdir -ls -d canonical_seqs/*.fa | while read -r line; do -mv $line ${file_prefix}.fasta -done diff --git a/bin/check_empty.sh b/bin/check_empty.sh deleted file mode 100755 index f45532d0e..000000000 --- a/bin/check_empty.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/bash - -## When consolidating tool outputs for a sample, check if any are empty. 
-## This will prevent R from throwing errors - -for i in *bed; do - - if [[ -s $i ]]; - then - : - else - echo "${i}" >> remove_empty_sample.txt - fi - -done - -## sample.csv contains all tool bed files by default -mv samples.csv checkme.csv - -## if there are empty tool outputs then remove them from samples.csv and rewrite. -if [[ -s remove_empty_sample.txt ]]; -then - grep -vf remove_empty_sample.txt checkme.csv > samples.csv -else - mv checkme.csv samples.csv -fi diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index aea3b88b5..000000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 - -""" - Borrowed from nf-core/rnaseq, the original from template 2.6 kept breaking at: - "[CRITICAL] The given sample sheet does not appear to contain a header." - Even though the input samplesheet looked to be perfectly fine using vim :set list. - - Strandedness commented out should you wish to include this in future releases. -""" - - -import os -import sys -import errno -import argparse - - -def parse_args(args=None): - Description = "Reformat nf-core/rnaseq samplesheet file and check its contents." - Epilog = "Example usage: python check_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") - parser.add_argument("FILE_OUT", help="Output file.") - return parser.parse_args(args) - - -def make_dir(path): - if len(path) > 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise exception - - -def print_error(error, context="Line", context_str=""): - error_str = f"ERROR: Please check samplesheet -> {error}" - if context != "" and context_str != "": - error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" - print(error_str) - sys.exit(1) - - -def check_samplesheet(file_in, file_out): - """ - This function checks that the samplesheet follows the following structure: - sample,fastq_1,fastq_2,strandedness - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,forward - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,forward - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,,forward - For an example see: - https://github.com/nf-core/test-datasets/blob/rnaseq/samplesheet/v3.1/samplesheet_test.csv - """ - - sample_mapping_dict = {} - with open(file_in, "r", encoding="utf-8-sig") as fin: - - ## Check header - MIN_COLS = 2 ## edit by BDigby as not using strandedness yet. 
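-        ## With strandedness disabled, a minimal valid samplesheet looks like (illustrative):
-        ##   sample,fastq_1,fastq_2
-        ##   CONTROL_REP1,ctrl_R1.fastq.gz,ctrl_R2.fastq.gz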
- # HEADER = ["sample", "fastq_1", "fastq_2", "strandedness"] - HEADER = ["sample", "fastq_1", "fastq_2"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print(f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}") - sys.exit(1) - - ## Check sample entries - for line in fin: - if line.strip(): - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - ## Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - f"Invalid number of columns (minimum = {len(HEADER)})!", - "Line", - line, - ) - - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - f"Invalid number of populated columns (minimum = {MIN_COLS})!", - "Line", - line, - ) - - ## Check sample name entries - # sample, fastq_1, fastq_2, strandedness = lspl[: len(HEADER)] - sample, fastq_1, fastq_2 = lspl[: len(HEADER)] - if sample.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) - - ## Check strandedness - # strandednesses = ["unstranded", "forward", "reverse"] - # if strandedness: - # if strandedness not in strandednesses: - # print_error( - # f"Strandedness must be one of '{', '.join(strandednesses)}'!", - # "Line", - # line, - # ) - # else: - # print_error( - # f"Strandedness has not been specified! Must be one of {', '.join(strandednesses)}.", - # "Line", - # line, - # ) - - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2, strandedness] - if sample and fastq_1 and fastq_2: ## Paired-end short reads - # sample_info = ["0", fastq_1, fastq_2, strandedness] - sample_info = ["0", fastq_1, fastq_2] - elif sample and fastq_1 and not fastq_2: ## Single-end short reads - # sample_info = ["1", fastq_1, fastq_2, strandedness] - sample_info = ["1", fastq_1, fastq_2] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, strandedness ]]} - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sample_mapping_dict[sample].append(sample_info) - - ## Write validated samplesheet with appropriate columns - if len(sample_mapping_dict) > 0: - out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - # fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"]) + "\n") - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n") - for sample in sorted(sample_mapping_dict.keys()): - - ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error( - f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", - "Sample", - sample, - ) - - ## Check that multiple runs of the same sample are of the same strandedness - # if not all(x[-1] == sample_mapping_dict[sample][0][-1] for x in sample_mapping_dict[sample]): - # print_error( - # f"Multiple runs of a sample must have the same strandedness!", - # "Sample", - # sample, - # ) - - for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") - else: - print_error(f"No entries to process!", "Samplesheet: {file_in}") - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/circRNA_counts_matrix.py b/bin/circRNA_counts_matrix.py deleted file mode 100755 index bcd6ff967..000000000 --- a/bin/circRNA_counts_matrix.py +++ /dev/null @@ -1,41 +0,0 @@ -import sys, glob -from collections import defaultdict - -par = sys.argv -hld = defaultdict(list) -samps = defaultdict(list) - -path = "./" -files = [f for f in glob.glob("*.bed")] - -for fi in files: - name = fi.strip("./").split(".bed")[0] - with open(fi) as IN: - for li in IN: - fds = li.strip().split() - k = "_".join(fds[0:4]) - if k in samps: - samps[k].append((name, fds[4])) - else: - samps[k] = [(name, fds[4])] - - -def Diff(list1, list2): - return list(list(set(list1) - set(list2)) + list(set(list2) - set(list1))) - - -tmp = [x.replace(".bed", "") for x in files] -print("Chr\tStart\tStop\tStrand\t" + "\t".join([x for x in tmp])) - -for k, v in samps.items(): - disj = Diff(tmp, [a_tuple[0] for a_tuple in v]) - for val in disj: - v.append((val, "0")) - # print(" ".join(k.split("_")),v) - li = [] - for h in tmp: - for x in v: - if x[0] == h: - li.append(x[1]) - print("\t".join(k.split("_")) + "\t" + "\t".join(li)) -sys.exit() diff --git a/bin/consolidate_algorithms_intersection.R b/bin/consolidate_algorithms_intersection.R deleted file mode 100755 index a147c1057..000000000 --- a/bin/consolidate_algorithms_intersection.R +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/Rscript - -get_args <- function(){ - - argp <- arg_parser( - description="Script to take tool outputs and bring forward circRNAs called by at least n tools", - hide.opts=TRUE) - - argp <- add_argument( - parser=argp, - arg="samples", - short="s", - help="csv file listing tool output files that are not empty", - default="samples.csv") - - argp <- add_argument( - parser=argp, - arg="n_tools", - short="n", - help="number of tools circrna must be called by") - - argp <- add_argument( - parser=argp, - arg="duplicates_fun", - short="d", - help="how to handle counts for duplicate circRNAs [mean, max]") - - argv <- parse_args( - parser=argp, - argv=commandArgs(trailingOnly=TRUE)) - - return(argv) -} - -giveError <- function(message){ - cat(paste("\n", message, sep="")) - quit() -} - -usage <- function(){giveError("Usage: consolidate_algorithms.R samples.csv ${params.n_tools}")} - -## script body - -stage_data <- function(samples, n_tools, duplicates_fun){ - - inputdata <- list() - - samples <- read.csv(samples, sep="\t", header=F, stringsAsFactors=FALSE) - inputdata$samples <- samples - inputdata$n_tools <- n_tools - inputdata$duplicates_fun <- duplicates_fun - - return(inputdata) -} - - -## read in files and make circRNA IDs. 
- -main <- function(inputdata){ - - samples <- inputdata$samples - n_tools <- inputdata$n_tools - duplicates_fun <- inputdata$duplicates_fun - - ## intit lists - dflist <- list() - idlist <- list() - - # loop over samples, append to counts mat and IDs to respective lists - for(i in 1:nrow(samples)){ - file_handle <- file.path(paste("./", samples$V1[i], sep="")) - df <- read.table(file_handle, sep="\t", header=F, stringsAsFactors=FALSE) - dflist[[i]] <- read.table(file_handle, sep="\t", header=F, stringsAsFactors=FALSE) - idlist[[i]] <- with(df, paste0(V1, sep="-", V2, sep=":", V3, sep="-", V4)) - } - - # place all ids in a vector - vec <- unlist(idlist) - - # make table to get ID ocurrence count - tab <- table(vec) - - # now filter by ocurrence i.e must have been in "n" tools - filt_id <- names(tab)[tab >= n_tools] - - # make matser df, append ID - mat <- do.call("rbind", dflist) - mat$ID <- with(mat, paste0(V1, sep="-", V2, sep=":", V3, sep="-", V4)) - - # extract circrnas called by n tools - mat <- subset(mat, mat$ID %in% filt_id) - print(head(mat)) - # handle duplicate circRNA counts: - if(duplicates_fun == "max"){ - - # Take the max value for duplicate circRNAs - mat <- mat[order(mat$ID, -abs(mat$V5)),] - mat <- mat[!duplicated(mat$ID),] - mat <- subset(mat, select=-c(ID)) - }else{ - - # Remove duplicates like above to generate row indices - tmp <- mat[order(mat$ID, -abs(mat$V5)),] - tmp <- tmp[!duplicated(tmp$ID),] - # Now Take average values, append back to tmp. - mat <- setNames(aggregate(mat$V5, list(mat$ID), FUN=mean), c("ID", "V5")) - stopifnot(nrow(tmp)==nrow(mat)) - tmp <- subset(tmp, select=c(V1,V2,V3,V4)) - mat <- cbind(tmp,mat) - # round up to nearest integer - mat$V5 <- ceiling(mat$V5) - mat <- subset(mat, select=-c(ID)) - } - - write.table(mat, "combined_counts.bed", sep="\t", row.names = F, col.names = F, quote = F) - -} - -## error messages, library load -options(error=function()traceback(2)) -suppressPackageStartupMessages(library("argparser")) - -## initiate script -arg <- get_args() - -inputdata <- stage_data(arg$samples, arg$n_tools, arg$duplicates_fun) - -x <- main(inputdata) diff --git a/bin/ensembl_database_map.txt b/bin/ensembl_database_map.txt deleted file mode 100755 index 86e6bb555..000000000 --- a/bin/ensembl_database_map.txt +++ /dev/null @@ -1,18 +0,0 @@ -species command -cel useMart(biomart = "ensembl", dataset = "celegans_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -hsa useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -mmu useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -ath useMart(biomart = "plants_mart", dataset = "athaliana_eg_gene", host="plants.ensembl.org", archive=FALSE) -bta useMart(biomart = "ensembl", dataset = "btaurus_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -cfa useMart(biomart = "ensembl", dataset = "clfamiliaris_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -dre useMart(biomart = "ensembl", dataset = "drerio_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -dme useMart(biomart = "ensembl", dataset = "dmelanogaster_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -eca useMart(biomart = "ensembl", dataset = "ecaballus_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -gga useMart(biomart = "ensembl", dataset = "ggallus_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -osa useMart(biomart = "plants_mart", dataset = 
"osativa_eg_gene", host="plants.ensembl.org", archive=FALSE) -ptr useMart(biomart = "ensembl", dataset = "ptroglodytes_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -rno useMart(biomart = "ensembl", dataset = "rnorvegicus_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -sbi useMart(biomart = "plants_mart", dataset = "sbicolor_eg_gene", host="plants.ensembl.org", archive=FALSE) -ssc useMart(biomart = "ensembl", dataset = "sscrofa_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) -zma useMart(biomart = "plants_mart", dataset = "zmays_eg_gene", host="plants.ensembl.org", archive=FALSE) -mml useMart(biomart = "ensembl", dataset = "mmulatta_gene_ensembl", host="https://www.ensembl.org", archive=FALSE) diff --git a/bin/prepare_circ_test.R b/bin/prepare_circ_test.R deleted file mode 100755 index 065119d52..000000000 --- a/bin/prepare_circ_test.R +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env Rscript - -circ_mat = read.table("count_matrix.txt", header=T, sep="\t", check.names = FALSE, stringsAsFactors = F, row.names = "ID") -gene_mat = read.table("gene_count_matrix.csv", sep=",", header=T, row.names="gene_id", stringsAsFactors = F) -map = read.table("circrna_host-gene.txt", header = F, sep="\t", stringsAsFactors = F) - -# some circrnas do not have a host gene. -map <- na.omit(map) -colnames(map) <- c("circrna", "gene") - -# resolve multiple host genes by creating a new 'map' dataframe -# to enforce 1-1 mapping. - -new_circ = c() -new_gene = c() - -for(i in 1:nrow(map)){ - row <- map[i,] - circ <- row$circrna - gene <- row$gene - - # multiple host genes? - multiple_genes <- unlist(strsplit(gene, ",")) - #print(multiple_genes) - if(length(multiple_genes) > 1){ - for(gene in multiple_genes){ - new_circ <- c(new_circ, circ) - new_gene <- c(new_gene, gene) - } - } - new_circ <- c(new_circ, circ) - new_gene <- c(new_gene, gene) -} - -new_map <- data.frame(new_circ, new_gene) - -#create circTest dataframe. really odd formatting..! -new_circ_mat <- circ_mat[c(new_map$new_circ),] - -# unique names for duplicated circrna ids, resolve below -Chr <- c() -Start <- c() -End <- c() -for(i in 1:nrow(new_circ_mat)){ - row <- new_circ_mat[i,] - print(rownames(row)) - chr_ <- unlist(strsplit(rownames(row), ':'))[1] - coords <- unlist(strsplit(rownames(row), ':'))[2] - start_ <- unlist(strsplit(coords, '-'))[1] - end_ <- unlist(strsplit(coords, '-'))[2] - - Chr <- c(Chr, chr_) - Start <- c(Start, start_) - End <- c(End, end_) - -} - -Gene <- new_map$new_gene -circ_csv <- data.frame(Chr, Start, End, Gene, new_circ_mat) -# DROP the rownames, do not write to file. 
- -gene_csv <- gene_mat[c(new_map$new_gene),] -gene_csv <- data.frame(Chr, Start, End, Gene, gene_csv) - -write.csv(circ_csv, "circ.csv", quote=F, row.names = FALSE) -write.csv(gene_csv, "linear.csv", quote=F, row.names = FALSE) diff --git a/bin/reformat_count_matrix.R b/bin/reformat_count_matrix.R deleted file mode 100755 index c78b48d70..000000000 --- a/bin/reformat_count_matrix.R +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env Rscript -library(dplyr) -mat <- read.table("circRNA_matrix.txt", sep="\t", header=T, stringsAsFactors=F) -mat$ID <- with(mat, paste0(Chr, sep=":", Start, sep="-", Stop, sep=":", Strand)) -mat <- mat[,-c(1:4)] -mat1 <- mat %>% select(ID, everything()) -ID <- as.data.frame(mat1$ID) -mat <- as.data.frame(subset(mat1, select=-c(ID))) -mat <- mat[, order(names(mat))] -mat1 <- cbind(ID, mat) -colnames(mat1)[1] <- "ID" -write.table(mat1, "count_matrix.txt", sep="\t", row.names=F, col.names=T, quote=F) diff --git a/bin/targetscan_format.sh b/bin/targetscan_format.sh index 2a23c9f62..9321221bb 100755 --- a/bin/targetscan_format.sh +++ b/bin/targetscan_format.sh @@ -1,4 +1,7 @@ -#!/usr/bin/env +#!/usr/bin/env bash + +## Author: Barry Digby +## License: MIT ## Script that converts miRbase (mature.fa) file to ## TargetScan compatability. The motivation for doing @@ -15,7 +18,6 @@ ## Stage input mature.fa file, species MATURE="$1" -#GENOME_ID="$2" ## Uncompress if necessary if [ ${MATURE: -3} == ".gz" ]; then @@ -35,7 +37,7 @@ paste miR_ID seed_sequence > targetscan_tmp.txt ## Correct delimiter, add dummy species awk -v OFS="\t" '{print $1, $2, "0000"}' targetscan_tmp.txt > mature.txt -## Tidy the work dir (uncomment these for debugging scratch dirs) +## Tidy the work dir (comment these for debugging scratch dirs) rm -rf mature_sequence rm -rf miR_ID rm -rf targetscan_tmp.txt diff --git a/bin/unwanted_biotypes.txt b/bin/unwanted_biotypes.txt deleted file mode 100755 index ef5a6b2b6..000000000 --- a/bin/unwanted_biotypes.txt +++ /dev/null @@ -1,55 +0,0 @@ -IG_C_gene -IG_D_gene -IG_J_gene -IG_LV_gene -IG_V_gene -TR_C_gene -TR_J_gene -TR_V_gene -TR_D_gene -IG_pseudogene -IG_C_pseudogene -IG_J_pseudogene -IG_V_pseudogene -TR_V_pseudogene -TR_J_pseudogene -Mt_rRNA -Mt_tRNA -miRNA -misc_RNA -rRNA -scRNA -snRNA -snoRNA -ribozyme -sRNA -scaRNA -TEC -Mt_tRNA_pseudogene -tRNA_pseudogene -snoRNA_pseudogene -snRNA_pseudogene -scRNA_pseudogene -rRNA_pseudogene -misc_RNA_pseudogene -miRNA_pseudogene -vaultRNA/vault_RNA -disrupted_domain -macro_lncRNA -artifact -unitary_pseudogene -GL000009.2 -GL000194.1 -GL000195.1 -GL000205.2 -GL000213.1 -GL000218.1 -GL000219.1 -KI270711.1 -KI270713.1 -KI270721.1 -KI270726.1 -KI270727.1 -KI270728.1 -KI270731.1 -KI270734.1 diff --git a/conf/base.config b/conf/base.config index 3d1ab69e8..35f36a915 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' }
 maxRetries = 1
 maxErrors = '-1'
@@ -57,7 +57,4 @@ process {
 errorStrategy = 'retry'
 maxRetries = 2
 }
- withName:CUSTOM_DUMPSOFTWAREVERSIONS {
- cache = false
- }
 }
diff --git a/conf/full.config b/conf/full.config
new file mode 100644
index 000000000..37cf77782
--- /dev/null
+++ b/conf/full.config
@@ -0,0 +1,17 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config for full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines parameters so that minimal tests are converted to a full-size pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/circrna -profile test,full,<docker/singularity> --outdir <OUTDIR>
+        nextflow run nf-core/circrna -profile test_igenomes,full,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    tools = 'circexplorer2,ciriquant,find_circ,circrna_finder,mapsplice,dcc,segemehl'
+    tool_filter = 2
+}
diff --git a/conf/igenomes.config b/conf/igenomes.config
index 88224a279..ab2b7f045 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -12,410 +12,508 @@ params {
 // illumina iGenomes reference file paths
 genomes {
 'GRCh37' {
- fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
- fasta_fai = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa.fai"
- bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
- bowtie = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BowtieIndex/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
- gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
- mature = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/SmallRNA/mature.fa"
- mito_name = "MT"
- species_id = "hsa"
+ fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
+ fasta_fai = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa.fai"
+ bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
+ bowtie = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BowtieIndex/"
+ bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
+ star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
+ gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
+ bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
+ mature = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/SmallRNA/mature.fa"
+ mito_name = "MT"
+ species_id = "hsa"
 }
 'GRCh38' {
- fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
- fasta_fai = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa.fai"
- bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/"
- bowtie = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BowtieIndex/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/"
gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "hsa" + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "hsa" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" } 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "mmu" + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BowtieIndex/" + bowtie2 = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "hsa" } 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/SmallRNA/mature.fa" - mito_name = "Mt" - species_id = "ath" + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/SmallRNA/mature.fa" + mito_name = "Mt" + species_id = "ath" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/SmallRNA/mature.fa" + species_id = "bsu" } 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "bta" + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "bta" } 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/SmallRNA/mature.fa" - mito_name = "MtDNA" - species_id = "cel" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/SmallRNA/mature.fa" + mito_name = "MtDNA" + species_id = "cel" } 'CanFam3.1' { - fasta = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "cfa" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "mml" + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "cfa" } 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mature = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "dre" + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "dre" } 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/SmallRNA/mature.fa" - mito_name = "M" - species_id = "dme" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/SmallRNA/mature.fa" + mito_name = "M" + species_id = "dme" } 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - gtf = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "eca" + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "eca" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/SmallRNA/mature.fa" + species_id = "eco" } 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "gga" + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BowtieIndex/" + bowtie2 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "gga" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/SmallRNA/mature.fa" + species_id = "gmx" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "mml" } 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/SmallRNA/mature.fa" - mito_name = "Mt" - species_id = "osa" + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/SmallRNA/mature.fa" + mito_name = "Mt" + species_id = "osa" } 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "ptr" + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "ptr" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mature = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "rno" } 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "rno" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "rno" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "sce" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BowtieIndex/" + bowtie2 = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "spo" } 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/SmallRNA/mature.fa" - species_id = "sbi" + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/SmallRNA/mature.fa" + species_id = "sbi" } 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/SmallRNA/mature.fa" - mito_name = "MT" - species_id = "ssc" + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/SmallRNA/mature.fa" + mito_name = "MT" + species_id = "ssc" } 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/SmallRNA/mature.fa" - mito_name = "Mt" - species_id = "zma" + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/SmallRNA/mature.fa" + mito_name = "Mt" + species_id = "zma" } 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "hsa" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + 
star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "hsa" } 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "hsa" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "hsa" } 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "mmu" + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "mmu" } 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "bta" + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "bta" } 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/" - bowtie = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "cel" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "cel" } 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "cfa" + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + mito_name = "chrM" + species_id = "cfa" } 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "dre" + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + species_id = "dre" } 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "dme" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "dme" } 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowie = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "eca" + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + gtf = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "eca" } 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "gga" + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "gga" } 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "ptr" + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + gtf = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "ptr" } 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "rno" + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "rno" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + mature = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "sce" } 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa.fai" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BowtieIndex/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - mature = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/SmallRNA/mature.fa" - mito_name = "chrM" - species_id = "ssc" - } - 'R64-1-1' { - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa.fai" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BowtieIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + mature = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/SmallRNA/mature.fa" + mito_name = "chrM" + species_id = "ssc" } } } diff --git a/conf/modules.config b/conf/modules.config index b44dd7c19..b23ea157f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,392 +18,430 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: CAT_FASTQ { + publishDir = [ + path: { "${params.outdir}/preprocessing/merged_samples" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: FASTQC { - ext.args = '--quiet' + withName: SAMTOOLS_FAIDX { + publishDir = [ + path: { "${params.outdir}/references/index/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: '.*:FASTQC_TRIMGALORE:FASTQC' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/quality_control/fastqc" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: "*.{html,zip}" ] } - // TRIMMING couresy of nf-core/rnaseq - -if (!params.skip_trimming) { - process { - withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { - ext.args = { - [ - "--fastqc_args '-t ${task.cpus}' ", - params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' - ].join(' ').trim() - } - publishDir = [ - [ - path: { "${params.outdir}/trimgalore/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.{html,zip}" - ], - [ - path: { "${params.outdir}/trimgalore" }, - mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: params.save_trimmed - ], - [ - path: { "${params.outdir}/trimgalore" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ] + withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { + ext.args = { + [ + "--fastqc_args '-t ${task.cpus}' ", + params.trim_nextseq > 0 ? 
"--nextseq ${params.trim_nextseq}" : '' + ].join(' ').trim() } + publishDir = [ + [ + path: { "${params.outdir}/quality_control/trimgalore" }, + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + enabled: params.save_trimmed + ], + [ + path: { "${params.outdir}/quality_control/trimgalore" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ] } -} // PREPARE GENOME + withName: CLEAN_FASTA { + ext.args2 = '\'/>/{ gsub(\$2, "",\$2);gsub(" ", "") };{print}\'' + publishDir = [ + path: { "${params.outdir}/references/genome/clean_fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] + } + + withName: GTFFILTER { + ext.suffix = "filtered.gtf" + publishDir = [ + path: { "${params.outdir}/references/genome/filtered_gtf" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] + } + + withName: SEQKIT_SPLIT { + ext.args = "-i --by-id-prefix \"\"" + publishDir = [ + path: { "${params.outdir}/references/genome/chromosomes" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] + } + withName: BOWTIE_BUILD { - ext.when = { params.fasta && !params.bowtie && params.tool.split(',').contains('mapsplice') && params.module.split(',').contains('circrna_discovery') } + ext.when = { !params.bowtie && params.tools.split(',').contains('mapsplice') } publishDir = [ - path: { "${params.outdir}/genome/index/bowtie" }, + path: { "${params.outdir}/references/index" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } withName: BOWTIE2_BUILD { - ext.when = { params.fasta && !params.bowtie2 && params.tool.split(',').contains('find_circ') && params.module.split(',').contains('circrna_discovery') } + ext.when = { !params.bowtie2 && params.tools.split(',').contains('find_circ') } publishDir = [ - path: { "${params.outdir}/genome/index" }, + path: { "${params.outdir}/references/index" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } withName: BWA_INDEX { - ext.when = { params.fasta && !params.bwa && params.tool.split(',').contains('ciriquant') && params.module.split(',').contains('circrna_discovery') } + ext.when = { !params.bwa && params.tools.split(',').contains('ciriquant') } publishDir = [ - path: { "${params.outdir}/genome/index" }, + path: { "${params.outdir}/references/index" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? 
filename : null } ] } withName: HISAT2_EXTRACTSPLICESITES { - ext.when = { params.fasta && params.gtf && ( params.module.split(',').contains('differential_expression') || params.tool.split(',').contains('ciriquant') ) } + ext.when = { params.tools.split(',').contains('ciriquant') } publishDir = [ - path: { "${params.outdir}/genome/index/hisat2" }, + path: { "${params.outdir}/references/index/hisat2" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } withName: HISAT2_BUILD { - ext.when = { params.fasta && params.gtf && ( params.module.split(',').contains('differential_expression') || params.tool.split(',').contains('ciriquant') ) } + ext.when = { params.tools.split(',').contains('ciriquant') } publishDir = [ - path: { "${params.outdir}/genome/index/hisat2" }, + path: { "${params.outdir}/references/index" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } withName: STAR_GENOMEGENERATE { - ext.when = { params.fasta && params.gtf && !params.star && params.module.split(',').contains('circrna_discovery') && ( params.tool.split(',').contains('circexplorer2') || params.tool.split(',').contains('dcc') || params.tool.split(',').contains('circrna_finder') ) } + ext.when = { !params.star && ( params.tools.split(',').contains('circexplorer2') || params.tools.split(',').contains('dcc') || params.tools.split(',').contains('circrna_finder') ) } ext.args = [ "", params.sjdboverhang ? "--sjdbOverhang ${params.sjdboverhang}" : '', ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/genome/index/star" }, + path: { "${params.outdir}/references/index" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } - withName: SEGEMEHL_INDEX { - ext.when = { params.fasta && !params.segemehl && params.tool.split(',').contains('segemehl') && params.module.split(',').contains('circrna_discovery') } + // circRNA + + withName: '.*:SEGEMEHL:INDEX' { publishDir = [ - path: { "${params.outdir}/genome/index/segemehl" }, + path: { "${params.outdir}/references/index/segemehl" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } - // circRNA - - withName: SEGEMEHL_ALIGN { - ext.when = { params.fasta && !params.segemehl && params.tool.split(',').contains('segemehl') && params.module.split(',').contains('circrna_discovery') } + withName: '.*:SEGEMEHL:ALIGN' { ext.args = [ "", "-b", "-S" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/segemehl/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/segemehl/intermediates" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } ] } - withName: SEGEMEHL_FILTER { - ext.when = { params.tool.split(',').contains('segemehl') && params.module.split(',').contains('circrna_discovery') } + withName: '.*:SEGEMEHL:EXTRACT' { + // Keep only rows with ";C;" in column 4 + // Print $1 $2 $3 $1:$2-$3:$6 $5 $6 + ext.args = "-v FS='\\t' -v OFS='\\t' '{ if (\$4 ~ /;C;/) { print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$6, \$5, \$6 } }'" + ext.suffix = "segemehl_extracted.bed" publishDir = [ - path: { "${params.outdir}/circrna_discovery/segemehl/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/segemehl/extracted" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:SEGEMEHL:SORT' { + ext.args = "-k1,1 -k2,2n -k3,3n -k4,4 -k6,6" + ext.suffix = "segemehl_sorted.bed" + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/segemehl/sorted" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:SEGEMEHL:GROUP' { + ext.summary_col = 5 + ext.args = "-g 1,2,3,4,6 -o count" + ext.suffix = "segemehl_grouped.bed" + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/segemehl/grouped" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:SEGEMEHL:UNIFY' { + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$4, \$6, \$5 }'" + ext.suffix = "segemehl.bed" + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/segemehl/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" + pattern: "*.segemehl.bed" ] } - withName: STAR_1ST_PASS { - ext.when = { params.module.split(',').contains('circrna_discovery') && ( params.tool.split(',').contains('circexplorer2') || params.tool.split(',').contains('circrna_finder') ) } + withName: '.*:STAR2PASS:PASS_1' { + ext.when = { params.tools.split(',').contains('circexplorer2') || params.tools.split(',').contains('circrna_finder') } ext.args = [ "", "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", "--outFilterType BySJout", "--outReadsUnmapped None", "--readFilesCommand zcat", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/star/1st_pass" }, + path: { "${params.outdir}/bsj_detection/tools/star/1st_pass/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } ] } - withName: STAR_SJDB { - ext.when = { params.module.split(',').contains('circrna_discovery') && ( params.tool.split(',').contains('circexplorer2') || params.tool.split(',').contains('circrna_finder') ) } + withName: '.*:STAR2PASS:SJDB' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/star/sjdb" }, + path: { "${params.outdir}/bsj_detection/tools/star/sjdb/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: STAR_2ND_PASS { - ext.when = { params.module.split(',').contains('circrna_discovery') && ( params.tool.split(',').contains('circexplorer2') || params.tool.split(',').contains('circrna_finder') ) } + withName: '.*:STAR2PASS:PASS_2' { ext.args = [ "", - // TODO: is this valid when both tools are run? - params.tool.split(',').contains('circrna_finder') ? "--chimOutType Junctions SeparateSAMold" : "--chimOutType Junctions WithinBAM", + params.tools.split(',').contains('circrna_finder') ? "--chimOutType Junctions SeparateSAMold" : "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", "--outFilterType BySJout", "--outReadsUnmapped None", "--readFilesCommand zcat", "--sjdbFileChrStartEnd dataset.SJ.out.tab", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "--alignSJDBoverhangMin 10", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "--chimJunctionOverhangMin 10", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "--chimSegmentMin 10" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/star/2nd_pass" }, + path: { "${params.outdir}/bsj_detection/tools/star/2nd_pass/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: CIRCEXPLORER2_REFERENCE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('circexplorer2') } + withName: '.*:CIRCEXPLORER2:REFERENCE' { ext.args = [ "", "-genePredExt", "-geneNameAsName2" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/genome/circexplorer2" }, + path: { "${params.outdir}/references/bsj_detection/circexplorer2" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } - withName: CIRCEXPLORER2_PARSE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('circexplorer2') } + withName: '.*:CIRCEXPLORER2:PARSE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/circexplorer2/intermediates/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } + ] + } + withName: '.*:CIRCEXPLORER2:ANNOTATE' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/circexplorer2/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/circexplorer2/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: CIRCEXPLORER2_ANNOTATE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('circexplorer2') } + withName: '.*:CIRCEXPLORER2:UNIFY' { + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$6, \$13, \$6 }'" + ext.suffix = "circexplorer2.bed" publishDir = [ - path: { "${params.outdir}/circrna_discovery/circexplorer2/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/circexplorer2/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + pattern: "*.circexplorer2.bed" ] } - withName: CIRCEXPLORER2_FILTER { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('circexplorer2') } + withName: '.*:CIRCRNA_FINDER:MAIN' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/circexplorer2/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/circrna_finder/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null }, + pattern: "*.bed" ] } - withName: CIRCRNA_FINDER_FILTER { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('circrna_finder') } + withName: '.*:CIRCRNA_FINDER:UNIFY' { + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$6, \$5, \$6 }'" + ext.suffix = "circrna_finder.bed" publishDir = [ - path: { "${params.outdir}/circrna_discovery/circrna_finder/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/circrna_finder/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" + pattern: "*.circrna_finder.bed" ] } - withName: FIND_CIRC_ALIGN { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('find_circ') } - ext.args = [ "", - "--very-sensitive", - "--mm", - "-D 20", - "--score-min=C,-15,0", - "-q" - ].join(' ').trim() + withName: '.*:FIND_CIRC:ALIGN' { + ext.args = { "--very-sensitive --mm -D 20 --score-min=C,-15,0 -q " + + (!meta.strandedness || meta.strandedness == 'unstranded' || meta.strandedness == 'auto' ? '' : + meta.strandedness == 'forward' ? ' --norc' : ' --nofw') } publishDir = [ - path: { "${params.outdir}/circrna_discovery/find_circ/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } ] } - withName: SAMTOOLS_VIEW { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('find_circ') } - ext.prefix = { "${meta.id}_unmapped" } - ext.args = "-hf 4" + withName: '.*:FIND_CIRC:SAMTOOLS_INDEX' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/find_circ/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: FIND_CIRC_ANCHORS { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('find_circ') } + withName: '.*:FIND_CIRC:SAMTOOLS_VIEW' { + ext.prefix = { "${meta.id}_unmapped" } + ext.args = "-hf 4" publishDir = [ - path: { "${params.outdir}/circrna_discovery/find_circ/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: FIND_CIRC { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('find_circ') } + withName: '.*:FIND_CIRC:ANCHORS' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/find_circ/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: FIND_CIRC_FILTER { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('find_circ') } + withName: '.*:FIND_CIRC:MAIN' { + ext.args = { !meta.strandedness || meta.strandedness == 'unstranded' || meta.strandedness == 'auto' ? '' : + meta.strandedness == 'forward' ? ' --norc' : ' --nofw' } publishDir = [ - path: { "${params.outdir}/circrna_discovery/find_circ/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: CIRIQUANT_YML { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('ciriquant') } + withName: '.*:FIND_CIRC:UNIFY' { + // Keep only rows with CIRCULAR, UNAMBIGUOUS_BP and ANCHOR_UNIQUE in $18 + ext.args = "-v FS='\\t' -v OFS='\\t' '{ if (\$18 ~ /CIRCULAR/ && \$18 ~ /UNAMBIGUOUS_BP/ && \$18 ~ /ANCHOR_UNIQUE/) { print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$6, \$5, \$6 } }'" + ext.suffix = "find_circ.bed" publishDir = [ - path: { "${params.outdir}/genome/ciriquant" }, + path: { "${params.outdir}/bsj_detection/tools/find_circ/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference + pattern: "*.find_circ.bed" ] } - withName: CIRIQUANT { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('ciriquant') } + withName: '.*:CIRIQUANT:MAIN' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/ciriquant/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/ciriquant/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: CIRIQUANT_FILTER { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('ciriquant') } + withName: '.*:CIRIQUANT:UNIFY' { + // Drop all rows starting with # + // $count is $14 until the dot (never has decimals) + // Print $1 $4 $5 $1:$4-$5:$7 $count $7 + ext.args = "-v OFS='\\t' '!/^#/ { count = substr(\$14, 1, index(\$14, \".\") - 1); print \$1, \$4, \$5, \$1 \":\" \$4 \"-\" \$5 \":\" \$7, count, \$7 }'" + ext.suffix = "ciriquant.bed" + + publishDir = [ - path: { "${params.outdir}/circrna_discovery/ciriquant/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/ciriquant/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" + pattern: "*.ciriquant.bed" ] } - withName: DCC_1ST_PASS { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } + withName: '.*:DCC:MATE1_1ST_PASS' { + ext.prefix = { "${meta.id}_mate1" } ext.args = [ "", "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", "--outFilterType BySJout", "--outReadsUnmapped None", "--readFilesCommand zcat", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/align/1st_pass" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate1/1st_pass" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: DCC_SJDB { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } + withName: '.*:DCC:MATE1_SJDB' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/align/sjdb" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate1/sjdb" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } ] } - withName: DCC_2ND_PASS { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } + withName: '.*:DCC:MATE1_2ND_PASS' { + ext.prefix = { "${meta.id}_mate1" } ext.args = [ "", "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", @@ -411,52 +449,48 @@ if (!params.skip_trimming) { "--outReadsUnmapped None", "--readFilesCommand zcat", "--sjdbFileChrStartEnd dataset.SJ.out.tab", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "--alignSJDBoverhangMin 10", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "--chimJunctionOverhangMin 10", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "--chimSegmentMin 10" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/align/2nd_pass" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate1/2nd_pass" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: DCC_MATE1_1ST_PASS { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } - ext.prefix = { "${meta.id}_mate1" } + withName: '.*:DCC:MATE2_1ST_PASS' { + ext.prefix = { "${meta.id}_mate2" } ext.args = [ "", "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", "--outFilterType BySJout", "--outReadsUnmapped None", "--readFilesCommand zcat", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate1/1st_pass" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate2/1st_pass" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: DCC_MATE1_SJDB { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } + withName: '.*:DCC:MATE2_SJDB' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate1/sjdb" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate2/sjdb" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } ] } - withName: DCC_MATE1_2ND_PASS { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } - ext.prefix = { "${meta.id}_mate1" } + withName: '.*:DCC:MATE2_2ND_PASS' { + ext.prefix = { "${meta.id}_mate2" } ext.args = [ "", "--chimOutType Junctions WithinBAM", "--outSAMunmapped Within", @@ -464,191 +498,492 @@ if (!params.skip_trimming) { "--outReadsUnmapped None", "--readFilesCommand zcat", "--sjdbFileChrStartEnd dataset.SJ.out.tab", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "--alignSJDBoverhangMin 10", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "--chimJunctionOverhangMin 10", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "--chimSegmentMin 10" + "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}", + "--limitSjdbInsertNsj ${params.limitSjdbInsertNsj}", + "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}", + "--chimSegmentMin ${params.chimSegmentMin}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate1/2nd_pass" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/mate2/2nd_pass" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: DCC_MATE2_1ST_PASS { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } - ext.prefix = { "${meta.id}_mate2" } - ext.args = [ "", - "--chimOutType Junctions WithinBAM", - "--outSAMunmapped Within", - "--outFilterType BySJout", - "--outReadsUnmapped None", - "--readFilesCommand zcat", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "" - ].join(' ').trim() + withName: '.*:DCC:MAIN' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate2/1st_pass" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: DCC_MATE2_SJDB { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } + withName: '.*:DCC:UNIFY' { + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$4, \$5, \$4 }'" + ext.suffix = "dcc.bed" publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate2/sjdb" }, + path: { "${params.outdir}/bsj_detection/tools/dcc/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_intermediates + pattern: "*.dcc.bed" ] } - withName: DCC_MATE2_2ND_PASS { - ext.when = { !meta.single_end && params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('dcc') } - ext.prefix = { "${meta.id}_mate2" } + withName: '.*:MAPSPLICE:REFERENCE' { ext.args = [ "", - "--chimOutType Junctions WithinBAM", - "--outSAMunmapped Within", - "--outFilterType BySJout", - "--outReadsUnmapped None", - "--readFilesCommand zcat", - "--sjdbFileChrStartEnd dataset.SJ.out.tab", - params.alignSJDBoverhangMin ? "--alignSJDBoverhangMin ${params.alignSJDBoverhangMin}" : "--alignSJDBoverhangMin 10", - params.chimJunctionOverhangMin ? "--chimJunctionOverhangMin ${params.chimJunctionOverhangMin}" : "--chimJunctionOverhangMin 10", - params.chimSegmentMin ? "--chimSegmentMin ${params.chimSegmentMin}" : "--chimSegmentMin 10" + "-genePredExt", + "-geneNameAsName2" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/mate2/2nd_pass" }, + path: { "${params.outdir}/references/bsj_detection/mapsplice" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } ] } - withName: DCC { - ext.when = { params.tool.split(',').contains('dcc') && params.module.split(',').contains('circrna_discovery') } + withName: '.*:MAPSPLICE:ALIGN' { + ext.args = [ "", + "--seglen ${params.seglen}", + "--min-intron ${params.min_intron}", + "--max-intron ${params.max_intron}", + "--min-map-len ${params.min_map_len}", + "--min-fusion-distance ${params.min_fusion_distance}", + "--fusion-non-canonical" + ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/circrna_discovery/dcc/intermediates/" }, + path: { "${params.outdir}/bsj_detection/tools/mapsplice/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: MAPSPLICE_REFERENCE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('mapsplice') } - ext.args = [ "", - "-genePredExt", - "-geneNameAsName2" - ].join(' ').trim() + withName: '.*:MAPSPLICE:PARSE' { publishDir = [ - path: { "${params.outdir}/genome/mapsplice" }, + path: { "${params.outdir}/bsj_detection/tools/mapsplice/intermediates/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: MAPSPLICE_ALIGN { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('mapsplice') } - ext.args = [ "", - params.seglen ? "--seglen ${params.seglen}" : "--seglen 25", - params.min_intron ? "--min-intron ${params.min_intron}" : "--min-intron 20", - params.max_intron ? "--max-intron ${params.max_intron}" : "--max-intron 1000000", - params.min_map_len ? "--min-map-len ${params.min_map_len}" : "--min-map-len 40", - params.min_fusion_distance ? 
"--min-fusion-distance ${params.min_fusion_distance}" : "--min-fusion-distance 200", - "--fusion-non-canonical" - ].join(' ').trim() + withName: '.*:MAPSPLICE:ANNOTATE' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/mapsplice/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/mapsplice/intermediates/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:MAPSPLICE:UNIFY' { + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$1 \":\" \$2 \"-\" \$3 \":\" \$6, \$10, \$6 }'" + ext.suffix = "mapsplice.bed" + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/mapsplice/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates + pattern: "*.mapsplice.bed" ] } -withName: MAPSPLICE_PARSE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('mapsplice') } + withName: 'FILTER_BSJS' { + // Make sure score is higher or equal to the threshold + ext.args = { "-v FS='\\t' -v OFS='\\t' '{ if (\$5 >= ${params.bsj_reads}) { print } }'" } + ext.suffix = {"${meta.tool}.filtered.bed"} publishDir = [ - path: { "${params.outdir}/circrna_discovery/mapsplice/intermediates" }, + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/filtered" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates ] } - withName: MAPSPLICE_ANNOTATE { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('mapsplice') } + withName: 'MASK_SCORES' { + // Take bed file and replace the score column with a dot + ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \$4, \".\", \$6 }'" + ext.suffix = {"${meta.tool}.masked.bed"} + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/masked" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: 'CONCAT_TOOLS_PER_SAMPLE' { + // GNU sort by columns 1,2,3,4,6 + ext.args = "-k1,1 -k2,2n -k3,3n -k4,4 -k6,6" + ext.suffix = {"sorted.bed"} + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> null } // The same data will be published in COUNT_TOOLS in a better format + ] + } + + withName: 'COUNT_TOOLS' { + // Count the number of tools that support each circRNA + ext.summary_col = 5 + ext.args = "-g 1,2,3,4,6 -o count" + ext.suffix = {"tool_counts.bed"} + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: 'FILTER_MIN_TOOLS' { + // Keep only rows with at least the minimum number of tools + // Replace the score column with a dot + ext.args = { "-v FS='\\t' -v OFS='\\t' '{ if (\$6 >= ${params.tool_filter}) { print \$1, \$2, \$3, \$4, \".\", \$5 } }'" } + ext.suffix = "filtered.bed" + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'CONCAT_SAMPLES' { + // GNU sort by columns 1,2,3,4,6 + ext.args = "-k1,1 -k2,2n -k3,3n -k4,4 -k6,6 -u" + ext.suffix = {"combined.bed"} publishDir = [ - path: { "${params.outdir}/circrna_discovery/mapsplice/intermediates" }, + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'EXTRACT_COUNTS' { + // Add meta.id as header + // Keep columns 4,5 + ext.args = { "-v FS='\\t' -v OFS='\\t' 'BEGIN { print \"id\", \"${meta.id}\" } { print \$4, \$5 }'" } + ext.suffix = {"counts.tsv"} + publishDir = [ + enabled: false + ] + } + + withName: 'COMBINE_COUNTS_PER_TOOL' { + ext.args = "-f 1 -t -O" + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: UPSET_SAMPLES { + ext.when = { params.tools.split(',').length > 1 } + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: UPSET_ALL { + ext.when = { params.tools.split(',').length > 1 } + publishDir = [ + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FASTA_COMBINED { + ext.suffix = "fasta" + publishDir = [ + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FASTA_PER_SAMPLE { + ext.suffix = "fasta" + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FASTA_PER_SAMPLE_TOOL { + ext.suffix = { "${meta.tool}.fasta" } + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:ANNOTATE_(COMBINED|PER_SAMPLE|PER_SAMPLE_TOOL):INTERSECT_GTF' { + ext.args = "-loj" + ext.suffix = "intersect_gtf.bed" + } + + withName: '.*:ANNOTATE_COMBINED:INTERSECT_GTF' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE:INTERSECT_GTF' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE_TOOL:INTERSECT_GTF' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/annotated" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? 
filename : null } + ] + } + + withName: '.*:ANNOTATE_(COMBINED|PER_SAMPLE|PER_SAMPLE_TOOL):INGEST_DATABASE_NAMES' { + ext.args = { "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$3, \"${meta.id}:\" \$4, \$5, \$6 }'" } + ext.suffix = "named.bed" + + publishDir = [ + path: { "${params.outdir}/references/named_databases" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_(COMBINED|PER_SAMPLE|PER_SAMPLE_TOOL):INTERSECT_DATABASE' { + ext.args = { "-f ${meta.min_overlap} -r -loj -wa -wb" } + ext.suffix = "intersect_database.bed" + } + + withName: '.*:ANNOTATE_COMBINED:INTERSECT_DATABASE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE:INTERSECT_DATABASE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE_TOOL:INTERSECT_DATABASE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/annotated" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: '.*:ANNOTATE_(COMBINED|PER_SAMPLE|PER_SAMPLE_TOOL):ANNOTATE' { + ext.prefix = { "${meta.id}.annotated" } + } + + withName: '.*:ANNOTATE_COMBINED:ANNOTATE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE:ANNOTATE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/samples/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:ANNOTATE_PER_SAMPLE_TOOL:ANNOTATE' { + publishDir = [ + path: { "${params.outdir}/bsj_detection/tools/${meta.tool}/annotated" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: ADD_BACKSPLICE { + ext.args = "-c fastx '{ if (\$name ~ /^circ_/) { \$seq = \$seq substr(\$seq, 1, 25) } print \">\" \$name; print \$seq }'" + ext.suffix = "backspliced.fa" + publishDir = [ + path: { "${params.outdir}/mirna_prediction" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: UNIFY_MIRANDA { + ext.args = "-v FS='\\t' -v OFS='\\t' 'NR>1 { print \$1, \$2, \$7, \$8, \"miranda\" }'" + ext.suffix = "miranda.tsv" + publishDir = [ + path: { "${params.outdir}/mirna_prediction/binding_sites/tools/miranda/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_intermediates ] } - withName: MAPSPLICE_FILTER { - ext.when = { params.module.split(',').contains('circrna_discovery') && params.tool.split(',').contains('mapsplice') } + withName: UNIFY_TARGETSCAN { + ext.args = "-v FS='\\t' -v OFS='\\t' 'NR>1 { print \$2, \$1, \$6, \$7, \"targetscan\" }'" + ext.suffix = "targetscan.tsv" publishDir = [ - path: { "${params.outdir}/circrna_discovery/mapsplice/intermediates" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/tools/targetscan/unified" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*circs.bed" ] } - withName: ANNOTATION { + withName: COMBINE_BINDINGSITES { + ext.prefix = "bindingsites.tsv" + } + + withName: COMBINE_TRANSCRIPTOME_GTFS { + ext.args = "-k 1,1 -k4,4n -k5,5n" + ext.suffix = "combined.gtf" publishDir = [ - path: { "${params.outdir}/circrna_discovery/${meta.tool}" }, + path: { "${params.outdir}/quantification/transcriptome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: FASTA { - ext.when = { params.module.split(',').contains('circrna_discovery') } + withName: EXCLUDE_OVERLONG_TRANSCRIPTS { + ext.args = "-v FS='\\t' -v OFS='\\t' '\$5-\$4 <= 10000 { print }'" + ext.suffix = "filtered.gtf" publishDir = [ - path: { "${params.outdir}/circrna_discovery/${meta.tool}" }, + path: { "${params.outdir}/quantification/transcriptome" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*.fasta" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: TRANSCRIPTOME { + publishDir = [ + path: { "${params.outdir}/quantification/transcriptome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MARK_CIRCULAR { + // GAWK process that marks FASTA headers. + // Leaves headers starting with "ENS" and non-header lines as is. + // Adds "\tC" to the end of the header for all other headers + ext.args = "-v FS='\\t' -v OFS='\\t' '{ if (!/^>circ_/) { print } else { print \$1 \"\\tC\" } }'" + ext.suffix = "marked.fasta" + publishDir = [ + path: { "${params.outdir}/quantification/transcriptome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } + ] + } + + withName: PSIRC_INDEX { + publishDir = [ + path: { "${params.outdir}/references/index/psirc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null } + ] + } + + withName: PSIRC_QUANT { + publishDir = [ + path: { "${params.outdir}/quantification/samples/${meta.id}/psirc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'CUSTOM_TX2GENE' { + publishDir = [ + path: { "${params.outdir}/quantification/transcriptome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: COUNTS_SINGLE { - ext.when = { params.module.split(',').contains('circrna_discovery') } + withName: TXIMETA_TXIMETA { publishDir = [ - path: { "${params.outdir}/circrna_discovery/" }, + path: { "${params.outdir}/quantification/samples/${meta.id}/tximeta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: TXIMETA_TXIMPORT { + publishDir = [ + path: { "${params.outdir}/quantification/samples/${meta.id}/tximport" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: ".*:JOIN_(GENE|TX)_(COUNTS|TPM)" { + ext.args = "-f 1,2 -t" + label = "process_medium" + maxRetries = 3 + publishDir = [ + path: { "${params.outdir}/quantification/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: ".*:SPLIT_TYPES_(COUNTS|TPM)" { + publishDir = [ + path: { "${params.outdir}/quantification/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MERGE_EXPERIMENTS { + publishDir = [ + path: { "${params.outdir}/quantification/combined" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MIRNA_PREDICTION:DESEQ2_NORMALIZATION' { + publishDir = [ + path: { "${params.outdir}/mirna_prediction/mirna_expression" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*.txt" ] } - withName: COUNTS_COMBINED { - ext.when = { params.module.split(',').contains('circrna_discovery') } + withName: '.*:MIRNA_PREDICTION:MIRNA_FILTERING' { publishDir = [ - path: { "${params.outdir}/circrna_discovery/" }, + path: { "${params.outdir}/mirna_prediction/mirna_expression" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*.txt" ] } withName: TARGETSCAN_DATABASE { - ext.when = { params.module.split(',').contains('mirna_prediction') } + publishDir = [ + path: { "${params.outdir}/references/mirna_prediction/targetscan" }, + mode: params.publish_dir_mode, + saveAs: { filename -> ( filename != 'versions.yml' && params.save_reference ) ? filename : null }, + pattern: "mature.txt" + ] } withName: TARGETSCAN { - ext.when = { params.module.split(',').contains('mirna_prediction') } ext.prefix = { "${meta.id}.targetscan" } publishDir = [ - path: { "${params.outdir}/mirna_prediction/targetscan" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/tools/targetscan/output" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: "*.txt" @@ -656,11 +991,10 @@ withName: MAPSPLICE_PARSE { } withName: MIRANDA { - ext.when = { params.module.split(',').contains('mirna_prediction') } ext.prefix = { "${meta.id}.miranda" } ext.args = "-strict" publishDir = [ - path: { "${params.outdir}/mirna_prediction/miranda" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/tools/miranda/output" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, pattern: "*.txt" @@ -668,75 +1002,49 @@ withName: MAPSPLICE_PARSE { } withName: MIRNA_TARGETS { - ext.when = { params.module.split(',').contains('mirna_prediction') } publishDir = [ - path: { "${params.outdir}/mirna_prediction/${meta.tool}" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/targets" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: "*.txt" ] } - // DIFF EXP - - withName: HISAT2_ALIGN { - ext.when = { params.module.split(',').contains('differential_expression') } - ext.args = ["", - "--dta", - "--no-spliced-alignment" - ].join(' ').trim() - } - - withName: SAMTOOLS_SORT { - ext.when = { params.module.split(',').contains('differential_expression') } - ext.prefix = { "${meta.id}.sorted" } + withName: COMBINE_BINDINGSITES { publishDir = [ - path: { "${params.outdir}/differential_expression/intermediates/hisat2/" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/majority_vote" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates, - pattern: "*.bam" + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: STRINGTIE_STRINGTIE { - ext.when = { params.module.split(',').contains('differential_expression') } - ext.args = "-e" + withName: MAJORITY_VOTE { publishDir = [ - path: { "${params.outdir}/differential_expression/intermediates/stringtie/" }, + path: { "${params.outdir}/mirna_prediction/binding_sites/majority_vote" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates ] } - withName: STRINGTIE_PREPDE { - ext.when = { params.module.split(',').contains('differential_expression') } + withName: '.*:MIRNA_PREDICTION:COMPUTE_CORRELATIONS' { publishDir = [ - path: { "${params.outdir}/differential_expression/RNA-Seq/" }, + path: { "${params.outdir}/mirna_prediction/correlation" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_intermediates ] } - withName: DESEQ2_DIFFERENTIAL_EXPRESSION { - ext.when = { params.module.split(',').contains('differential_expression') } + withName: CIRCTEST_PREPARE { publishDir = [ - path: { "${params.outdir}/differential_expression/" }, + path: { "${params.outdir}/statistical_tests/circtest" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null } ] } - withName: PREPARE_CLR_TEST { - ext.when = { params.module.split(',').contains('differential_expression') } - } - - withName: CIRCTEST { - ext.when = { params.module.split(',').contains('differential_expression') } + withName: CIRCTEST_CIRCTEST { publishDir = [ - path: { "${params.outdir}/differential_expression/circular_linear_ratio_test" }, + path: { "${params.outdir}/statistical_tests/circtest" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/test.config b/conf/test.config index d778abc17..6c40a460c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,20 +15,18 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h - // Input data for test data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/samples.csv' - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/chrI.fa' - gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/chrI.gtf' - mature = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/reference/mature.fa' - tool = 'circexplorer2' - phenotype = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/phenotype.csv' - skip_trimming = false - module = 'circrna_discovery,mirna_prediction,differential_expression' - outdir = 'test_outdir/' - bsj_reads = 2 - species = 'cel' + // Test input data + input = "${params.test_data_base}/samples.csv" + fasta = "${params.test_data_base}/reference/chrI.fa" + gtf = "${params.test_data_base}/reference/chrI.gtf" + mature = "${params.test_data_base}/reference/mature.fa" + tools = "circexplorer2" + phenotype = "${params.test_data_base}/phenotype.csv" + skip_trimming = false + outdir = "results/" + bsj_reads = 2 } diff --git a/conf/test_full.config b/conf/test_full.config index a3b229adf..094a3f072 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,24 +1,2 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/circrna -profile test_full, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' - - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' -} +includeConfig 'test.config' +includeConfig 'full.config' diff --git a/conf/test_igenomes.config b/conf/test_igenomes.config new file mode 100644 index 000000000..d23ddbe8f --- /dev/null +++ b/conf/test_igenomes.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests using igenomes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a minimal pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/circrna -profile test_igenomes,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Minimal igenomes profile'
+    config_profile_description = 'Minimal igenomes test dataset to check pipeline function'
+
+    // Input data for minimal test using igenomes
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/samples.csv'
+
+    genome = 'ce10'
+    tools = 'circexplorer2'
+    phenotype = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/phenotype.csv'
+    skip_trimming = false
+    star = null // igenomes STAR version is not compatible
+    outdir = 'results/'
+    bsj_reads = 2
+}
diff --git a/docs/images/Genomics-Data-Science-original.png b/docs/images/Genomics-Data-Science-original.png
new file mode 100644
index 000000000..50e9c02b7
Binary files /dev/null and b/docs/images/Genomics-Data-Science-original.png differ
diff --git a/docs/images/metro-map.png b/docs/images/metro-map.png
new file mode 100644
index 000000000..2f4f8af89
Binary files /dev/null and b/docs/images/metro-map.png differ
diff --git a/docs/images/nf-core-circrna_logo_dark.png b/docs/images/nf-core-circrna_logo_dark.png
index 6af619007..b48e7b08c 100644
Binary files a/docs/images/nf-core-circrna_logo_dark.png and b/docs/images/nf-core-circrna_logo_dark.png differ
diff --git a/docs/images/nf-core-circrna_logo_light.png b/docs/images/nf-core-circrna_logo_light.png
index d3cd49c1f..611d5449b 100644
Binary files a/docs/images/nf-core-circrna_logo_light.png and b/docs/images/nf-core-circrna_logo_light.png differ
diff --git a/docs/output.md b/docs/output.md
index a3ed45d6d..f60c28a0e 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -2,219 +2,198 @@

 ## Introduction

-This documentation describes the output of `nf-core/circrna` for the test dataset which runs all 3 modules in the workflow: `circRNA discovery` , `miRNA prediction` and `differential expression` analysis of circular RNAs in RNA-Seq data.
-
-A full run of the workflow will produce the following directory output structure:
-
-```console
-|-- results/
-    |-- circrna_discovery
-    |-- differential_expression
-    |-- mirna_prediction
-    |-- pipeline_info
-    |-- quality_control
-    |-- reference_genome
-```
+This document describes the output produced by the pipeline. 
Most of the plots are taken from the MultiQC report generated from the [full-sized test dataset](https://github.com/nf-core/test-datasets/tree/circrna) for the pipeline using a command similar to the one below:
+
+```bash
+nextflow run nf-core/circrna -profile test_full,<docker/singularity> --outdir <OUTDIR>
+```

-## Pipeline Overview
-
-The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
-
-- [nf-core/circrna: Output](#nf-corecircrna-output)
-  - [Introduction](#introduction)
-  - [Pipeline Overview](#pipeline-overview)
-  - [Quality Control](#quality-control)
-    - [Sam to Fastq](#sam-to-fastq)
-    - [BBDUK](#bbduk)
-    - [DESeq2](#deseq2)
-    - [MultiQC](#multiqc)
-  - [Genome Index Files](#genome-index-files)
-  - [circRNA Quantification](#circrna-quantification)
-    - [CIRCexplorer2](#circexplorer2)
-    - [circRNA finder](#circrna-finder)
-    - [CIRIquant](#ciriquant)
-    - [DCC](#dcc)
-    - [Find circ](#find-circ)
-    - [MapSplice](#mapsplice)
-    - [STAR](#star)
-    - [Segemehl](#segemehl)
-    - [Count Matrix](#count-matrix)
-  - [miRNA Prediction](#mirna-prediction)
-    - [miRanda](#miranda)
-    - [TargetScan](#targetscan)
-    - [miRNA targets](#mirna-targets)
-    - [Circos Plot](#circos-plot)
-  - [Differential Expression Analysis](#differential-expression-analysis)
-    - [circRNA](#circrna)
-    - [Boxplots](#boxplots)
-    - [RNA-Seq](#rna-seq)
+The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
+
+- references: Indices for various tools and intermediate reference genome files
+- preprocessing: Per-sample concatenated FASTQ files
+- quality_control
+  - fastqc: FastQC reports for raw reads
+  - trimgalore: Trim Galore! reports for trimmed reads
+- bsj_detection
+  - combined: Combined BSJ calls across all samples
+  - samples: Per sample BSJ calls
+  - tools: Per tool and sample BSJ calls
+- quantification
+  - combined: Quantification results for linear and circular transcripts across samples
+  - samples: Per sample quantification results
+  - transcriptome: Combined linear and circular transcriptome, based on GTF file and detected BSJs
+- mirna_prediction
+  - binding_sites
+  - correlation
+  - mirna_expression
+- statistical_tests
+  - circtest
+- multiqc
+- pipeline_info

 ## Quality Control

-### Sam to Fastq
+### FastQC
+
+:::note
+The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
+:::
Output files -- `quality_control/SamToFastq` - - `*_R{1,2}.fq.gz`: Paired end fastq files, generated using `VALIDATION_STRINGENCY=LENIENT`. - -
+- `fastqc/`
+  - `*_fastqc.html`: FastQC report containing quality metrics.
+  - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.

-`nf-core/circrna` can accept input BAM files generated from paired end sequencing reads (e.g `TCGA`) by invoking [picard](https://broadinstitute.github.io/picard/) `SamToFastq`, converting BAM files to paired end fastq files.

+:::note
+The FastQC plots in this directory are generated relative to the raw input reads. They may contain adapter sequence and regions of low quality.
+:::

-### BBDUK
+
-Output files +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -- `quality_control/BBDUK/` - - `*_r{1,2}.trim.fq.gz`: Processed paired end fastq files. +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) -
+![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) -[BBDUK](https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/bbduk-guide/) (DUK - "Decontamination Using Kmers") is capable of performing adapter trimming, quality trimming/filtering and read length filtering (refer to BBDUK [parameter documentation](https://nf-co.re/circrna/dev/parameters#read-trimming--adapter-removal)) for the quality control of sequencing reads. `nf-core/circrna` will automatically output gzipped fastq files from `BBDUK` to minimise data usage. +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) -### DESeq2 +### TrimGalore
Output files -- `quality_control/DESeq2_QC` +- `trimgalore/` + - `*.fq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. + - `*_trimming_report.txt`: Log file generated by Trim Galore!. +- `trimgalore/fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read2 if paired-end_) **after** adapter trimming. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
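+The trimmed FastQ files and reports above come from a Trim Galore! invocation roughly like the sketch below (file names are illustrative; the pipeline wires in the actual inputs and options):
+
+```bash
+# Paired-end adapter/quality trimming, with FastQC run on the trimmed reads
+trim_galore --paired --fastqc --gzip sample_R1.fastq.gz sample_R2.fastq.gz
+```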
- - `circRNA/`
+[Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) is a wrapper tool around Cutadapt and FastQC to perform quality and adapter trimming on FastQ files. By default, Trim Galore! will automatically detect and trim the appropriate adapter sequence.

- - `DESeq2_condition_PCA.pdf`: PCA plot of PC1 vs. PC2 displaying the highest amount of variation within the response variable `condition`.
-

-      [image: circRNA PCA]

+![MultiQC - cutadapt trimmed sequence length plot](images/mqc_cutadapt_trimmed.png) - - `DESeq2_dispersion.pdf`: Plot of re-fitted genes + gene outliers after shrinkage estimation performed by gene-wide maximum likelihood estimates (red curve) & maximum a posteriori estimates of dispersion. -

-      [image: circRNA dispersion]

+### MultiQC - - `DESeq2_sample_dendogram.pdf`: Dendogram displaying sample distances using [pvclust](https://cran.r-project.org/web/packages/pvclust/index.html). -

-      [image: circRNA dendo]

+
+Output files - - `DESeq2_sample_heatmap.pdf`: Heatmap displaying Manhattan distance between samples. -

-      [image: circRNA samplehm]

+- `quality_control/MultiQC/` + - `Raw_Reads_MultiQC.html`: Summary reports of unprocessed RNA-Seq reads. + - `Trimmed_Reads_MultiQC.html`: Summary reports of processed RNA-Seq reads. - - `RNA-Seq/` +
- - `DESeq2_condition_PCA.pdf`: PCA plot of PC1 vs. PC2 displaying the highest amount of variation within the response variable `condition`. -

-      [image: circRNA PCA]

+[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. `nf-core` outputs HTML reports for sequencing read quality control. - - `DESeq2_dispersion.pdf`: Plot of re-fitted genes + gene outliers after shrinkage estimation performed by gene-wide maximum likelihood estimates (red curve) & maximum a posteriori estimates of dispersion. -

-      [image: circRNA dispersion]

+## Reference files - - `DESeq2_sample_dendogram.pdf`: Dendogram displaying sample distances using [pvclust](https://cran.r-project.org/web/packages/pvclust/index.html). -

-      [image: circRNA dendo]

+
+Output files - - `DESeq2_sample_heatmap.pdf`: Heatmap displaying Manhattan distance between samples. -

-      [image: circRNA samplehm]

+- `references`
  - `index`
    - `bowtie`: Directory containing `Bowtie` indices.
    - `bowtie2`: Directory containing `Bowtie2` indices.
    - `bwa`: Directory containing `BWA` indices.
    - `fasta`: Directory containing FASTA index (`.fai`).
    - `hisat2`: Directory containing `HISAT2` indices.
    - `segemehl`: Directory containing `Segemehl` index file.
    - `star`: Directory containing `STAR` indices.
  - `genome`
    - `clean_fasta`: Directory containing a FASTA file with reduced headers, since MapSplice has problems with multiple header fields.
    - `filtered_gtf`: Directory containing a GTF file with only entries that reside on chromosomes present in the reference FASTA file.
    - `chromosomes`: Directory containing individual FASTA files for each chromosome.
  - `bsj_detection`
    - `circexplorer2`: Directory containing the `CIRCexplorer2` annotation file.
    - `mapsplice`: Directory containing the `MapSplice` annotation file.
  - `mirna_prediction`
    - `targetscan`: Directory containing the TargetScan miRNA database.
-`nf-core/circrna` outputs quality control plots of normalised _log2_ expression data from `DESeq2` to assess heterogeneity in the experiment samples. These plots can be useful to assess sample-sample similarity and to identify potential batch effects within the experiment. Plots are generated for both circRNAs and RNA-Seq data when the differential expression analysis module has been selected by the user (see `--module` [documentation](https://nf-co.re/circrna/dev/parameters#pipeline-options)). +nf-core/circrna will add the reference files to the output directory if `save_reference` is set to `true`. The resulting files, especially the aligner indices, can be used for speeding up future runs (if the `resume` option cannot be used). In order to achieve this, copy the indices to a location outside of the pipeline's output directory and provide the path to the indices via the corresponding aligner flags (check the [parameters documentation](https://nf-co.re/circrna/parameters/#reference-genome-options) for more information). -### MultiQC +## Pipeline info
Output files

-- `quality_control/MultiQC/`
-  - `Raw_Reads_MultiQC.html`: Summary reports of unprocessed RNA-Seq reads.
-  - `Trimmed_Reads_MultiQC.html`: Summary reports of processed RNA-Seq reads.
+- `pipeline_info`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Parameters used by the pipeline run: `params.json`.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. `nf-core` outputs HTML reports for sequencing read quality control.
+## BSJ detection
+
+The rough workflow for BSJ detection looks like this:
+
+1. Each tool detects BSJs in each sample and quantifies how many reads support each BSJ.
+2. Bring the tool outputs into a common format.
+3. Apply a threshold (parameter `bsj_reads`) to the BSJ reads to filter out lowly supported BSJs.
+4. Combine all tool-specific BSJ calls per sample into a single file.
+5. Filter out BSJs that are not supported by at least as many tools as specified by `tool_filter`.
+6. Merge all samples into a single file. This now represents the "circular transcriptome" (a short sketch of these formats and filters follows the output listing below).

-## Genome Index Files
+### Per tool
-Output files +Output files available for all tools -- `reference_genome` - - `BowtieIndex/`: Directory containing `Bowtie` indices. - - `Bowtie2Index/`: Directory containing `Bowtie2` indices. - - `BWAIndex/`: Directory containing `BWA` indices. - - `Hisat2Index/`: Directory containing `HISAT2` indices. - - `SAMtoolsIndex`: Directory containing `SAMtools` index file. - - `STARIndex`: Directory containing `STAR` indices. - - `SegemehlIndex`: Directory containing `Segemehl` index file. +- `unified`: Directory containing the BSJ calls in the BED6 format. +- `filtered`: Based on `unified`, but filtered for BSJs with at least `bsj_reads` supporting reads. +- `masked`: Based on `filtered`, but scores are replaced by a dot (.) +- `annotated`: Based on `masked`, but with additional columns for the circRNA type, the host gene(s), host transcript(s) and potential database hits. Contains a BED and a GTF file for each sample. +- `fasta`: Extracted sequences of the circRNAs in FASTA format. Based on `masked`. +- `intermediates`: Contains intermediate files generated by the BSJ detection tools, as explained below. +- `${tool}.csv`: Number of reads that the tool found supporting the BSJ.
-`nf-core/circrna` will save genome indices when `--save_reference true`. This is highly encouraged to reduce runtimes on redeployment of the workflow.
-
-## circRNA Quantification
+An exception to the above is `star`, which is not used as a standalone BSJ detection tool; rather, the output of a 2-pass STAR alignment is used by `CIRCexplorer2`, `circRNA finder` and `DCC`.

-### CIRCexplorer2
+#### CIRCexplorer2
Output files -- `circrna_discovery/CIRCexplorer2/intermediates/${sample_id}/` - - - `*.STAR.junction.bed`: Intermediate file generated by `CIRCexplorer2 parse` module, identifying STAR fusion junctions for downstream annotation. +- `bsj_detection/tools/circexplorer2/intermediates/${sample_id}/` + - `*.bed`: Intermediate file generated by `CIRCexplorer2 parse` module, identifying STAR fusion junctions for downstream annotation. - `*.txt`: Output files generated by `CIRCexplorer2 annotate` module, based on BED 12 format containing circRNA genomic location information, exon cassette composition and an additional 6 columns specifying circRNA annotations. Full descriptions of the 18 columns can be found in the `CIRCexplorer2` [documentation](https://circexplorer2.readthedocs.io/en/latest/modules/annotate/#output). -- `circrna_discovery/CIRCexplorer2/${sample_id}/` - - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format. - - `fasta/`: Mature spliced length circRNA FASTA sequences. -
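+For orientation, the two `CIRCexplorer2` steps behind these files look roughly as follows (a sketch; file names are illustrative, and the pipeline supplies the actual STAR junctions and genePred annotation):
+
+```bash
+# Parse STAR chimeric junctions into back-splice junction candidates
+CIRCexplorer2 parse -t STAR Chimeric.out.junction -b back_spliced_junction.bed
+
+# Re-align the candidates and annotate them against the genePred reference
+CIRCexplorer2 annotate -r annotation.txt -g genome.fa -b back_spliced_junction.bed -o circularRNA_known.txt
+```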
[CIRCexplorer2](https://circexplorer2.readthedocs.io/en/latest/) uses `*.Chimeric.out.junction` files generated from `STAR` 2-pass mode to extract back-splice junction sites using the `CIRCexplorer2 parse` module. Following this, `CIRCexplorer2 annotate` performs re-alignment of reads to the back-splice junction sites to determine the precise positions of downstream donor and upstream acceptor splice sites. Back-splice junction sites are subsequently updated and annotated using the customised annotation text file.

-### circRNA finder
+#### circRNA finder
Output files

-- `circrna_discovery/circRNA_Finder/intermediates/${sample_id}/`
+- `bsj_detection/tools/circrna_finder/intermediates/${sample_id}/`

-  - `*.Aligned.sortedByCoord.out.bam`: Coordinate sorted bam file containing aligned reads and chimeric reads.
-  - `*.Chimeric.out.junction`: Each line contains the details of chimerically aligned reads. Full descriptions of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 5.4).
-  - `*.Chimeric.out.sam`: Chimeric alignments in SAM format.
-  - `*.Log.final.out`: Summary mapping statistics after mapping job is complete, useful for quality control. The statistics are calculated for each read (single- or paired-end) and then summed or averaged over all reads.
-  - `*.Log.out`: Main log file with a lot of detailed information about the run. This file is most useful for troubleshooting and debugging.
-  - `*.Log.progress.out`: Reports job progress statistics, such as the number of processed reads, % of mapped reads etc.
-  - `*.SJ.out.tab`: High confidence collapsed splice junctions in tab-delimited form. Full description of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 4.4).
-
  - `*.Chimeric.out.sorted.{bam,bam.bai}`: (Sorted and indexed) bam file with all chimeric reads identified by STAR. The circRNA junction spanning reads are a subset of these.
  - `*.filteredJunctions.bed`: A bed file with **all** circular junctions found by the pipeline. The score column indicates the number of reads spanning each junction.
  - `*.s_filteredJunctions.bed`: A bed file with those junctions in `*.filteredJunctions.bed` that are flanked by GT-AG splice sites. The score column indicates the number of reads spanning each junction.
  - `*.s_filteredJunctions_fw.bed`: A bed file with the same circular junctions as in file (b), but here the score column gives the average number of forward spliced reads at both splice sites around each circular junction.

-- `circrna_discovery/circRNA_Finder/${sample_id}/
-  - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format.
-  - `fasta/`: Mature spliced length circRNA FASTA sequences.
-
-[circRNA finder](https://github.com/orzechoj/circRNA_finder) uses `*.Chimeric.out.sam`, `*.Chimeric.out.junction` & `*.SJ.out.tab` files to identify circular RNAs in RNA-Seq data.
+[circRNA finder](https://github.com/orzechoj/circRNA_finder) uses the `*.Chimeric.out.sam`, `*.Chimeric.out.junction` & `*.SJ.out.tab` files from the STAR 2nd pass to identify circular RNAs in RNA-Seq data.

-### CIRIquant
+#### CIRIquant
Output files

-- `circrna_discovery/CIRIquant/intermediates/${sample_id}/`
-
+- `bsj_detection/tools/ciriquant/intermediates/${sample_id}/`
  - `*.log`: A `CIRIerror.log` file which should be empty, and a `${sample_id}.log` file which contains the output log of `CIRIquant`.
  - `*.bed`: `CIRI2` output file in BED 6 format.
  - `*.gtf`: Output file from `CIRIquant` in GTF format. Full description of the columns available in the `CIRIquant` [documentation](https://ciriquant-cookbook.readthedocs.io/en/latest/quantification.html#output-format).
@@ -226,29 +205,17 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
  - `*_index.*.ht2`: `HISAT2` index files of the pseudo reference.
  - `*_index.fa`: Reference FASTA file of candidate circular reads.

-- `circrna_discovery/CIRIquant/${sample_id}/``
-  - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format.
-  - `fasta/`: Mature spliced length circRNA FASTA sequences.
-
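+The call producing these intermediates is roughly the following (a sketch; `config.yml` is CIRIquant's YAML file pointing at the reference FASTA/GTF and the aligner indices, and all names are illustrative):
+
+```bash
+CIRIquant -t 4 \
+    -1 sample_R1.fq.gz -2 sample_R2.fq.gz \
+    --config config.yml \
+    -o ciriquant/sample -p sample
+```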
[CIRIquant](https://github.com/Kevinzjy/CIRIquant) operates by aligning RNA-Seq reads using `HISAT2` and [CIRI2](https://sourceforge.net/projects/ciri/files/CIRI2/) to identify putative circRNAs. Next, a pseudo reference index is generated using `bwa index` by concatenating the two full-length sequences of the putative back-splice junction regions. Candidate circular reads are re-aligned against this pseudo reference using `bwa mem`, and back-splice junction reads are determined if they can be linearly and completely aligned to the putative back-splice junction regions. -### DCC +#### DCC
Output files

-- `/circrna_discovery/DCC/intermediates/${sample_id}/`
-
-  - `*CircCoordinates`: Circular RNA annotations in BED format. Full description of the columns are available in the `DCC` [documentation](https://github.com/dieterich-lab/DCC#output-files-generated-by-dcc).
-  - `*CircRNACount`: A table containing read counts for circRNAs detected.
-  - `mate1/`: Output directory of STAR 2nd pass alignment for R1.
-  - `mate2/`: Output directory of STAR 2nd pass alignment for R2.
-
-- `circrna_discovery/DCC/${sample_id}/`
-  - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format.
-  - `fasta/`: Mature spliced length circRNA FASTA sequences.
+- `bsj_detection/tools/dcc/intermediates/${sample_id}/`
+  - `*.txt`: Output file from `DCC` containing position and BSJ read counts of circRNAs.
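+A paired-end `DCC` call consistent with these outputs and with the filters listed below looks roughly like this (a sketch, not the pipeline's exact command; the `@` files list the per-sample STAR `Chimeric.out.junction` paths for the joint, mate1 and mate2 alignments, and all names are illustrative):
+
+```bash
+DCC @samplesheet -mt1 @mate1 -mt2 @mate2 \
+    -D -an annotation.gtf -A genome.fa \
+    -Pi -F -M -fg -Nr 1 1
+```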
@@ -257,28 +224,23 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
`DCC` then performs a series of filtering steps on candidate circular reads:

1. Mapping of mates must be consistent with a circular RNA template, i.e. align to the back-splice junction.
-2. Filtering by a minimum number of junction reads per replicate (`nf-core/circrna` has set this parameter to`-Nr 1 1` allowing all reads).
+2. Filtering by a minimum number of junction reads per replicate (nf-core/circrna has set this parameter to `-Nr 1 1`, allowing all reads).
3. Circular reads are not allowed to span more than one gene.
4. Circular reads aligning to the mitochondrial genome are removed.
5. Circular reads that lack a canonical (GT/AG) splicing signal at the circRNA junction borders are removed.

-### Find circ
+#### Find circ
Output files -- `circrna_discovery/find_circ/intermediates/${sample_id}/` - +- `bsj_detection/tools/find_circ/intermediates/${sample_id}/` - `*_anchors.qfa.gz`: 20mer anchors extracted from unmapped reads. - `*_unmapped.bam`: Unmapped RNA-Seq reads to reference genome. - `*.sites.bed`: Output from `find_circ`, first six columns are in standard BED format. A description of the remaining columns is available in the `find_circ` [documentation](https://github.com/marvin-jens/find_circ#output-format). - `*.sites.log`: Summary statistics of candidate circular reads in the sample. - `*.sites.reads`: Tab delimited file containing circRNA ID & sequence. -- `circrna_discovery/find_circ/${sample_id}/` - - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format. - - `fasta/`: Mature spliced length circRNA FASTA sequences. -
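+Behind these files, the two `find_circ` steps are roughly the following (a sketch; the index, genome directory and prefixes are illustrative):
+
+```bash
+# Convert reads that failed contiguous alignment into 20mer anchors
+unmapped2anchors.py sample_unmapped.bam | gzip > sample_anchors.qfa.gz
+
+# Re-align the anchors and call back-splice junctions
+bowtie2 --reorder --mm --score-min=C,-15,0 -q -x bt2_index -U sample_anchors.qfa.gz \
+    | find_circ.py -G chromosomes/ -p sample_ -s sample.sites.log > sample.sites.bed 2> sample.sites.reads
+```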
[find circ](https://github.com/marvin-jens/find_circ) utilises `Bowtie2` short read mapper to align RNA-Seq reads to the genome. Reads that align fully and contiguously are discarded. Unmapped reads are converted to 20mers and aligned independently to find unique anchor positions within spliced exons - anchors that align in reverse orientation indicate circular RNA junctions. Anchor alignments are extended and must meet the following criteria: @@ -289,13 +251,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d 4. Breakpoint cannot reside more than 2nt inside a 20mer anchor. 5. 2 reads must support the junction. -### MapSplice +#### MapSplice
Output files -- `circrna_discovery/MapSplice/intermediates/${sample_id}/` - +- `bsj_detection/tools/mapsplice/intermediates/${sample_id}/` - `alignments.bam`: Bam file containing aligned reads and fusion alignments. - `deletions.txt`: Report of deletions. - `Fusion output files`: @@ -308,283 +269,245 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `junctions.txt`: Reported splice junctions. - `stats.txt`: Read alignment, Junction statistics. -- `circrna_discovery/MapSplice/${sample_id}/` - - `${sample_id}.bed`: Filtered, annotated circRNAs in customised BED12 format. - - `fasta/`: Mature spliced length circRNA FASTA sequences. -
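+These outputs correspond to a `mapsplice.py` call along the following lines (a sketch using the defaults set in `conf/modules.config`; the chromosome directory and `Bowtie` index come from the reference preparation steps, and file names are illustrative):
+
+```bash
+mapsplice.py -c chromosomes/ -x bowtie_index \
+    -1 sample_R1.fastq -2 sample_R2.fastq \
+    --gene-gtf genes.gtf --fusion-non-canonical \
+    --seglen 25 --min-intron 20 --max-intron 1000000 \
+    --min-map-len 40 --min-fusion-distance 200 \
+    -o mapsplice_out
+```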
[MapSplice](http://www.netlab.uky.edu/p/bioinfo/MapSplice2) first splits reads into segments, and maps them to the reference genome using `Bowtie`. `MapSplice` attempts to fix unmapped segments as gapped alignments, with each gap corresponding to a splice junction. Finally, a remapping step is used to identify back-spliced alignments in the presence of small exons.

-### STAR
+#### Segemehl
Output files

-- `circrna_discovery/STAR/1st_Pass/${sample_id}/`
-
-  - `*.Aligned.sortedByCoord.out.bam`: Coordinate sorted bam file containing aligned reads and chimeric reads.
-  - `*.Chimeric.out.junction`: Each line contains the details of chimerically aligned reads. Full descriptions of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 5.4).
-  - `*.Chimeric.out.sam`: Chimeric alignments in SAM format.
-  - `*.Log.final.out`: Summary mapping statistics after mapping job is complete, useful for quality control. The statistics are calculated for each read (single- or paired-end) and then summed or averaged over all reads.
-  - `*.Log.out`: Main log file with a lot of detailed information about the run. This file is most useful for troubleshooting and debugging.
-  - `*.Log.progress.out`: Reports job progress statistics, such as the number of processed reads, % of mapped reads etc.
-  - `*.SJ.out.tab`: High confidence collapsed splice junctions in tab-delimited form. Full description of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 4.4).
-
-- `circrna_discovery/STAR/2nd_Pass/${sample_id}/`
-
-  - `*.Aligned.sortedByCoord.out.bam`: Coordinate sorted bam file containing aligned reads and chimeric reads.
-  - `*.Chimeric.out.junction`: Each line contains the details of chimerically aligned reads. Full descriptions of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 5.4).
-  - `*.Chimeric.out.sam`: Chimeric alignments in SAM format.
-  - `*.Log.final.out`: Summary mapping statistics after mapping job is complete, useful for quality control. The statistics are calculated for each read (single- or paired-end) and then summed or averaged over all reads.
-  - `*.Log.out`: Main log file with a lot of detailed information about the run. This file is most useful for troubleshooting and debugging.
-  - `*.Log.progress.out`: Reports job progress statistics, such as the number of processed reads, % of mapped reads etc.
-  - `*.SJ.out.tab`: High confidence collapsed splice junctions in tab-delimited form. Full description of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 4.4).
-
-- `circrna_discovery/STAR/SJFile/`
-  - `*.SJFile.tab`: Chromosome, start, end & strand coordinates of novel splice junctions.
+- `bsj_detection/tools/segemehl/intermediates/${sample_id}/`
+  - `*.bam`: Aligned reads in BAM format.
+  - `*.mult.bed`: This bed file contains all splice events of a read. The start and end positions indicate the nucleotide after the first split (i.e. the beginning of the first intron) and the nucleotide before the last split (i.e. the end of the last intron), respectively. The name and score are equivalent to those in the \*.sngl.bed file described below. The following fields 7 & 8 (thickStart and thickEnd) should be identical to fields 2 & 3. Field 9 holds the color information for the item in RGB encoding (itemRGB). Field 10 (blockCount) indicates the number of splits represented by the BED item. Field 11 is a comma separated list of the intron sizes (blockSizes). Field 12 is the comma separated list of intron starts (blockStarts). 
+  - `*.sngl.bed`: This BED file contains all single splice events predicted in the split read alignments.
+  - `*.trns.bed`: This custom text file contains all single split alignments predicted to be in trans, i.e. split alignments that are located on different chromosomes and/or different strands.
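+
+A quick way to eyeball these intermediates is to summarise the single splice events directly. The following is a minimal sketch (the file name is illustrative) that counts predicted splice events per chromosome from the `*.sngl.bed` file:
+
+```bash
+# Count single splice events per chromosome (column 1 of the BED file)
+cut -f1 sample.sngl.bed | sort | uniq -c | sort -rn | head
+```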
-[STAR](https://github.com/alexdobin/STAR) can characterise novel splice junctions in RNA-Seq data by specifying `--ChimOutType Junctions`, with reported novel junctions written to a `*SJ.out.tab` file (per sample). Following the initial `STAR` alignment, a 2nd pass strategy is employed whereby **all** `*SJ.out.tab` files from RNA-Seq samples are converted to `*SJFile.tab` files of novel junction coordinates and provided during the 2nd alignment step via `--sjdbFileChrStartEnd`. - -This achieves the highest sensitivity for novel junction alignment. For instance, if there is a novel junction that's highly expressed (many reads, confident detection) in the wild-type, but only weakly expressed (few reads) in the experimental group, by using junctions detected in all samples for the 2nd pass, `STAR` will detect lowly expressed spliced reads in the experimental group. +`Segemehl` implements split read alignment mode for reads that failed the attempt of collinear alignment. The algorithm will consider circular alignments. Circular splits are output to `${sample_id}.sngl.bed` and parsed using customised scripts to produce counts representative of `Segemehl` quantification. -### Segemehl +#### STAR
Output files

-- `circrna_discovery/Segemehl/intermediates/${sample_id}/`
-  - `*.bam`: Aligned reads in BAM format
-  - `*_collapsed.bed`: Segemehl circRNA counts in minimal BED 6 format
-  - `*.mult.bed`: Thus, this bed file contains all splice events of a read. The start and end positions indicate the nucleotide after the first split (i.e. the beginning of the first intron) and the nucleotide before the last split (i.e. the end of the last intron), respectively. The name and score are equivalent to the one in the \*.sngl file described above. The following fields 7 & 8 (thickStart and thickEnd) should be the identical to fields 2 & 3. Field 9 holds the color information for the item in RGB encoding (itemRGB). Field 10 (blockCount) indicates the number of splits represented by the BED item. Field 11 is a comma separated list of the intron sizes (blockSizes). Field 12 is the comma separated list of intron starts (blockStarts).
-  - `*.sngl.bed`: The bed file contains all single splice events predicted in the split read alignments.
-  - `*.trns.bed`: The custom text file contains all single split alignments predicted to be in trans, i.e. split alignments that are located on different chromosomes and/or different strands.
+- `bsj_detection/tools/star`
+  - `1st_pass`
+    - `*.Aligned.out.bam`: BAM file containing aligned reads, including chimeric reads.
+    - `*.Chimeric.out.junction`: Each line contains the details of chimerically aligned reads. Full descriptions of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 5.4).
+    - `*.Log.final.out`: Summary mapping statistics after mapping job is complete, useful for quality control. The statistics are calculated for each read (single- or paired-end) and then summed or averaged over all reads.
+    - `*.Log.out`: Main log file with a lot of detailed information about the run. This file is most useful for troubleshooting and debugging.
+    - `*.Log.progress.out`: Reports job progress statistics, such as the number of processed reads, % of mapped reads etc.
+    - `*.SJ.out.tab`: High confidence collapsed splice junctions in tab-delimited form. Full description of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 4.4).
+  - `2nd_pass`
+    - `*.Aligned.out.bam`: BAM file containing aligned reads, including chimeric reads.
+    - `*.Chimeric.out.junction`: Each line contains the details of chimerically aligned reads. Full descriptions of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 5.4).
+    - `*.Chimeric.out.sam`: Chimeric alignments in SAM format.
+    - `*.Log.final.out`: Summary mapping statistics after mapping job is complete, useful for quality control. The statistics are calculated for each read (single- or paired-end) and then summed or averaged over all reads.
+    - `*.Log.out`: Main log file with a lot of detailed information about the run. This file is most useful for troubleshooting and debugging.
+    - `*.Log.progress.out`: Reports job progress statistics, such as the number of processed reads, % of mapped reads etc.
+    - `*.SJ.out.tab`: High confidence collapsed splice junctions in tab-delimited form.
Full description of columns can be found in `STAR` [documentation](https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf) (section 4.4).
+  - `sjdb`
+    - `dataset.SJ.out.tab`: Chromosome, start, end & strand coordinates of novel splice junctions detected across **all samples** during the STAR 1st pass.
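+
+To gauge how many of the collected junctions are novel, the sketch below relies on the `SJ.out.tab` column layout from the STAR manual, where column 6 flags a junction as annotated (1) or novel (0); the file name is illustrative:
+
+```bash
+# Count novel vs annotated splice junctions in the merged junction file
+awk 'BEGIN { novel = 0; annotated = 0 }
+     { if ($6 == 0) novel++; else annotated++ }
+     END { print novel " novel, " annotated " annotated" }' dataset.SJ.out.tab
+```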
-`Segemehl` implements split read alignment mode for reads that failed the attempt of collinear alignment. The algorithm will consider circular alignments. Circular splits are output to `${sample_id}.sngl.bed` and parsed using customised scripts to produce counts representative of `Segemehl` quantification.
+STAR is run in 2-pass mode to identify novel splice junctions in RNA-Seq data. In the first pass, reads are aligned to the reference genome and novel splice junctions are recorded. In the second pass, the junctions recorded across all samples in the first pass are supplied to STAR, which re-aligns the reads against this extended junction set. This does not increase the number of detected novel junctions, but allows for more sensitive detection of spliced reads mapping to them.

-### Count Matrix
+### Per sample
Output files -- `circrna_discovery/` - - `count_matrix.txt`: Raw circRNA read counts for all samples in matrix format. +- `bsj_detection/samples/${sample_id}/` + - `*.grouped.bed`: Grouped BSJ calls in BED format. Score column represents the number of tools that support the BSJ. + - `*.filtered.bed`: Based on `*.grouped.bed`, but filtered for BSJs with at least `tool_filter` supporting tools. + - `*.intersect_gtf.bed`: Intersection of `*.filtered.bed` with the reference GTF file. Intermediate file for annotation. + - `*.intersect_database.bed`: Intersection of `*.filtered.bed` with the database BED file. Intermediate file for annotation. + - `*.annotated.bed`: Annotated BSJ calls in BED format, based on `*.filtered.bed`. + - `*.annotated.gtf`: Annotated BSJ calls in GTF format, based on `*.filtered.bed`. + - `*.fa`: Extracted sequences of the circRNAs in FASTA format, based on `*.filtered.bed`. + - `*.upset.png`: Sample-specific upset plot of BSJ calls across tools.
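+
+Because the score column of `*.grouped.bed` stores the number of supporting tools, the effect of a stricter `tool_filter` can be previewed after the fact. A minimal sketch with illustrative file names:
+
+```bash
+# Keep only BSJs supported by at least 2 detection tools (BED score = column 5)
+awk '$5 >= 2' sample.grouped.bed > sample.min2tools.bed
+wc -l sample.min2tools.bed
+```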
-`nf-core/circrna` produces a counts matrix of circRNA read counts for each sample. circRNAs with BSJ reads < `--bsj_reads ` have been removed during the quantification step, with a further filtering step included depending on the number of quantification tools selected. If the user has selected more than one circRNA quantification tool, `nf-core/circrna` will demand that a circRNA be called by at least two quantification tools or else it is removed. This approach is recommended to reduce the number of false positives.
-
-## miRNA Prediction
+nf-core/circrna produces a sample-specific set of BSJ calls. Calls supported by fewer than `tool_filter` detection tools are discarded; the remaining calls are annotated against the reference GTF file and the database BED files, and their sequences are extracted in FASTA format. An upset plot is generated to visualise the overlap of BSJ calls across tools.

-### miRanda
+### Combined
Output files

-- `mirna_prediction/miRanda/${sample_id}/`
-  - `*.miRanda.txt`: Raw outputs from `miRanda`.
+- `bsj_detection/combined/`
+  - `*.combined.bed`: Unique BSJ calls across samples in BED format.
+  - `*.intersect_gtf.bed`: Intersection of `*.combined.bed` with the reference GTF file. Intermediate file for annotation.
+  - `*.intersect_database.bed`: Intersection of `*.combined.bed` with the database BED file. Intermediate file for annotation.
+  - `*.annotated.bed`: Annotated BSJ calls in BED format, based on `*.combined.bed`.
+  - `*.annotated.gtf`: Annotated BSJ calls in GTF format, based on `*.combined.bed`.
+  - `*.fa`: Extracted sequences of the circRNAs in FASTA format, based on `*.combined.bed`.
+  - `*.upset.png`: Combined upset plot of BSJ calls across samples.
-[miRanda](http://cbio.mskcc.org/miRNA2003/miranda.html) performs miRNA target prediction of a genomic sequence against a miRNA database in 2 phases:
+nf-core/circrna combines the sample-specific BSJ calls into a single file of unique BSJs. The combined calls are then annotated with the reference GTF file and the database BED files. An upset plot is generated to visualise the overlap of BSJ calls across samples.

-1. First a dynamic programming local alignment is carried out between the query miRNA sequence and the reference sequence. This alignment procedure scores based on sequence complementarity and not on sequence identity.
-2. Secondly, the algorithm takes high-scoring alignments detected from phase 1 and estimates the thermodynamic stability of RNA duplexes based on these alignments. This second phase of the method utilises folding routines from the `RNAlib` library, part of the [ViennaRNA](https://www.tbi.univie.ac.at/RNA/) package.
+## Quantification

-### TargetScan
+Once the BSJ locations are known, their expression can be quantified by mapping the reads to the region between the BSJ start and end coordinates. As each read can potentially originate from both linear and circular transcripts, the pipeline performs a joint quantification of the linear and circular transcriptome.
+The quantification is performed using psirc-quant, a wrapper around `kallisto` that allows for inferential-uncertainty-aware quantification of linear and circular transcripts.
+
+### Transcriptome
Output files -- `mirna_prediction/TargetScan/${sample_id}/` - - `*.targetscan.txt`: Raw outputs from `TargetScan`. +- `quantification/transcriptome/` + - `*.combined.gtf`: Combined linear and circular transcriptome in GTF format. + - `*.filtered.gtf`: Filtered linear and circular transcriptome in GTF format, based on `*.combined.gtf`. + - `*.fasta`: Combined linear and circular transcriptome in FASTA format, based on `*.filtered.gtf`. + - `*.marked.fasta`: Transcript sequences in FASTA format with the circRNA sequences marked with a `C` field in the header. + - `*.tx2gene.tsv`: Transcript to gene mapping file.
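+
+To get a feel for the size of the joint transcriptome, the linear and circular entries of the marked FASTA can be counted. This sketch assumes the `C` marker is the second tab-separated field of the header line, which is an assumption about the exact header layout (the file name is illustrative):
+
+```bash
+# Count circular vs total transcripts in the marked FASTA
+total=$(grep -c '^>' transcriptome.marked.fasta)
+circ=$(awk -F'\t' '/^>/ && $2 == "C"' transcriptome.marked.fasta | wc -l)
+echo "$circ of $total transcripts are circular"
+```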
-[TargetScan](http://www.targetscan.org/vert_72/) predicts biological targets of miRNAs by searching for the presence of conserved 8mer, 7mer, and 6mer sites within the circRNA mature sequence that match the seed region of each miRNA. - -### miRNA targets +### Per sample
Output files

-- `mirna_prediction/${sample_id}/`
-  - `*_miRNA_targets.txt`: Filtered target miRNAs of circRNAs called by quantification tools. Columns are self explanatory: miRNA, Score, Energy_KcalMol, Start, End, Site_type.
+- `quantification/samples/${sample_id}/`
+  - `psirc`
+    - `*.abundance.h5`: Abundance estimates in HDF5 format.
+    - `*.abundance.tsv`: Abundance estimates in TSV format.
+    - `*.run_info.json`: Run information in JSON format.
+    - `pseudoalignments.bam`: Pseudoalignments in BAM format.
+    - `pseudoalignments.bai`: Index file for pseudoalignments.
+  - `tximeta/`
+    - `*.rds`: RDS file containing the sample-specific transcript quantification data.
+  - `tximport/`
+    - `*.gene_counts_length_scaled.tsv`: Gene counts scaled by transcript length.
+    - `*.gene_counts_scaled.tsv`: Gene counts scaled by library size.
+    - `*.gene_counts.tsv`: Gene counts.
+    - `*.gene_lengths.tsv`: Gene lengths.
+    - `*.gene_tpm.tsv`: Gene TPM values.
+    - `*.transcript_counts.tsv`: Transcript counts.
+    - `*.transcript_lengths.tsv`: Transcript lengths.
+    - `*.transcript_tpm.tsv`: Transcript TPM values.
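+
+Assuming the `*.abundance.tsv` files follow the standard `kallisto` layout (`target_id`, `length`, `eff_length`, `est_counts`, `tpm`), a quick look at the most highly expressed transcripts of a sample could look like this (file name illustrative):
+
+```bash
+# Show the 10 transcripts with the highest TPM (column 5), skipping the header
+tail -n +2 sample.abundance.tsv | sort -t$'\t' -k5,5gr | head -n 10
+```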
-`nf-core/circrna` performs miRNA target filtering on `miRanda` and `TargetScan` predictions: - -1. miRNA must be called by both `miRanda` and `TargetScan`. -2. If a site within the circRNA mature sequence shares duplicate miRNA ID's overlapping the same coordinates, the miRNA with the highest score is kept. +nf-core/circrna performs quantification of linear and circular transcripts using `psirc-quant`. The quantification results are stored in HDF5 and TSV format. The pipeline also generates a `tximeta` RDS file containing the sample-specific transcript quantification data. The `tximport` directory contains gene and transcript counts, lengths and TPM values. -### Circos Plot +### Combined
Output files -- `mirna_prediction/${sample_id}/` - - `*_miRNA_Plot.pdf`: Circos plot of mature spliced circRNA sequence with exon boundaries where applicable, displaying miRNA binding sites. -

- circRNA - miRNA circos plot -

+- `quantification/combined/` + - `gene_counts.csv`: Count matrix of genes across samples. + - `gene_tpm.csv`: TPM matrix of genes across samples. + - `tx_counts.csv`: Count matrix of transcripts across samples. + - `tx_tpm.csv`: TPM matrix of transcripts across samples. + - `linear.tsv`: Count matrix of linear transcripts across samples. + - `circular.tsv`: Count matrix of circular transcripts across samples. + - `experiments.merged.rds`: RDS file containing a SummarizedExperiment with the merged transcript quantification data.
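+
+As a rough sanity check of the combined outputs, the number of quantified circular and linear transcripts can be compared. This sketch assumes one feature per row with a single header line, which is an assumption about the matrix layout:
+
+```bash
+# Number of circular vs linear transcripts in the combined quantification
+echo "circular: $(tail -n +2 circular.tsv | wc -l)"
+echo "linear:   $(tail -n +2 linear.tsv | wc -l)"
+```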
-`nf-core/circrna` plots the filtered miRNA targets given using a circos plot, displaying the miRNA response elements along the mature circRNA sequence. Please note this plot becomes overcrowded when plotting `EIciRNAs` due to their highly variable sequence length (in contrast to `circRNAs` and `ciRNAs` which typically fall within the range of 100 - 1000nt). Therefore `EIciRNAs` with large mature spliced lengths should be considered as potentially spurious calls.
-
-## Differential Expression Analysis
+nf-core/circrna combines the sample-specific quantification results into count and TPM matrices covering all samples. It also generates an RDS file containing a SummarizedExperiment with the merged transcript quantification data.

-`nf-core/circrna` will perform differential expression analysis by contrasting every variable within the `condition` column i.e the response variable.
+## miRNA Prediction

-| samples | condition |
-| ------------- | --------- |
-| control_rep1 | control |
-| control_rep2 | control |
-| control_rep3 | control |
-| lung_rep1 | lung |
-| lung_rep2 | lung |
-| lung_rep3 | lung |
-| melanoma_rep1 | melanoma |
-| melanoma_rep2 | melanoma |
-| melanoma_rep3 | melanoma |
+### Binding Sites

-The above experimental design will produce the `DESeq2` design formula `~ condition` and loop through the nested factors within `condition` producing outputs for `control_vs_lung`, `control_vs_melanoma`, `lung_vs_control`, `lung_vs_melanoma`, `melanoma_vs_control` and `melanoma_vs_lung`, capturing every possible contrast.
+#### Tools

-_N.B:_ In the phenotype file the response variable must be called `condition`, these values are hard-coded in the automated differential expression analysis R script.
+This section contains predicted binding sites for miRNA-target interactions generated by various computational tools.
+Each tool utilizes unique algorithms and criteria to identify potential miRNA binding sites on target genomic sequences, providing complementary insights into miRNA regulatory networks.

-### circRNA
+##### miRanda
Output files -- `differential_expression/circRNA/` - - - `DESeq2_log2_transformed_counts.txt`: _log2(Normalised counts + 1)_ - - - `DESeq2_normalized_counts.txt`: Normalised circRNA counts. - - - `control_vs_lung/` +- `mirna_prediction/bindingsites/tools/miranda/output` + - `*.miranda.txt`: Raw predictions from `miRanda`. +- `mirna_prediction/bindingsites/tools/miranda/unified` + - `*.miranda.tsv`: Unified predictions from `miRanda`. - - `DESeq2_{control_vs_lung}_Adj_pvalue_distribution.pdf`: Histogram of Adj pvalues from `results(dds)` displaying the distribution of circRNAs that reject the null hypothesis (padj <= 0.05). -

- circRNA adj-p histogram -

- - - `DESeq2_{control_vs_lung}_down_regulated_differential_expression.txt`: DESeq2 `results()` output filtered to include down regulated circRNAs (fold change <= -1, pvalue <= 0.05) in `condition` with respect to `control`. - - - `DESeq2_{control_vs_lung}_fold_change_distribution.pdf`: Histogram of fold-change values for differentially expressed circRNAs. -

- circRNA FC histogram -

+
- - `DESeq2_{control_vs_lung}_heatmap.pdf`: Heatmap of all differentially expressed circRNAs. -

- circRNA heatmap -

+[miRanda](http://cbio.mskcc.org/miRNA2003/miranda.html) performs miRNA target prediction of a genomic sequence against a miRNA database in 2 phases: - - `DESeq2_{control_vs_lung}_MA_plot.pdf`: Plot of the relationship between intensity and difference between the contrast made by `DESeq2`. -

- circRNA heatmap -

+1. First a dynamic programming local alignment is carried out between the query miRNA sequence and the reference sequence. This alignment procedure scores based on sequence complementarity and not on sequence identity. +2. Secondly, the algorithm takes high-scoring alignments detected from phase 1 and estimates the thermodynamic stability of RNA duplexes based on these alignments. This second phase of the method utilises folding routines from the `RNAlib` library, part of the [ViennaRNA](https://www.tbi.univie.ac.at/RNA/) package. - - `DESeq2_{control_vs_lung}_pvalue_distribution.pdf`: Histogram of pvalues from `results(dds)` displaying the distribution of circRNAs that reject the null hypothesis (pvalue <= 0.05). -

- circRNA pval dist -

+##### TargetScan - - `DESeq2_{condition_vs_lung}_up_regulated_differential_expression.txt`: DEseq2 `results()` ouput filtered to include up regulated circRNAs (fold change >= 1, pvalue <= 0.05) in `condition` with respect to `control`. +
+Output files - - `DESeq2_{condition_vs_lung}_volcano_plot.pdf`: Volcano plot of differentially expressed circRNAs from DESeq2 `results()` using [EnhancedVolcano](https://www.bioconductor.org/packages/release/bioc/vignettes/EnhancedVolcano/inst/doc/EnhancedVolcano.html). -

- circRNA volcano -

+- `mirna_prediction/bindingsites/tools/targetscan/output` + - `*.targetscan.txt`: Raw predictions from `TargetScan`. +- `mirna_prediction/bindingsites/tools/targetscan/unified` + - `*.targetscan.tsv`: Unified predictions from `TargetScan`.
-Sample outputs from `control_vs_lung` are given below, one of 6 `DESeq2` results folders returned by the experimental design given above. - -_Note:_ The test dataset produces sparsely populated plots due to aggressive subsampling. +[TargetScan](http://www.targetscan.org/vert_72/) predicts biological targets of miRNAs by searching for the presence of conserved 8mer, 7mer, and 6mer sites within the circRNA mature sequence that match the seed region of each miRNA. -### Boxplots +#### Targets
Output files -- `differential_expression/boxplots/` - - - `control_vs_lung` - - - `*boxplot.pdf`: Boxplot of differentially expressed circRNAs in `control_vs_lung`. -

- circRNA boxplot -

- - - `control_vs_lung` - - `*boxplot.pdf`: Boxplot of differentially expressed circRNAs in `control_vs_melanoma`. -

- circRNA boxplot -

+- `mirna_prediction/binding_sites/targets`
+  - `*_miRNA_targets.txt`: Filtered target miRNAs of circRNAs called by the detection tools. Columns are self-explanatory: miRNA, Score, Energy_KcalMol, Start, End, Site_type.
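+
+Given the documented column order (miRNA, Score, Energy_KcalMol, Start, End, Site_type), strong binding sites can be pulled out with a simple filter. A sketch assuming tab-separated columns and an illustrative energy cutoff:
+
+```bash
+# Keep binding sites with a predicted duplex energy below -20 kcal/mol (column 3)
+awk -F'\t' '$3 < -20' sample_miRNA_targets.txt | sort -t$'\t' -k3,3g | head
+```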
-`nf-core/circrna` will produce boxplots of differentially expressed circRNAs (normalised expression) between all contrasts available in `condition`.
+nf-core/circrna performs miRNA target filtering on `miRanda` and `TargetScan` predictions:

-_Note:_ The output files give examples for `control_vs_lung` and `control_vs_melanoma`.
+1. The miRNA must be called by both `miRanda` and `TargetScan`.
+2. If duplicate miRNA IDs overlap the same coordinates within the circRNA mature sequence, the miRNA with the highest score is kept.

-### RNA-Seq
+#### Majority Vote
Output files -- `differential_expression/RNA-Seq/` - - - `DESeq2_log2_transformed_counts.txt`: _log2(Normalised counts + 1)_ +- `mirna_prediction/binding_sites/majority_vote` + - `mirna.targets.tsv`: Stores miRNA-target mappings with all targets listed per miRNA, making it compact and suitable for bulk analyses. + - `mirna.majority.tsv`: Lists each miRNA-target interaction on a separate line, which is helpful for detailed analysis of each interaction independently. - - `DESeq2_normalized_counts.txt`: Normalised RNA-Seq counts. +
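+
+To see how many distinct miRNAs retain at least one target after the vote, something like the following works, assuming the miRNA identifier is the first column of `mirna.majority.tsv` and that the file has a header row (the exact layout is not documented here):
+
+```bash
+# Count distinct miRNAs with at least one majority-supported target
+tail -n +2 mirna.majority.tsv | cut -f1 | sort -u | wc -l
+```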
- - `control_vs_lung/` +nf-core/circrna performs a majority vote on the predicted miRNA targets from [TargetScan](http://www.targetscan.org/vert_72/) and [miRanda](http://cbio.mskcc.org/miRNA2003/miranda.html) based on a +threshold specified by the user. - - `DESeq2_{control_vs_lung}_Adj_pvalue_distribution.pdf`: Histogram of Adj pvalues from `results(dds)` displaying the distribution of genes that reject the null hypothesis (padj <= 0.05). -

- circRNA adj-p histogram -

+### miRNA Expression - - `DESeq2_{control_vs_lung}_down_regulated_differential_expression.txt`: DESeq2 `results()` output filtered to include down regulated genes (fold change <= -1, pvalue <= 0.05) in `condition` with respect to `control`. +
+Output files - - `DESeq2_{control_vs_lung}_fold_change_distribution.pdf`: Histogram of fold-change values for differentially expressed genes. -

- circRNA FC histogram -

+- `mirna_prediction/mirna_expression/` + - `mirna.normalized_counts.tsv`: Contains normalized miRNA expression of all samples. + - `mirna.normalized_counts_filtered.tsv`: Contains miRNA expression after filtering. - - `DESeq2_{control_vs_lung}_heatmap.pdf`: Heatmap of all differentially expressed genes. -

- circRNA heatmap -

+
- - `DESeq2_{control_vs_lung}_MA_plot.pdf`: Plot of the relationship between intensity and difference between the contrast made by `DESeq2`. -

- circRNA heatmap -

+nf-core/circrna processes miRNA expression data by normalizing and filtering it for further analysis. - - `DESeq2_{control_vs_lung}_pvalue_distribution.pdf`: Histogram of pvalues from `results(dds)` displaying the distribution of genes that reject the null hypothesis (pvalue <= 0.05). -

- circRNA pval dist -

+### Correlation - - `DESeq2_{condition_vs_lung}_up_regulated_differential_expression.txt`: DEseq2 `results()` ouput filtered to include up regulated genes (fold change >= 1, pvalue <= 0.05) in `condition` with respect to `control`. +
+Output files - - `DESeq2_{condition_vs_lung}_volcano_plot.pdf`: Volcano plot of differentially expressed genes from DESeq2 `results()` using [EnhancedVolcano](https://www.bioconductor.org/packages/release/bioc/vignettes/EnhancedVolcano/inst/doc/EnhancedVolcano.html). -

- circRNA volcano -

+- `mirna_prediction/correlation`
+  - `*.tsv`: One file per miRNA, named after that miRNA, containing its correlation results with its target transcripts.
-Sample outputs from `control_vs_lung` are given below, one of 6 `DESeq2` results folders returned by the experimental design given above.
-
-_Note:_ The test dataset produces sparsely populated plots due to aggressive subsampling.
+nf-core/circrna computes correlations between miRNA and transcript expression levels and writes the results to individual TSV files for each miRNA-target interaction specified in the input binding sites file.
diff --git a/docs/usage.md b/docs/usage.md
index 953fb3370..2f3b2166b 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,269 +1,225 @@
# nf-core/circrna: Usage

-It is recommended that first time users run `nf-core/circrna` with the minimal test dataset either locally or on a HPC, referring to the [output documentation](https://nf-co.re/circrna/dev/output) before running a full analysis.
+## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/circrna/usage](https://nf-co.re/circrna/usage)

-```console
-nextflow run nf-core/circrna -profile test,
-```
-
-## Running the pipeline
-
-A typical command for running the pipeline is as follows:
+> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._

-```console
-nextflow run nf-core/circrna \
- -profile \
- --genome 'GRCh37' \
- --input 'samples.csv' \
- --input_type 'fastq'
-```
+## Pipeline parameters

-By default, `nf-core/circrna` runs the circRNA discovery analysis module using `CIRCexplorer2`. The above command will perform circRNA quantification using these tools on ENSEMBL GRCh37 reference annotation files as defined in the iGenomes config.
+Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration except for parameters; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).

-### Updating the pipeline
+## Samplesheet input

-To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
+You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below.

-```console
-nextflow pull nf-core/circrna
+```bash
+--input '[path to samplesheet file]'
```

-When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since.
-
-### Reproducibility
-
-It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-
-First, go to the [nf-core/circrna releases page](https://github.com/nf-core/circrna/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
- -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. - -## Input specifications - -Input data can be passed to `nf-core/circrna` in two possible ways using the `--input` parameter. - -### `--input ""` +### Multiple runs of the same sample -The simplest way to pass input data to `nf-core/circrna` is by providing the path to the input data with a suitable wildcard glob pattern: +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes. If you set the strandedness value to `auto` the pipeline will use the tool-defaults throughout the pipeline. -#### fastq - -```console ---input "/data/*_r{1,2}.fastq.gz" +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto ``` -##### bam - -```console ---input "/data/*.bam" -``` +### Full samplesheet -> Beware that providing a path to input data will result in samples being named according to the common tuple key based on the glob pattern supplied. Take this into consideration when designing your phenotype file for differential expression analysis. +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. -### `--input samples.csv` +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -Alternatively, the user may wish to provide a CSV file containing the absolute paths to input fastq/bam files. +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,reverse +``` -The headers of the CSV file must be: `Sample_ID,Read1,Read2,Bam`. +| Column | Description | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. 
File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward`, `reverse` or `auto`. | -> This approach is recommended for most real life situations, where in-house sequencing facilities file naming convention requires the user to manually match file names to metadata. The below input files use `TCGA` identifiers as proof of concept. +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -Valid examples for fastq/bam input data in a CSV file is given below: +## BSJ detection -| Sample_ID | Read1 | Read2 | Bam | -| ---------------- | :---------------------------------------------------------------------- | ----------------------------------------------------------------------- | --- | -| TCGA-EJ-7783-11A | /data/f4c1b2b1-ba1f-4355-a1ac-3e952cf351a5_gdc_realn_rehead_R1.fastq.gz | /data/f4c1b2b1-ba1f-4355-a1ac-3e952cf351a5_gdc_realn_rehead_R2.fastq.gz | NA | -| TCGA-G9-6365-11A | /data/8a36555b-9e27-40ee-a8df-4b15d6580a02_gdc_realn_rehead_R1.fastq.gz | /data/8a36555b-9e27-40ee-a8df-4b15d6580a02_gdc_realn_rehead_R2.fastq.gz | NA | -| TCGA-EJ-7782-11A | /data/8b3d4a3d-2bfa-48f8-b31f-901f49a5bf6b_gdc_realn_rehead_R1.fastq.gz | /data/8b3d4a3d-2bfa-48f8-b31f-901f49a5bf6b_gdc_realn_rehead_R2.fastq.gz | NA | -| TCGA-CH-5772-01A | /data/b6546f66-3c13-4390-9643-d1fb3d660a2f_gdc_realn_rehead_R1.fastq.gz | /data/b6546f66-3c13-4390-9643-d1fb3d660a2f_gdc_realn_rehead_R2.fastq.gz | NA | -| TCGA-EJ-5518-01A | /data/afbbc370-5970-43d3-b9f8-f40f8e649bb6_gdc_realn_rehead_R1.fastq.gz | /data/afbbc370-5970-43d3-b9f8-f40f8e649bb6_gdc_realn_rehead_R2.fastq.gz | NA | -| TCGA-KK-A8I4-01A | /data/81254692-ee1e-4985-bd0a-4929eed4c620_gdc_realn_rehead_R1.fastq.gz | /data/81254692-ee1e-4985-bd0a-4929eed4c620_gdc_realn_rehead_R2.fastq.gz | NA | +This part of the pipeline is responsible for the detection of back-splice junctions (BSJs) in the input data. The following tools are currently supported: ---- +- `CIRCexplorer2` +- `circRNA finder` +- `CIRIquant` +- `DCC` +- `find circ` +- `MapSplice` +- `Segemehl` -| Sample_ID | Read1 | Read2 | Bam | -| :--------------- | ----- | ----- | :-------------------------------------------------------------- | -| TCGA-EJ-7783-11A | NA | NA | /data/f4c1b2b1-ba1f-4355-a1ac-3e952cf351a5_gdc_realn_rehead.bam | -| TCGA-G9-6365-11A | NA | NA | /data/8a36555b-9e27-40ee-a8df-4b15d6580a02_gdc_realn_rehead.bam | -| TCGA-EJ-7782-11A | NA | NA | /data/8b3d4a3d-2bfa-48f8-b31f-901f49a5bf6b_gdc_realn_rehead.bam | -| TCGA-CH-5772-01A | NA | NA | /data/b6546f66-3c13-4390-9643-d1fb3d660a2f_gdc_realn_rehead.bam | -| TCGA-EJ-5518-01A | NA | NA | /data/afbbc370-5970-43d3-b9f8-f40f8e649bb6_gdc_realn_rehead.bam | -| TCGA-KK-A8I4-01A | NA | NA | /data/81254692-ee1e-4985-bd0a-4929eed4c620_gdc_realn_rehead.bam | +The tools to be used can be specified using the `tools` parameter. +Each of the tools also quantifies how many reads support each BSJ. You can specify a cutoff for the minimum number of reads supporting a BSJ using the `bsj_reads` parameter. +Additionally, the parameter `tool_filter` can be used to specify how many tools a BSJ has to be detected by to be considered as a valid hit. -> Do not leave any cell empty in the CSV file. +For instructions on how to interpret the output of this section, please check out the [output documentation](https://nf-co.re/circrna/output#bsj-detection). 
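+
+Putting these options together, a hedged example invocation could look like the following; the tool names and cutoffs are illustrative, not recommendations:
+
+```bash
+# Run BSJ detection with three tools, keeping BSJs with >= 2 supporting reads
+# that were called by at least 2 of the selected tools
+nextflow run \
+    nf-core/circrna \
+    --input samplesheet.csv \
+    --outdir results \
+    --tools 'circexplorer2,ciriquant,segemehl' \
+    --bsj_reads 2 \
+    --tool_filter 2 \
+    -profile docker
+```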
-
-### `--phenotype`
+## Annotation

-When running the differential expression analysis module, an input `phenotype.csv` file is required to specify levels for `DESeq2`. At a minimum, the user must supply one column of levels for `DESeq2` which **must be called condition**. This should be the primary contrast of interest in your experiment (e.g case vs. control). If additional columns are supplied to the phenotype file, they will be controlled for in the linear mixed model. A brief proof of concept is given below in R notation:
+The annotation is generally based on the reference GTF file. It can also utilize BED files that are provided by the various circRNA databases.
+The GTF-based annotation allows setting the parameter `exon_boundary` to specify a window around exons. If the BSJ is within this window, it will be annotated as a circRNA - otherwise, it will be annotated as an exon-intron circRNA (EI-circRNA). The default value is `0`.

-```R
-colnames(phenotype)
- [1] 'Sample_ID' 'condition'
+For the database-based annotation, an additional sample sheet is required:

-print(dds$design)
- [1] ' ~ condition'
+```csv title="annotation.csv"
+name,file,min_overlap
+db1,db1.bed,0.9
+db2,db2.bed,0.8
```

-```R
-colnames(phenotype)
- [1] 'Sample_ID' 'condition' 'replicates' 'location'
+| Column | Description |
+| ------------- | --------------------------------------------------------------------------------------------- |
+| `name` | Name of the database. This will be used as a prefix for the region names in the output files. |
+| `file` | Path to the BED file. The file has to be a valid BED6 file. |
+| `min_overlap` | Minimum bidirectional overlap required between the BSJ and the region in the BED file. |

-print(dds$design)
- [1] ' ~ location + replicates + condition'
-```
-
-It is recommended to use an input CSV file in conjunction with your phenotype file as the `Sample_ID` column **must match** the first column of the `phenotype.csv` file.
+The output of the annotation step will be bundled with the outputs of the BSJ detection step.

-A valid example of a `phenotype.csv` file (matching the TCGA example input CSV file above) is given below:
+## miRNA prediction

-| Sample_ID | condition |
-| ---------------- | --------- |
-| TCGA-EJ-7783-11A | control |
-| TCGA-G9-6365-11A | control |
-| TCGA-EJ-7782-11A | control |
-| TCGA-CH-5772-01A | tumor |
-| TCGA-EJ-5518-01A | tumor |
-| TCGA-KK-A8I4-01A | tumor |
+This section searches for miRNA binding sites in the circRNA sequences.
+The following tools are currently supported:

-## Analysis modules
+- `miRanda`
+- `TargetScan`

-`nf-core/circrna` provides 3 analysis modules to the user:
+This section will only be executed if the `mature` parameter is provided.
+The parameter `mature` should point to a FASTA file containing mature miRNA sequences.
+By providing a TSV file containing the miRNA expression of all samples via `mirna_expression`, this
+sub-workflow will perform additional normalization and filtering of `mirna_expression` and `mature` before
+executing the miRNA binding site prediction.

-1. circRNA quantification & annotation.
-2. miRNA target prediction.
-3. Differential circRNA expression analysis.
+To view the outputs of the module, please see the output [documentation](https://nf-co.re/circrna/dev/output#mirna-prediction).

-### circRNA discovery
+## Statistical tests

-The core module of `nf-core/circrna`, a user can utilise up to seven circRNA quantification tools to fully characterise the circRNA profile in samples.
Currently, supported tools include `CIRCexplorer2`, `circRNA finder`, `CIRIquant`, `DCC`, `find circ` , `MapSplice` & `Segemehl` however, the authors of `nf-core/circrna` welcome contributions from authors of novel quantification tools to keep the workflow current. +Currently, only [CircTest](https://github.com/dieterich-lab/CircTest) is supported for the statistical analysis of the circRNA expression data. The `phenotype` parameter is required for this step. -By default, `nf-core/circrna` runs the circRNA discovery analysis module. +A valid example of a `phenotype.csv` file (matching the "Full samplesheet") is shown here: -```console -nextflow run nf-core/circrna \ - -profile \ - --genome 'GRCh37' \ - --input 'samples.csv' \ - --input_type 'fastq' \ - --module 'circrna_discovery' +```csv title="phenotype.csv" +sample,condition +CONTROL_REP1,control +CONTROL_REP2,control +CONTROL_REP3,control +TREATMENT_REP1,treatment +TREATMENT_REP2,treatment +TREATMENT_REP3,treatment ``` -To view the outputs of the module, please see the output [documentation](https://nf-co.re/circrna/dev/output#circrna-quantification). - -> Please note that this module must be included for every run of the workflow +Note that `TREATMENT_REP3` only has one entry in the `phenotype.csv` file, even though it has two entries in the `samplesheet.csv` file. +If the `phenotype` parameter is provided, the phenotype information will also be added to the `SummarizedExperiment` object, that results from the "Quantification" step. -#### Tool selection +## Running the pipeline -The user may use one, all or any combination of circRNA quantification tools listed above in the analysis. To select which tools to use for the analysis, specify the `--tool` parameter in the configuration profile or pass it via the command line when running the workflow: +The typical command for running the pipeline is as follows: -```console -nextflow run nf-core/circrna \ - -profile \ - --genome 'GRCh37' \ - --input 'samples.csv' \ - --input_type 'fastq' \ - --module 'circrna_discovery' \ - --tool 'ciriquant,dcc,find_circ' +```bash +nextflow run \ + nf-core/circrna \ + --input \ + --outdir \ + --gtf \ + --fasta \ + --igenomes_ignore \ + --genome null \ + -profile docker ``` -> When providing multiple tools, separate each entry with a comma. - -#### circRNA filtering - -`nf-core/circrna` offers robust filtering of each called circRNA to reduce the number of spurious calls within the dataset. +> **NB:** Loading iGenomes configuration remains the default for reasons of consistency with other workflows, but should be disabled when not using iGenomes, applying the recommended usage above. -##### BSJ reads +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. -The user can specify the minimum number of reads spanning the back-splice junction site required for a circRNA to be considered for further analysis. circRNAs with counts below this value will be filtered to remove from the results. 
+Note that the pipeline will create the following files in your working directory: -To apply this filtering method, specify the `--bsj_reads` parameter in the configuration profile or pass it via the command line when running the workflow: - -```console -nextflow run nf-core/circrna \ - -profile \ - --genome 'GRCh37' \ - --input 'samples.csv' \ - --input_type 'fastq' \ - --phenotype 'phenotype.csv' \ - --module 'circrna_discovery' \ - --tool 'ciriquant, dcc, find_circ' \ - --bsj_reads 2 +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -Disable the filter by setting the value to 0. +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. -##### Multiple tool filtering +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -When more than one tool has been provided using the `--tool` parameter, the user can specify the minimum number of tools circRNAs must be called by using `--tool_filter`. Setting this parameter to 0 or 1 will result in the union being output, i.e no filtering is applied. Setting this parameter to 2 will output circRNAs that have been called by at least 2 quantification tools and so on. +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: -> The integer provided to the parameter must be less than or equal to the number of quantification tools provided to `--tool`. +The above pipeline run specified with a params file in yaml format: -To apply this filtering method, specify the `--tool_filter` parameter in the configuration profile or pass it via the command line when running the workflow: - -```console -nextflow run nf-core/circrna \ - -profile \ - --genome 'GRCh37' \ - --input 'samples.csv' \ - --input_type 'fastq' \ - --module 'circrna_discovery' \ - --tool 'ciriquant, dcc, find_circ' \ - --bsj_reads 2 \ - --tool_filter 2 +```bash +nextflow run nf-core/circrna -profile docker -params-file params.yaml ``` -> This filtering method is reflected in the circRNA count matrix. Per tool circRNA annotations are subject to back-splice read filtering only. +with `params.yaml` containing: -##### Handling duplicate circRNAs - -In the event a circRNA has been called by more than one quantification tool, the user can specify which aggregate function to apply to the duplicated circRNA. The accepted values are 'mean' and 'max', which are passed to the workflow using the `--duplicates_fun` parameter. +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` -### miRNA prediction +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). -The second module of `nf-core/circrna`, `mirna_prediction` analyses the mature spliced sequences of circRNAs to test for the presence of miRNA response elements using both `miRanda` and `TargetScan`. Results from both tools are consolidated and filtering methods are applied to produce robust miRNA target predictions of circRNAs in the dataset. 
+
### Updating the pipeline

-To invoke the module, specify the `--module` parameter via the configuration profile or pass it via the command line when running the workflow:
+When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

-```console
-nextflow run nf-core/circrna \
- -profile \
- --genome 'GRCh37' \
- --input 'samples.csv' \
- --input_type 'fastq' \
- --module 'circrna_discovery, mirna_prediction'
+```bash
+nextflow pull nf-core/circrna
```

-To view the outputs of the module, please see the output [documentation](https://nf-co.re/circrna/dev/output#mirna-prediction).

-### Differential circRNA analysis
+### Reproducibility

-The third and final module of `nf-core/circrna` performs differential expression analysis of circRNAs, returning `DESeq2` result outputs, plots and diagnostic plots for the user. In order to run this module, it is essential that your `phenotype.csv` file is in the correct format - please refer to the input [specifications](https://nf-co.re/circrna/dev/usage#differential-expression-analysis).
+It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.

-To invoke the module, specify the `--module` parameter via the configuration profile or pass it via the command line when running the workflow:
+First, go to the [nf-core/circrna releases page](https://github.com/nf-core/circrna/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.

-```console
-nextflow run nf-core/circrna \
- -profile \
- --genome 'GRCh37' \
- --input 'samples.csv' \
- --input_type 'fastq' \
- --phenotype 'phenotype.csv' \
- --module 'circrna_discovery, differential_expression'
-```
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.

-To view the outputs of the module, please see the output [documentation](https://nf-co.re/circrna/dev/output#differential-expression-analysis).
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.

+:::tip
+If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles.
+::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). @@ -290,8 +246,12 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - Pulls software from Docker Hub: [`nfcore/circrna`](https://hub.docker.com/r/nfcore/circrna/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -309,29 +269,28 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. 
-
-For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue:
-
-```console
-[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1)
-Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)'
-
-Command executed:
-  STAR \
-    --genomeDir star \
-    --readFilesIn WT_REP1_trimmed.fq.gz \
-    --runThreadN 2 \
-    --outFileNamePrefix WT_REP1. \
-
-Command exit status:
-  137
-Command output:
-  (empty)
-Command error:
-  .command.sh: line 9:  30 Killed    STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1.
-Work dir:
-  /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb
-
-Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
-```
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.

#### For beginners

@@ -349,53 +308,27 @@ The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter.

```nextflow
process {
-    withName: star {
-        memory = 32.GB
-    }
+    withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' {
+        memory = 100.GB
+    }
}
```
-
-To find the exact name of a process you wish to modify the compute resources, check the live-status of a nextflow run displayed on your terminal or check the nextflow error for a line like so: `Error executing process > 'bwa'`. In this case the name to specify in the custom config file is `bwa`.
-
-### Updating containers (advanced users)
-
-The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies.
If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.
-
-1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19)
-2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
-3. Create the custom config accordingly:

-   - For Docker:
+> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden.
+>
+> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly.

-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
-         }
-     }
-     ```
+### Custom Containers

-   - For Singularity:
+In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date.

-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
-         }
-     }
-     ```
+To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.

-   - For Conda:
+### Custom Tool Arguments

-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             conda = 'bioconda::pangolin=3.0.5'
-         }
-     }
-     ```
+A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.

-> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
+To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
### nf-core/configs diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6e8..000000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)"
-                } else {
-                    log.error "${error_msg}: ${enums[param].join(', ')})"
-                }
-            } else {
-                log.error "* --${param}: ${ex_json['message']} (${param_val})"
-            }
-        }
-        for (ex in causingExceptions) {
-            printExceptions(ex, params_json, log, enums)
-        }
-    }
-
-    //
-    // Remove an element from a JSONArray
-    //
-    private static JSONArray removeElement(json_array, element) {
-        def list = []
-        int len = json_array.length()
-        for (int i=0;i<len;i++){
-            list.add(json_array.get(i))
-        }
-        list.remove(element)
-        JSONArray jsArray = new JSONArray(list)
-        return jsArray
-    }
-
-    //
-    // Remove ignored parameters
-    //
-    private static JSONObject removeIgnoredParams(raw_schema, params) {
-        // Remove anything that's in params.schema_ignore_params
-        params.schema_ignore_params.split(',').each{ ignore_param ->
-            if(raw_schema.keySet().contains('definitions')){
-                raw_schema.definitions.each { definition ->
-                    for (key in definition.keySet()){
-                        if (definition[key].get("properties").keySet().contains(ignore_param)){
-                            // Remove the param to ignore
-                            definition[key].get("properties").remove(ignore_param)
-                            // If the param was required, change this
-                            if (definition[key].has("required")) {
-                                def cleaned_required = removeElement(definition[key].required, ignore_param)
-                                definition[key].put("required", cleaned_required)
-                            }
-                        }
-                    }
-                }
-            }
-            if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) {
-                raw_schema.get("properties").remove(ignore_param)
-            }
-            if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) {
-                def cleaned_required = removeElement(raw_schema.required, ignore_param)
-                raw_schema.put("required", cleaned_required)
-            }
-        }
-        return raw_schema
-    }
-
-    //
-    // Clean and check parameters relative to Nextflow native classes
-    //
-    private static Map cleanParameters(params) {
-        def new_params = params.getClass().newInstance(params)
-        for (p in params) {
-            // remove anything evaluating to false
-            if (!p['value']) {
-                new_params.remove(p.key)
-            }
-            // Cast MemoryUnit to String
-            if (p['value'].getClass() == nextflow.util.MemoryUnit) {
-                new_params.replace(p.key, p['value'].toString())
-            }
-            // Cast Duration to String
-            if (p['value'].getClass() == nextflow.util.Duration) {
-                new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day"))
-            }
-            // Cast LinkedHashMap to String
-            if (p['value'].getClass() == LinkedHashMap) {
-                new_params.replace(p.key, p['value'].toString())
-            }
-        }
-        return new_params
-    }
-
-    //
-    // This function tries to read a JSON params file
-    //
-    private static LinkedHashMap paramsLoad(String json_schema) {
-        def params_map = new LinkedHashMap()
-        try {
-            params_map = paramsRead(json_schema)
-        } catch (Exception e) {
-            println "Could not read parameters settings from JSON. $e"
-            params_map = new LinkedHashMap()
-        }
-        return params_map
-    }
-
-    //
-    // Method to actually read in JSON file using Groovy.
-    // Group (as Key), values are all parameters
-    //  - Parameter1 as Key, Description as Value
-    //  - Parameter2 as Key, Description as Value
-    // ....
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 25a0a74a6..000000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,336 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e8..000000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowCircrna.groovy b/lib/WorkflowCircrna.groovy deleted file mode 100755 index 7833ed4a4..000000000 --- a/lib/WorkflowCircrna.groovy +++ /dev/null @@ -1,77 +0,0 @@ -// -// This file holds several functions specific to the workflow/circrna.nf in the nf-core/circrna pipeline -// - -import groovy.text.SimpleTemplateEngine - -class WorkflowCircrna { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. 
'--fasta genome.fa' or via a detectable config file."
-            System.exit(1)
-        }
-    }
-
-    //
-    // Get workflow summary for MultiQC
-    //
-    public static String paramsSummaryMultiqc(workflow, summary) {
-        String summary_section = ''
-        for (group in summary.keySet()) {
-            def group_params = summary.get(group)  // This gets the parameters of that particular group
-            if (group_params) {
-                summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n"
-                summary_section += "    <dl class=\"dl-horizontal\">\n"
-                for (param in group_params.keySet()) {
-                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n"
-                }
-                summary_section += "    </dl>\n"
-            }
-        }
-
-        String yaml_file_text  = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
-        yaml_file_text        += "description: ' - this information is collected when the pipeline is started.'\n"
-        yaml_file_text        += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
-        yaml_file_text        += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
-        yaml_file_text        += "plot_type: 'html'\n"
-        yaml_file_text        += "data: |\n"
-        yaml_file_text        += "${summary_section}"
-        return yaml_file_text
-    }
-
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
-        // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
-        def meta = [:]
-        meta.workflow = run_workflow.toMap()
-        meta["manifest_map"] = run_workflow.manifest.toMap()
-
-        meta["doi_text"] = meta.manifest_map.doi ? "(doi: <a href=\'https://doi.org/${meta.manifest_map.doi}\'>${meta.manifest_map.doi}</a>)" : ""
-        meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
-
-        def methods_text = mqc_methods_yaml.text
-
-        def engine = new SimpleTemplateEngine()
-        def description_html = engine.createTemplate(methods_text).make(meta)
-
-        return description_html
-    }//
-    // Exit pipeline if incorrect --genome key provided
-    //
-    private static void genomeExistsError(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                "  Currently, the available genome keys are:\n" +
-                "  ${params.genomes.keySet().join(", ")}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-            System.exit(1)
-        }
-    }
-}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
deleted file mode 100755
index 04b19a7d7..000000000
--- a/lib/WorkflowMain.groovy
+++ /dev/null
@@ -1,99 +0,0 @@
-//
-// This file holds several functions specific to the main.nf workflow in the nf-core/circrna pipeline
-//
-
-class WorkflowMain {
-
-    //
-    // Citation string for pipeline
-    //
-    public static String citation(workflow) {
-        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
-            // TODO nf-core: Add Zenodo DOI for pipeline after first release
-            //"* The pipeline\n" +
-            //"  https://doi.org/10.5281/zenodo.XXXXXXX\n\n" +
-            "* The nf-core framework\n" +
-            "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
-            "* Software dependencies\n" +
-            "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
-    }
-
-    //
-    // Generate help string
-    //
-    public static String help(workflow, params, log) {
-        def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
-        def help_string = ''
-        help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        help_string += NfcoreSchema.paramsHelp(workflow, params, command)
-        help_string += '\n' + citation(workflow) + '\n'
-        help_string += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return help_string
-    }
-
-    //
-    // Generate parameter summary log string
-    //
-    public static String paramsSummaryLog(workflow, params, log) {
-        def summary_log = ''
-        summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        summary_log += NfcoreSchema.paramsSummaryLog(workflow, params)
-        summary_log += '\n' + citation(workflow) + '\n'
-        summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return summary_log
-    }
-
-    //
-    // Validate parameters and print summary to screen
-    //
-    public static void initialise(workflow, params, log) {
-        // Print help to screen if required
-        if (params.help) {
-            log.info help(workflow, params, log)
-            System.exit(0)
-        }
-
-        // Print workflow version and exit on --version
-        if (params.version) {
-            String workflow_version = NfcoreTemplate.version(workflow)
-            log.info "${workflow.manifest.name} ${workflow_version}"
-            System.exit(0)
-        }
-
-        // Print parameter summary log to screen
-        log.info paramsSummaryLog(workflow, params, log)
-
-        // Validate workflow parameters via the JSON schema
-        if (params.validate_params) {
-            NfcoreSchema.validateParameters(workflow, params, log)
-        }
-
-        // Check that a -profile or Nextflow config has been provided to run the pipeline
-        NfcoreTemplate.checkConfigProvided(workflow, log)
-
-        // Check that conda channels are set-up correctly
-        if (params.enable_conda) {
-            Utils.checkCondaChannels(log)
-        }
-
-        // Check AWS batch settings
NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5e..000000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 526c619d6..9be3219c6 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ nf-core/circrna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/circrna - Website: https://nf-co.re/circrna Slack : https://nfcore.slack.com/channels/circrna ---------------------------------------------------------------------------------------- @@ -14,47 +13,110 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { CIRCRNA } from './workflows/circrna' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_circrna_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_circrna_pipeline' + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_circrna_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +params.fasta = getGenomeAttribute('fasta') +params.gtf = getGenomeAttribute('gtf') +params.bwa = getGenomeAttribute('bwa') +params.star = getGenomeAttribute('star') +params.bowtie = getGenomeAttribute('bowtie') +params.bowtie2 = getGenomeAttribute('bowtie2') +params.mature = getGenomeAttribute('mature') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CIRCRNA } from './workflows/circrna' - // -// WORKFLOW: Run main nf-core/circrna analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_CIRCRNA { - CIRCRNA () -} + main: + + ch_versions = Channel.empty() + + // + // WORKFLOW: Run nf-core/circrna workflow + // + ch_samplesheet = Channel.fromSamplesheet("input") + ch_fasta = Channel.value([[id: "fasta"], file(params.fasta, checkIfExists:true)]) + ch_gtf = Channel.value([[id: "gtf"], file(params.gtf, checkIfExists:true)]) + ch_mature = params.mature ? Channel.value([[id: "mature"], file(params.mature, checkIfExists:true)]) : Channel.empty() + ch_phenotype = params.phenotype ? 
Channel.value([[id: "phenotype"], file(params.phenotype, checkIfExists:true)]) : Channel.empty() + ch_annotation = params.annotation ? Channel.fromSamplesheet("annotation") : Channel.empty() + ch_mirna = params.mature && params.mirna_expression ? Channel.value([[id: "mirna"], file(params.mirna_expression, checkIfExists:true)]) : Channel.empty() + + CIRCRNA ( + ch_samplesheet, + ch_phenotype, + ch_fasta, + ch_gtf, + ch_mature, + ch_annotation, + ch_versions, + ch_mirna + ) + emit: + multiqc_report = CIRCRNA.out.multiqc_report // channel: /path/to/multiqc_report.html + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // NFCORE_CIRCRNA () + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_CIRCRNA.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 5e7f3bc36..c1bed4145 100644 --- a/modules.json +++ b/modules.json @@ -5,125 +5,240 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bedtools/getfasta": { + "branch": "master", + "git_sha": "cdcdd5e3d806f0ff3983c40c69e0b07bb44ec299", + "installed_by": ["modules"] + }, + "bedtools/groupby": { + "branch": "master", + "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", + "installed_by": ["modules"] + }, + "bedtools/intersect": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "installed_by": ["modules"] + }, + "bedtools/sort": { + "branch": "master", + "git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47", + "installed_by": ["modules"] + }, + "bioawk": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"], + "patch": "modules/nf-core/bioawk/bioawk.diff" + }, "bowtie/align": { "branch": "master", - "git_sha": "653588be2a4aadab487b530643dbc9baf7a485c4", + "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", "installed_by": ["modules"] }, "bowtie/build": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "bowtie2/align": { "branch": "master", - "git_sha": "1a96d72a3a5079424bc9a9ac5c2ca72902af327f", + "git_sha": "e4bad511789f16d0df39ee306b2cd50418365048", "installed_by": ["modules"] }, "bowtie2/build": { "branch": "master", - "git_sha": "e797efb47b0d3b2124753beb55dc83ab9512bceb", + "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "9518fa4f65f3fb8cde24fde7d40333b39ec8fd65", + "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", + "installed_by": ["modules"] + }, + "cat/cat": { + "branch": "master", + "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": 
"4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e", "installed_by": ["modules"] }, "circexplorer2/annotate": { "branch": "master", - "git_sha": "e3ae75b5e54042683d5a1889fc733a4bf51f6819", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "circexplorer2/parse": { "branch": "master", - "git_sha": "e3ae75b5e54042683d5a1889fc733a4bf51f6819", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "csvtk/join": { + "branch": "master", + "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", + "installed_by": ["modules"] + }, + "csvtk/split": { + "branch": "master", + "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", + "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", + "installed_by": ["modules"] + }, + "custom/gtffilter": { + "branch": "master", + "git_sha": "a0aee18374b7f072aa0f89f4d66f5a3a9f8176d2", + "installed_by": ["modules"] + }, + "custom/tx2gene": { + "branch": "master", + "git_sha": "ec155021a9104441bf6a9bae3b55d1b5b0bfdb3a", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "810e8f2603ec38401d49a4aaed06f6d058745552", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "installed_by": ["modules"] + }, + "gawk": { + "branch": "master", + "git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08", + "installed_by": ["modules"] + }, + "gnu/sort": { + "branch": "master", + "git_sha": "a3cc42943548378b726610f45bb5a79ab3f0b633", "installed_by": ["modules"] }, "hisat2/align": { "branch": "master", - "git_sha": "653588be2a4aadab487b530643dbc9baf7a485c4", + "git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1", "installed_by": ["modules"] }, "hisat2/build": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1", "installed_by": ["modules"] }, "hisat2/extractsplicesites": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1", "installed_by": ["modules"] }, "miranda": { "branch": "master", - "git_sha": "270fc80f29c4dbb1a973ca1d7d748bdd56e97301", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, - "samtools/index": { + "samtools/faidx": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, "samtools/sort": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules"] + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": 
"46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["bam_stats_samtools"] }, "samtools/view": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "git_sha": "6c2309aaec566c0d44a6cf14d4b2d0c51afe2e91", "installed_by": ["modules"] }, "segemehl/align": { "branch": "master", - "git_sha": "a52cd7df53f3e88895362f2c8fd43cfe00fbf1a3", + "git_sha": "9a6b0745dbb5359286d36dee2183ffab240abba0", "installed_by": ["modules"] }, "segemehl/index": { "branch": "master", - "git_sha": "a52cd7df53f3e88895362f2c8fd43cfe00fbf1a3", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "star/align": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", "installed_by": ["modules"] }, "stringtie/stringtie": { "branch": "master", - "git_sha": "d0e6f468af1d51ce3ea4ac4fb5e58723eac3938c", + "git_sha": "b1b959609bda44341120aed1766329909f54b8d0", "installed_by": ["modules"] }, "trimgalore": { "branch": "master", - "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", + "git_sha": "a98418419ae6c9df3cf6cf108d1e1aba71037d5a", "installed_by": ["modules"] + }, + "tximeta/tximport": { + "branch": "master", + "git_sha": "5d095e8413da1f4c72b7d07ce87f75c09482486f", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["subworkflows"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", + "installed_by": ["bam_sort_stats_samtools", "subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/annotation/environment.yml b/modules/local/annotation/environment.yml new file mode 100644 index 000000000..8c3113528 --- /dev/null +++ b/modules/local/annotation/environment.yml @@ -0,0 +1,5 @@ +name: annotation +channels: + - conda-forge +dependencies: + - pandas=1.5.2 diff --git a/modules/local/annotation/full_annotation/main.nf b/modules/local/annotation/full_annotation/main.nf deleted file mode 100644 index 87e4e4354..000000000 --- a/modules/local/annotation/full_annotation/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process ANNOTATION { - tag "${meta.id}:${meta.tool}" - label 'process_high' - - conda (params.enable_conda ? "bioconda::ucsc-gtftogenepred=377 bioconda::ucsc-genepredtobed=377 bioconda::bedtools=2.27.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' :
-        'quay.io/biocontainers/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' }"
-
-    input:
-    tuple val(meta), path(bed)
-    path gtf
-    val exon_boundary
-
-    output:
-    tuple val(meta), path("${prefix}.bed"), emit: bed
-    path("*.log")       , emit: log
-    path "versions.yml" , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '377'
-    """
-    grep -vf ${workflow.projectDir}/bin/unwanted_biotypes.txt $gtf > filt.gtf
-    mv $bed circs.bed
-
-    annotate_outputs.sh $exon_boundary &> ${prefix}.log
-    mv master_bed12.bed ${prefix}.bed.tmp
-
-    awk -v FS="\t" '{print \$11}' ${prefix}.bed.tmp > mature_len.tmp
-    awk -v FS="," '{for(i=t=0;i<NF;t+=\$i++);print t}' mature_len.tmp > mature_length
-
-    paste ${prefix}.bed.tmp mature_length > ${prefix}.bed
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
-        ucsc: $VERSION
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/annotation/main.nf b/modules/local/annotation/main.nf
new file mode 100644
index 000000000..7a698e40b
--- /dev/null
+++ b/modules/local/annotation/main.nf
@@ -0,0 +1,23 @@
+process ANNOTATION {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+        'biocontainers/pandas:1.5.2' }"
+
+    input:
+    tuple val(meta), path(gtf_intersection), path(db_intersections)
+    val(exon_boundary)
+
+    output:
+    tuple val(meta), path("${prefix}.bed"), emit: bed
+    tuple val(meta), path("${prefix}.gtf"), emit: gtf
+
+    path "versions.yml"                   , emit: versions
+
+    script:
+    prefix = task.ext.prefix ?: meta.id
+    template 'annotation.py'
+}
diff --git a/modules/local/annotation/parent_gene/main.nf b/modules/local/annotation/parent_gene/main.nf
deleted file mode 100644
index 4edefbddb..000000000
--- a/modules/local/annotation/parent_gene/main.nf
+++ /dev/null
@@ -1,42 +0,0 @@
-process PARENT_GENE {
-    label 'process_high'
-
-    conda (params.enable_conda ? "bioconda::ucsc-gtftogenepred=377 bioconda::ucsc-genepredtobed=377 bioconda::bedtools=2.27.0" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' :
-        'quay.io/biocontainers/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' }"
-
-    input:
-    path circrna_matrix
-    path gtf
-    val exon_boundary
-
-    output:
-    path "circrna_host-gene.txt" , emit: circ_host_map
-    path "versions.yml"          , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def VERSION = '377'
-    """
-    # remove redundant biotypes from GTF.
-    grep -vf ${workflow.projectDir}/bin/unwanted_biotypes.txt $gtf > filt.gtf
-
-    # generate circrna BED file.
-    tail -n +2 $circrna_matrix | awk '{print \$1}' > IDs.txt
-    ID_to_BED.sh IDs.txt
-    cat *.bed > merged.txt && rm IDs.txt && rm *.bed && mv merged.txt circs.bed
-
-    # Re-use annotation script to identify the host gene.
- annotate_outputs.sh $exon_boundary &> annotation.log - awk -v OFS="\t" '{print \$4, \$14}' master_bed12.bed > circrna_host-gene.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - ucsc: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/annotation/templates/annotation.py b/modules/local/annotation/templates/annotation.py new file mode 100755 index 000000000..b0ff285be --- /dev/null +++ b/modules/local/annotation/templates/annotation.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python + +import pandas as pd +import numpy as np +import platform +import csv + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +columns = { + 0: 'chr', + 1: 'start', + 2: 'end', + 3: 'name', + 4: 'score', + 5: 'strand', + 9: 'tx_start', + 10: 'tx_end', + 14: 'attributes' +} + +attributes = ['gene_id', 'gene_name', 'transcript_id'] + +exon_boundary = int("${exon_boundary}") + +try: + df = pd.read_csv("${gtf_intersection}", sep="\\t", header=None, usecols=columns.keys()) +except pd.errors.EmptyDataError: + raise ValueError("Intersection between circRNAs and GTF file is empty.") +df = df.rename(columns=columns) + +# Extract circRNAs without match +mask = df['tx_start'] == -1 +df_intergenic = df[mask] +df = df[~mask] +df_intergenic['type'] = 'intergenic-circRNA' +df_intergenic['gene_id'] = 'intergenic_' + df_intergenic['name'] +df_intergenic['gene_name'] = 'intergenic_' + df_intergenic['name'] +df_intergenic['transcript_id'] = 'intergenic_' + df_intergenic['name'] + +# Convert attributes to a dictionary +df['attributes'] = df['attributes'].apply(lambda row: dict([[value.strip(r'"') for value in entry.strip().split(' ', 1)] for entry in row.split(';') if entry])) +# Make sure all attributes are present +df_incomplete = df['attributes'].apply(lambda row: ", ".join([key for key in attributes if key not in row])) +df_incomplete = df_incomplete[df_incomplete != ""] +if len(df_incomplete) > 0: + counts = df_incomplete.value_counts() + counts.name = 'count' + counts.index.name = 'missing' + raise ValueError(f"The following attributes are missing in the intersection file:\\n\\n{counts.to_frame()}") +# Keep only the attributes we want +df['attributes'] = df['attributes'].apply(lambda row: {key: row[key] for key in attributes if key in row}) +# Convert attributes to columns +df = pd.concat([df.drop(['attributes'], axis=1), df['attributes'].apply(pd.Series)], axis=1) + +df['any_outside'] = (df['start'] < df['tx_start'] - exon_boundary) | (df['end'] > df['tx_end'] + exon_boundary) +# Perfect is inverse of any_outside +df['perfect'] = ~df['any_outside'] +# Drop any_outside +df = df.drop(['any_outside', 'tx_start', 'tx_end'], axis=1) + +df = df.groupby(['chr', 'start', 'end', 'strand']).aggregate({ + 'name': lambda x: x.iloc[0], + 'score': lambda x: x.iloc[0], + 'gene_id': lambda x: list(x), + 'gene_name': lambda x: list(x), + 'transcript_id': lambda x: list(x), + 'perfect': lambda x: list(x) +}) + +def filter_perfect(row, col): + if any(row['perfect']): + matching_values = [value for value, perfectness 
in zip(row[col], row['perfect']) if perfectness] + else: + matching_values = row[col] + valid_values = set([value for value in matching_values if type(value) == str]) + return ",".join(valid_values) if valid_values else "NaN" + +def determine_type(row): + if row["no_transcript"]: + return "ciRNA" + if any(row['perfect']): + return "circRNA" + else: + return 'EI-circRNA' + +df['no_transcript'] = df['transcript_id'].apply(lambda x: all([type(value) != str and np.isnan(value) for value in x])) +df['type'] = df.apply(lambda row: determine_type(row), axis=1) +df['gene_id'] = df.apply(lambda row: filter_perfect(row, 'gene_id'), axis=1) +df['gene_name'] = df.apply(lambda row: filter_perfect(row, 'gene_name'), axis=1) +df['transcript_id'] = df.apply(lambda row: filter_perfect(row, 'transcript_id'), axis=1) +# Drop perfect +df = df.drop(['perfect'], axis=1) + +df = df.reset_index() +df_intergenic = df_intergenic.reset_index() +bed_order = ['chr', 'start', 'end', 'name', 'score', 'strand', 'type', 'gene_id', 'gene_name', 'transcript_id'] +df = df[bed_order] +df_intergenic = df_intergenic[bed_order] + +df = pd.concat([df, df_intergenic], axis=0) + +db_intersections = "${db_intersections}".split() +has_db = len(db_intersections) > 0 + +if has_db: + db_colnames = ['chr', 'start', 'end', 'name', 'score', 'strand', 'db_chr', 'db_start', 'db_end', 'db_name', 'db_score', 'db_strand'] + db_usecols = ['chr', 'start', 'end', 'name', 'score', 'strand', 'db_name'] + df_databases = pd.concat([pd.read_csv(db_path, sep="\\t", names=db_colnames, usecols=db_usecols) for db_path in db_intersections]) + + # Group by chr, start, end, name, score, strand, and aggregate the db_name to list + df_databases = df_databases.groupby(['chr', 'start', 'end', 'name', 'score', 'strand']).aggregate({ + 'db_name': lambda x: ",".join([val for val in x if val != '.']) + }) + + df_databases['db_name'] = df_databases['db_name'].apply(lambda x: x if x else '.') + + df = df.merge(df_databases, how='left', on=['chr', 'start', 'end', 'name', 'score', 'strand']) +else: + df['db_name'] = "." + +# Sort by chr, start, end +df = df.sort_values(['chr', 'start', 'end']) + +df.to_csv("${prefix}.bed", sep='\\t', index=False, header=False) + +# Convert to GTF +df['source'] = 'circRNA' +df['frame'] = '.' 
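+# Illustrative note rather than executed logic: for a hypothetical circRNA row with
+# name "1:1000-2000:+", gene_id "ENSG00000123456", gene_name "GENE1" and no database
+# hit (db_name "."), the attribute string assembled below renders as:
+#   gene_id "ENSG00000123456"; gene_name "GENE1"; transcript_id "circ_1:1000-2000:+"; db_ids ".";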
+df['attributes'] = 'gene_id "' + df['gene_id'] + '"; gene_name "' + df['gene_name'] + '"; transcript_id "circ_' + df['name'] + '"; db_ids "' + df['db_name'] + '";' + +gtf_order = ['chr', 'source', 'type', 'start', 'end', 'score', 'strand', 'frame', 'attributes'] +df = df[gtf_order] + +df.to_csv("${prefix}.gtf", sep='\\t', index=False, header=False, quoting=csv.QUOTE_NONE) + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "pandas": pd.__version__, + "numpy": np.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/local/circexplorer2/filter/main.nf b/modules/local/circexplorer2/filter/main.nf deleted file mode 100644 index 3594ac95d..000000000 --- a/modules/local/circexplorer2/filter/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CIRCEXPLORER2_FILTER { - tag "$meta.id" - label 'process_single' - - input: - tuple val(meta), path(txt) - val(bsj_reads) - - output: - tuple val(meta), path("${prefix}_${meta.tool}_circs.bed"), emit: results - tuple val(meta), path("${prefix}_${meta.tool}.bed") , emit: matrix - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - awk '{if(\$13 >= ${bsj_reads}) print \$0}' ${prefix}.txt | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6,\$13}' > ${prefix}_${meta.tool}.bed - - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_${meta.tool}.bed > ${prefix}_${meta.tool}_circs.bed - """ -} diff --git a/modules/local/circexplorer2/reference/main.nf b/modules/local/circexplorer2/reference/main.nf index c30978607..f16fa9c0b 100644 --- a/modules/local/circexplorer2/reference/main.nf +++ b/modules/local/circexplorer2/reference/main.nf @@ -2,10 +2,10 @@ process CIRCEXPLORER2_REFERENCE { tag "$gtf" label 'process_single' - conda (params.enable_conda ? "bioconda::ucsc-gtftogenepred=377 bioconda::ucsc-genepredtobed=377 bioconda::bedtools=2.27.0" : null) + conda "bioconda::ucsc-gtftogenepred=377 bioconda::ucsc-genepredtobed=377 bioconda::bedtools=2.27.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' : - 'quay.io/biocontainers/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' }" + 'biocontainers/mulled-v2-d7ee3552d06d8acebbc660507b48487c7369e221:07daadbfe8182aa3c974c7b78924d5c8730b922d-0' }" input: path gtf @@ -27,10 +27,11 @@ process CIRCEXPLORER2_REFERENCE { $gtf \ ${prefix}.genepred - awk -v OFS="\t" '{print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.txt + awk -v OFS="\\t" '{print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": + awk: \$(awk --version | head -n1 | cut -d' ' -f3 | sed 's/,//g' ) ucsc: $VERSION END_VERSIONS """ diff --git a/modules/local/circrna_finder/filter/main.nf b/modules/local/circrna_finder/filter/main.nf deleted file mode 100644 index 423dee854..000000000 --- a/modules/local/circrna_finder/filter/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process CIRCRNA_FINDER_FILTER { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 
"bioconda::circrna_finder=1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/circrna_finder%3A1.2--pl5321hdfd78af_1' : - 'quay.io/biocontainers/circrna_finder:1.2--pl5321hdfd78af_1' }" - - input: - tuple val(meta), path(sam), path(junction), path(tab) - path fasta - val bsj_reads - - output: - tuple val(meta), path("${prefix}_circrna_finder_circs.bed"), emit: results - tuple val(meta), path("${prefix}_circrna_finder.bed") , emit: matrix - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = 'v1.2' - """ - mkdir -p star_dir && mv *.tab *.junction *.sam star_dir - postProcessStarAlignment.pl --starDir star_dir/ --outDir ./ - - awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}.filteredJunctions.bed | awk -v OFS="\t" -F"\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_circrna_finder.bed - - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_circrna_finder.bed > ${prefix}_circrna_finder_circs.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - circRNA_finder: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/circrna_finder/main.nf b/modules/local/circrna_finder/main.nf new file mode 100644 index 000000000..e9a407205 --- /dev/null +++ b/modules/local/circrna_finder/main.nf @@ -0,0 +1,34 @@ +process CIRCRNA_FINDER { + tag "$meta.id" + label 'process_low' + + conda "bioconda::circrna_finder=1.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/circrna_finder%3A1.2--pl5321hdfd78af_1' : + 'biocontainers/circrna_finder:1.2--pl5321hdfd78af_1' }" + + input: + tuple val(meta), path(star_input, stageAs: 'input/') + + output: + tuple val(meta), path("${prefix}.filteredJunctions.bed"), emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = 'v1.2' + """ + postProcessStarAlignment.pl --starDir input/ --outDir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk --version | head -n1 | cut -d' ' -f3 | sed 's/,//g' ) + cat: \$(cat --version | head -n 1 | sed -e 's/cat (GNU coreutils) //') + circRNA_finder: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/circtest/circtest/main.nf b/modules/local/circtest/circtest/main.nf new file mode 100644 index 000000000..9c441c832 --- /dev/null +++ b/modules/local/circtest/circtest/main.nf @@ -0,0 +1,25 @@ +process CIRCTEST_CIRCTEST { + label 'process_medium' + + conda "conda-forge::r-base=4.2.2 conda-forge::r-aod=1.3.2 conda-forge::r-ggplot2=3.4.0 conda-forge::r-plyr=1.8.8" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' : + 'biocontainers/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' }" + + input: + tuple val(meta) , path(circ_counts) + tuple val(meta2), path(gene_counts) + tuple val(meta3), path(phenotype) + + output: + tuple val(meta), path("${prefix}_summary.txt"), emit: summary + tuple val(meta), path("*.pdf") , emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: meta.id + template 'circtest.R' +} diff --git a/bin/circ_test.R b/modules/local/circtest/circtest/templates/circtest.R similarity index 81% rename from bin/circ_test.R rename to modules/local/circtest/circtest/templates/circtest.R index 96a4f81e4..debdf0395 100755 --- a/bin/circ_test.R +++ b/modules/local/circtest/circtest/templates/circtest.R @@ -4,7 +4,11 @@ require(aod) require(plyr) require(ggplot2) -# The CircTest devs never had containers in mind for their code. It is not published on any R channels, nor are there any stable releases on their github page. I have decided to take their source functions and roll with it instead. +## CircTest functions +## Package: CircTest (https://github.com/dieterich-lab/CircTest) +## Version: 0.1.1 +## Author(s): Jun Cheng, Tobias Jakobi +## License: GPL ## SUMMMARY @@ -42,13 +46,13 @@ summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, # Rename the "mean" column datac <- rename(datac, c("mean" = measurevar)) - datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean + datac\$se <- datac\$sd / sqrt(datac\$N) # Calculate standard error of the mean # Confidence interval multiplier for standard error # Calculate t-statistic for confidence interval: # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 - ciMult <- qt(conf.interval/2 + .5, datac$N-1) - datac$ci <- datac$se * ciMult + ciMult <- qt(conf.interval/2 + .5, datac\$N-1) + datac\$ci <- datac\$se * ciMult return(datac) } @@ -140,8 +144,7 @@ Circ.ratioplot <- function(Circ,Linear,CircCoordinates = None,plotrow='1',size=2 theme(legend.text=element_text(size=size)) + theme(plot.title = element_text(size=size)) + theme(axis.text.y = element_text(margin=margin(5,5,10,5,"pt")))+ - #labs(list(title=paste("Annotation: ", genename, "\nChr ", toString(Circ[plotrow,circle_description]),sep=""),x=x,y=y)) + - ggtitle(paste("Annotation: ", genename, "\nChr ", toString(Circ[plotrow,circle_description]),sep="")) + + ggtitle(paste("Annotation: ", genename, "\\nChr ", toString(Circ[plotrow,circle_description]),sep="")) + ylab("circRNA/(circRNA + Linear RNA)") + xlab("Sample") + geom_errorbar(aes(ymin=Ratio, ymax=Ratio+se), width=.2 , size=2) + @@ -179,7 +182,6 @@ Circ.ratioplot <- function(Circ,Linear,CircCoordinates = None,plotrow='1',size=2 Circ.lineplot <- function(Circ,Linear,CircCoordinates = None,plotrow='1',size=18,ncol=2,groupindicator1=NULL,groupindicator2=NULL,x='Conditions',y='Counts', circle_description = c(1:3), gene_column = None){ require(ggplot2) - #require(Rmisc) if( !is.null(groupindicator1) & length(groupindicator1) != ncol(Circ)-length(circle_description) ){ stop("If provided, the length of groupindicator1 should be equal to the number of samples.") @@ -327,6 +329,8 @@ Circ.test <- function(Circ, Linear, CircCoordinates=None, group, alpha=0.05, plo # groups if ( length(group) != ncol(Circ)-length(circle_description) ){ + print(length(group)) + print(ncol(Circ)-length(circle_description)) 
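+    # Diagnostic prints only: they expose the number of supplied group labels versus
+    # the number of sample columns (ncol(Circ) minus the circle_description columns),
+    # making the length mismatch that triggers the stop() below easy to trace in the log.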
stop("length of 'group' must be equal to the number of samples of 'Circ' and 'Linear'. ") } group <- factor(group) @@ -362,7 +366,7 @@ Circ.test <- function(Circ, Linear, CircCoordinates=None, group, alpha=0.05, plo tmp_rations <- data.frame(Ratio=as.numeric(Circ[i,-circle_description])/(as.numeric(Linear[i,-circle_description])+as.numeric(Circ[i,-circle_description])), group=group) for (rep_group in seq(1,max(as.numeric(levels(group))),1)){ - tmp_df[i, paste("group_",rep_group,"_ratio_mean",sep="")] <- mean(na.omit(unlist(tmp_rations[tmp_rations$group==rep_group,1]))) + tmp_df[i, paste("group_",rep_group,"_ratio_mean",sep="")] <- mean(na.omit(unlist(tmp_rations[tmp_rations\$group==rep_group,1]))) } # Constract data frame @@ -376,21 +380,16 @@ Circ.test <- function(Circ, Linear, CircCoordinates=None, group, alpha=0.05, plo # test models a <- anova(fitNull,fitAlt) p.value <- a@anova.table[,11][2] - # print(predict(fitAlt,testdat, se.fit=T)) p.val <- c( p.val, p.value ) - # dir <- 1 # fitAlt@param[2][["group2"]] - # direction <- c(direction, dir) } message(paste(counter, "candidates processed in total")) - Circ$direction <- direction - #names(Circ$direction ) <- c("direction") + Circ\$direction <- direction p.adj <- p.adjust(p.val,n=sum(!is.na(p.val)),'BH') # select significant ones sig_dat <- Circ[p.adj<=alpha & !is.na(p.adj),] sig_ratios <- tmp_df[p.adj<=alpha & !is.na(p.adj),] sig_p <- p.adj[p.adj<=alpha & !is.na(p.adj)] - # direction <- direction[p.adj<=alpha & !is.na(p.adj)] # sort by p-val sig_dat <- sig_dat[order(sig_p),] @@ -404,9 +403,6 @@ Circ.test <- function(Circ, Linear, CircCoordinates=None, group, alpha=0.05, plo rownames(summary_table) <- rownames(sig_dat) names(summary_table) <- c(names(sig_dat)[circle_description],"sig_p",names(sig_ratios)[circle_description]) } else { - # summary_table <- cbind(CircCoordinates[rownames(sig_dat),],sig_p,sig_dat$direction) - # colnames(summary_table) <- c(colnames(CircCoordinates),"sig_p","direction") - summary_table <- cbind(CircCoordinates[rownames(sig_dat),],sig_p,sig_ratios) colnames(summary_table) <- c(colnames(CircCoordinates),"sig_p",colnames(sig_ratios)) } @@ -420,85 +416,60 @@ Circ.test <- function(Circ, Linear, CircCoordinates=None, group, alpha=0.05, plo p.adj=p.adj, sig_p=sig_p, ratios=sig_ratios - # direction=direction ) ) } -## MY CODE - -args = commandArgs(trailingOnly = TRUE) - -circ = read.table(args[1], header=T, sep=",") -linear = read.table(args[2], header=T, sep=",") -pheno = read.table(args[3], header=T, sep=",", row.names = "Sample_ID") - - -# No need to enforce any filtering for circTest. -# 'filter.sample' - this has been applied to called circs using the 'tool.filter' param -# 'filter.count' - this has been applied to called circs using bsj_filter param -# 'percentage' - set to extremely low value (do not want to discard circRNAs - let the user inspect themselves). - -# Need to apply the phenotype csv file correctly to circtest. 
-n_covars <- ncol(pheno) -if( n_covars == 2){ - covariate_1 <- as.factor(pheno[,1]) - covariate_2 <- as.factor(pheno[,2]) -}else{ - covariate_1 <- as.factor(pheno[,1]) -} - -n_reps <- as.numeric(table(covariate_1)[1]) - -Circ_filtered <- Circ.filter(circ = circ, linear = linear, Nreplicates = n_reps, filter.sample = 1, filter.count = 1, percentage = 0.00001, circle_description = c(1:4)) -Linear_filtered <- linear[rownames(Circ_filtered),] - +## MAIN -# groups must be numerically encoded -group = as.numeric(covariate_1) -test <- Circ.test(Circ_filtered, Linear_filtered, group=group, circle_description = c(1:4)) -write.table(test$summary_table, "summary_table.txt", row.names=F) +circs = read.table("${circ_counts}", header=T, sep="\\t") +genes = read.table("${gene_counts}", header=T, sep="\\t") +pheno = read.csv ("${phenotype}" , header=T, row.names = "sample") +circs <- Circ.filter(circ = circs, linear = genes, filter.sample = 2, filter.count = 5, percentage = 0.00001) +genes <- genes[rownames(circs),] -# Apply pheno to output once more.. +description <- c(1) +pheno <- pheno[colnames(circs[,-description]),,drop=FALSE] -if( n_covars == 2 ){ +test <- Circ.test(circs, genes, group=as.numeric(as.factor(pheno\$condition)), circle_description = description) +write.table(test\$summary_table, "${prefix}_summary.txt", row.names=F) - group_indicator1 <- as.character(covariate_1) - group_indicator2 <- as.character(covariate_2) - pdf("circ_linear_ratio_plots.pdf", width = 8, height = 10) - for (i in rownames(test$summary_table)) { - Circ.ratioplot(Circ_filtered, Linear_filtered, plotrow=i, groupindicator1=groupindicator1, groupindicator2 = group_indicator2, - circle_description = c(1:4) ) - } - dev.off() - - pdf("circ_linear_line_plots.pdf", width = 8, height = 10) - for (i in rownames(test$summary_table)) { - Circ.lineplot(Circ_filtered, Linear_filtered, plotrow=i, groupindicator1=group_indicator1, groupindicator2 = group_indicator2, - circle_description = c(1:4) ) - } - dev.off() - -}else{ - - group_indicator1 <- as.character(covariate_1) - - pdf("circ_linear_ratio_plots.pdf", width = 8, height = 10) - for (i in rownames(test$summary_table)) { - Circ.ratioplot(Circ_filtered, Linear_filtered, plotrow=i, groupindicator1=group_indicator1, - lab_legend = colnames(pheno)[1], circle_description = c(1:4) ) - } - dev.off() - - pdf("circ_linear_line_plots.pdf", width = 8, height = 10) - for (i in rownames(test$summary_table)) { - Circ.lineplot(Circ_filtered, Linear_filtered, plotrow=i, groupindicator1=group_indicator1, - circle_description = c(1:4) ) - } - dev.off() +pdf("circ_linear_ratio_plots.pdf", width = 8, height = 10) +for (i in rownames(test\$summary_table)) { + Circ.ratioplot(circs, genes, plotrow=i, groupindicator1=pheno\$condition, + lab_legend = 'condition', circle_description = description ) } +dev.off() -# include variables, makes life easier in case user wishes to report bugs to workflow. 
-save.image("circ_test.RData") +pdf("circ_linear_line_plots.pdf", width = 8, height = 10) +for (i in rownames(test\$summary_table)) { + Circ.lineplot(circs, genes, plotrow=i, groupindicator1=pheno\$condition, + circle_description = description ) +} +dev.off() + + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' aod:', packageVersion('aod')), + paste(' plyr:', packageVersion('plyr')), + paste(' ggplot2:', packageVersion('ggplot2')) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/local/circtest/prepare/environment.yml b/modules/local/circtest/prepare/environment.yml new file mode 100644 index 000000000..02becc7cb --- /dev/null +++ b/modules/local/circtest/prepare/environment.yml @@ -0,0 +1,5 @@ +name: circtest_prepare +channels: + - conda-forge +dependencies: + - conda-forge::r-base=4.2.1 diff --git a/modules/local/circtest/prepare/main.nf b/modules/local/circtest/prepare/main.nf index 3b92e52de..4d3029c12 100644 --- a/modules/local/circtest/prepare/main.nf +++ b/modules/local/circtest/prepare/main.nf @@ -1,32 +1,23 @@ -process PREPARE_CLR_TEST { - label 'process_medium' +process CIRCTEST_PREPARE { + label 'process_low' - conda (params.enable_conda ? "r-base r-aod r-ggplot2 r-plyr" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' : - 'quay.io/biocontainers/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' }" + conda "${moduleDir}/environment.yml" + container "biocontainers/r-base:4.2.1" input: - path(gene_matrix) - path(circrna_matrix) - path(circ_host_map) - path(gtf) + tuple val(meta), path(circ_counts) + tuple val(meta2), path(gene_counts) output: - path "circ.csv" , emit: circular - path "linear.csv" , emit: linear - path "versions.yml", emit: versions + tuple val(meta), path('*_circs.tsv'), emit: circ_counts, optional: true + tuple val(meta), path('*_genes.tsv'), emit: gene_counts, optional: true + + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - """ - prepare_circ_test.R - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - END_VERSIONS - """ + prefix = task.ext.prefix ?: meta.id + template 'prepare.R' } diff --git a/modules/local/circtest/prepare/templates/prepare.R b/modules/local/circtest/prepare/templates/prepare.R new file mode 100644 index 000000000..d5dcdebb0 --- /dev/null +++ b/modules/local/circtest/prepare/templates/prepare.R @@ -0,0 +1,39 @@ +#!/usr/bin/env Rscript + +circ <- read.table("${circ_counts}", header=T, sep="\\t", check.names = FALSE) +gene <- read.table("${gene_counts}", header=T, sep="\\t", check.names = FALSE, row.names = 1) + +gene <- gene[circ\$gene_id, ] + +rownames(circ) <- circ\$tx +rownames(gene) <- rownames(circ) +circ\$tx <- NULL + +if (nrow(circ) != nrow(gene)) { + stop("Number of rows in circ and gene counts do not match") +} + +if (nrow(circ) > 0) { + write.table(circ, "${prefix}_circs.tsv", sep="\\t", quote=F, row.names=T) + write.table(gene, "${prefix}_genes.tsv", sep="\\t", quote=F, row.names=T) +} + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/local/circtest/test/main.nf b/modules/local/circtest/test/main.nf deleted file mode 100644 index 5999ddaa9..000000000 --- a/modules/local/circtest/test/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process CIRCTEST { - label 'process_medium' - - conda (params.enable_conda ? "r-base r-aod r-ggplot2 r-plyr" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' : - 'quay.io/biocontainers/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' }" - - input: - path(circ_csv) - path(linear_csv) - path(phenotype) - - output: - path "*" , emit: results - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - circ_test.R $circ_csv $linear_csv $phenotype - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - aod: \$(Rscript -e "library(aod); cat(as.character(packageVersion('aod')))") - ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") - plyr: \$(Rscript -e "library(plyr); cat(as.character(packageVersion('plyr')))") - END_VERSIONS - """ -} diff --git a/modules/local/ciriquant/filter/main.nf b/modules/local/ciriquant/filter/main.nf deleted file mode 100644 index a1341a94c..000000000 --- a/modules/local/ciriquant/filter/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process CIRIQUANT_FILTER { - tag "$meta.id" - label 'process_single' - - input: - tuple val(meta), path(gtf) - val bsj_reads - - output: - tuple val(meta), path("${prefix}_ciriquant_circs.bed"), emit: results - tuple val(meta), path("${prefix}_ciriquant.bed") , emit: matrix - - when: - task.ext.when == null || task.ext.when - - script: - prefix = task.ext.prefix ?: "${meta.id}" - """ - grep -v "#" ${prefix}.gtf | awk '{print \$14}' | cut -d '.' -f1 > counts - grep -v "#" ${prefix}.gtf | awk -v OFS="\t" '{print \$1,\$4,\$5,\$7}' > ${prefix}.tmp - paste ${prefix}.tmp counts > ${prefix}_unfilt.bed - - awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}_unfilt.bed > ${prefix}_filt.bed - grep -v '^\$' ${prefix}_filt.bed > ${prefix}_ciriquant - - awk -v OFS="\t" '{\$2-=1;print}' ${prefix}_ciriquant > ${prefix}_ciriquant.bed - rm ${prefix}.gtf - - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_ciriquant.bed > ${prefix}_ciriquant_circs.bed - """ -} diff --git a/modules/local/ciriquant/ciriquant/main.nf b/modules/local/ciriquant/main.nf similarity index 50% rename from modules/local/ciriquant/ciriquant/main.nf rename to modules/local/ciriquant/main.nf index 8e358aefe..db2289294 100644 --- a/modules/local/ciriquant/ciriquant/main.nf +++ b/modules/local/ciriquant/main.nf @@ -2,14 +2,17 @@ process CIRIQUANT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::ciriquant=1.1.2" : null) + conda "bioconda::ciriquant=1.1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ciriquant:1.1.2--pyhdfd78af_2' : - 'quay.io/biocontainers/ciriquant:1.1.2--pyhdfd78af_2' }" + 'biocontainers/ciriquant:1.1.2--pyhdfd78af_2' }" input: tuple val(meta), path(reads) - path yml + tuple val(meta2), path(gtf) + tuple val(meta3), path(fasta) + tuple val(meta4), path(bwa) + tuple val(meta5), path(hisat2) output: tuple val(meta), path("${prefix}/${prefix}.gtf"), emit: gtf @@ -23,20 +26,37 @@ process CIRIQUANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def VERSION = '2.1.0' + def strandedness = meta.strandedness ?: 'auto' + def library_type = strandedness == 'auto' ? '' : strandedness == 'unstranded' ? '-l 0' : strandedness == 'forward' ? 
'-l 1' : '-l 2' """ + BWA=`which bwa` + HISAT2=`which hisat2` + STRINGTIE=`which stringtie` + SAMTOOLS=`which samtools` + + BWA_FILE=`ls ${bwa}/*.bwt` + BWA_PREFIX=`basename \$BWA_FILE .bwt` + + HISAT2_FILE=`ls ${hisat2}/*.1.ht2` + HISAT2_PREFIX=`basename \$HISAT2_FILE .1.ht2` + + printf "name: ciriquant\\ntools:\\n bwa: \$BWA\\n hisat2: \$HISAT2\\n stringtie: \$STRINGTIE\\n samtools: \$SAMTOOLS\\n\\nreference:\\n fasta: ${fasta}\\n gtf: ${gtf}\\n bwa_index: ${bwa}/\$BWA_PREFIX\\n hisat_index: ${hisat2}/\$HISAT2_PREFIX" > config.yml + CIRIquant \\ -t ${task.cpus} \\ -1 ${reads[0]} \\ -2 ${reads[1]} \\ - --config $yml \\ + --config config.yml \\ --no-gene \\ -o ${prefix} \\ - -p ${prefix} + -p ${prefix} \\ + ${library_type} \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - ciriquant : \$(echo \$(CIRIquant --version 2>&1) | sed 's/CIRIquant //g' ) + ciriquant: \$(echo \$(CIRIquant --version 2>&1) | sed 's/CIRIquant //g' ) samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') stringtie: \$(stringtie --version 2>&1) hisat2: $VERSION diff --git a/modules/local/ciriquant/yml/main.nf b/modules/local/ciriquant/yml/main.nf deleted file mode 100644 index 3489a219a..000000000 --- a/modules/local/ciriquant/yml/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process CIRIQUANT_YML { - label 'process_single' - - conda (params.enable_conda ? "bioconda::ciriquant=1.1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ciriquant:1.1.2--pyhdfd78af_2' : - 'quay.io/biocontainers/ciriquant:1.1.2--pyhdfd78af_2' }" - - input: - path gtf - path fasta - path bwa - path hisat2 - - output: - path "travis.yml" , emit: yml - - when: - task.ext.when == null || task.ext.when - - script: - bwa_prefix = fasta.toString() == 'genome.fa' ? fasta.toString() : fasta.toString() - ~/.(fa|fasta)$/ - hisat2_prefix = fasta.toString() - ~/.(fa|fasta)$/ - fasta_path = fasta.toRealPath() - gtf_path = gtf.toRealPath() - bwa_path = bwa.toRealPath() - hisat2_path = hisat2.toRealPath() - """ - BWA=`which bwa` - HISAT2=`which hisat2` - STRINGTIE=`which stringtie` - SAMTOOLS=`which samtools` - - touch travis.yml - printf "name: ciriquant\ntools:\n bwa: \$BWA\n hisat2: \$HISAT2\n stringtie: \$STRINGTIE\n samtools: \$SAMTOOLS\n\nreference:\n fasta: ${fasta_path}\n gtf: ${gtf_path}\n bwa_index: ${bwa_path}/${bwa_prefix}\n hisat_index: ${hisat2_path}/${hisat2_prefix}" >> travis.yml - """ -} diff --git a/modules/local/compute_correlations/environment.yml b/modules/local/compute_correlations/environment.yml new file mode 100644 index 000000000..efe728347 --- /dev/null +++ b/modules/local/compute_correlations/environment.yml @@ -0,0 +1,7 @@ +name: "compute_correlations" +channels: + - conda-forge + - defaults + - bioconda +dependencies: + - "bioconda::bioconductor-fishpond=2.8.0--r43hdfd78af_0" diff --git a/modules/local/compute_correlations/main.nf b/modules/local/compute_correlations/main.nf new file mode 100644 index 000000000..4a72b5eb8 --- /dev/null +++ b/modules/local/compute_correlations/main.nf @@ -0,0 +1,29 @@ +process COMPUTE_CORRELATIONS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bioconductor-fishpond:2.8.0--r43hdfd78af_0' : + 'biocontainers/bioconductor-fishpond:2.8.0--r43hdfd78af_0' }" + + input: + tuple val(meta), path(bindingsites) + tuple val(meta2), path(mirna_expression) + tuple val(meta3), path(transcript_rds) + + output: + tuple val(meta), path("*.tsv"), emit: correlations, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'compute_correlations.R' + + stub: + """ + touch ${meta.id}.circrna_correlation.tsv + """ +} diff --git a/modules/local/compute_correlations/templates/compute_correlations.R b/modules/local/compute_correlations/templates/compute_correlations.R new file mode 100644 index 000000000..d9976226d --- /dev/null +++ b/modules/local/compute_correlations/templates/compute_correlations.R @@ -0,0 +1,62 @@ +#!/usr/bin/env Rscript + +library(fishpond) +suppressMessages(library(SummarizedExperiment)) + +tx_expression <- readRDS('${transcript_rds}') +mi_expression <- read.table('${mirna_expression}', header=TRUE, row.names=1, sep='\\t') +interactions <- read.table('${bindingsites}', sep='\\t') + +tx_expression <- scaleInfReps(tx_expression) +tx_expression <- labelKeep(tx_expression) # Here one can perform custom filtering + +if (!any(mcols(tx_expression)\$keep)) { + stop('No transcripts left after filtering') +} + +result_cols <- c('stat', 'log2FC', 'pvalue', 'locfdr', 'qvalue') + +# Iterate rows of interactions +for (i in 1:nrow(interactions)) { + # Get miRNA and target gene + miRNA <- interactions[i, 1] + targets <- unlist(strsplit(interactions[i, 2], ',')) + + mirna_expression <- mi_expression[miRNA,] + transcript_expression <- tx_expression[targets,] + + if (!any(mcols(transcript_expression)\$keep)) { + print(paste('No transcripts left after filtering for miRNA', miRNA)) + next + } + + # Add miRNA expression to colData so that it can be used for correlation + colData(transcript_expression) <- cbind( + colData(transcript_expression), + t(mirna_expression[, rownames(colData(transcript_expression))]) + ) + + result <- rowData(swish(transcript_expression, miRNA, cor = "${params.mirna_correlation}"))[, result_cols] + result <- result[complete.cases(result), ] + write.table(result, paste0(miRNA, '.tsv'), sep = '\\t') +} + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/local/count_matrix/combined/main.nf b/modules/local/count_matrix/combined/main.nf deleted file mode 100644 index 1d27dfd22..000000000 --- a/modules/local/count_matrix/combined/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process COUNTS_COMBINED { - label 'process_low' - - conda (params.enable_conda ? "r-base=3.6.3 python=2.7.15 r-argparser=0.6 r-dplyr=1.0.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' : - 'quay.io/biocontainers/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' }" - - input: - path(bed) - - output: - path("circRNA_matrix.txt"), emit: dea_matrix - path("count_matrix.txt") , emit: clr_matrix - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - python ${workflow.projectDir}/bin/circRNA_counts_matrix.py > matrix.txt - ## handle non-canon chromosomes here (https://stackoverflow.com/questions/71479919/joining-columns-based-on-number-of-fields) - n_samps=\$(ls *.bed | wc -l) - canon=\$(awk -v a="\$n_samps" 'BEGIN {print a + 4}') - awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\t" '\$1=\$1' > circRNA_matrix.txt - Rscript ${workflow.projectDir}/bin/reformat_count_matrix.R - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - argparser: \$(Rscript -e "library(arparser); cat(as.character(packageVersion('argparser')))") - dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") - END_VERSIONS - """ -} diff --git a/modules/local/count_matrix/merge_tools/main.nf b/modules/local/count_matrix/merge_tools/main.nf deleted file mode 100644 index 4bbe71920..000000000 --- a/modules/local/count_matrix/merge_tools/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process MERGE_TOOLS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "r-base=3.6.3 python=2.7.15 r-argparser=0.6 r-dplyr=1.0.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' : - 'quay.io/biocontainers/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' }" - - input: - tuple val(meta), path(bed) - val(tool_filter) - val(duplicates_fun) - - output: - tuple val(meta), path("${prefix}.bed"), emit: merged - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - ## make list of files for R to read - ls *.bed > samples.csv - - ## Add catch for empty bed file and delete - bash ${workflow.projectDir}/bin/check_empty.sh - - ## Use intersection of "n" (params.tool_filter) circRNAs called by tools - ## remove duplicate IDs, keep highest count. - Rscript ${workflow.projectDir}/bin/consolidate_algorithms_intersection.R samples.csv $tool_filter $duplicates_fun - mv combined_counts.bed ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - argparser: \$(Rscript -e "library(arparser); cat(as.character(packageVersion('argparser')))") - dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") - END_VERSIONS - """ -} diff --git a/modules/local/count_matrix/single/main.nf b/modules/local/count_matrix/single/main.nf deleted file mode 100644 index bf6976db4..000000000 --- a/modules/local/count_matrix/single/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process COUNTS_SINGLE { - tag "${meta.tool}" - label 'process_low' - - conda (params.enable_conda ? 
"r-base=3.6.3 python=2.7.15 r-argparser=0.6 r-dplyr=1.0.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' : - 'quay.io/biocontainers/mulled-v2-5fbffedf7f529cf3c5093b976deb4290f5e1267a:3456f1432b1c9dad42815275abe2d6cb6f26fd94-0' }" - - input: - tuple val(meta), path(bed) - - output: - path("circRNA_matrix.txt"), emit: dea_matrix - path("count_matrix.txt") , emit: clr_matrix - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def tool_name = "${meta.tool}" - """ - # Strip tool name from BED files (no consolidation prior to this step for 1 tool) - for b in *.bed; do - basename=\${b%".bed"}; - sample_name=\${basename%"_${tool_name}"}; - mv \$b \${sample_name}.bed - done - - python ${workflow.projectDir}/bin/circRNA_counts_matrix.py > matrix.txt - ## handle non-canon chromosomes here (https://stackoverflow.com/questions/71479919/joining-columns-based-on-number-of-fields) - n_samps=\$(ls *.bed | wc -l) - canon=\$(awk -v a="\$n_samps" 'BEGIN {print a + 4}') - awk -v n="\$canon" '{ for (i = 2; i <= NF - n + 1; ++i) { \$1 = \$1"-"\$i; \$i=""; } } 1' matrix.txt | awk -v OFS="\t" '\$1=\$1' > circRNA_matrix.txt - Rscript ${workflow.projectDir}/bin/reformat_count_matrix.R - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - argparser: \$(Rscript -e "library(arparser); cat(as.character(packageVersion('argparser')))") - dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") - END_VERSIONS - """ -} diff --git a/modules/local/dcc/filter/main.nf b/modules/local/dcc/filter/main.nf deleted file mode 100644 index b9bcca466..000000000 --- a/modules/local/dcc/filter/main.nf +++ /dev/null @@ -1,23 +0,0 @@ -process DCC_FILTER { - tag "$meta.id" - label 'process_single' - - input: - tuple val(meta), path(txt) - val bsj_reads - - output: - tuple val(meta), path("${prefix}_dcc_circs.bed"), emit: results - tuple val(meta), path("${prefix}_dcc.bed") , emit: matrix - - when: - task.ext.when == null || task.ext.when - - script: - prefix = task.ext.prefix ?: "${meta.id}" - """ - awk '{if(\$5 >= ${bsj_reads}) print \$0}' ${prefix}.txt > ${prefix}_dcc.filtered - awk -v OFS="\t" '{\$2-=1;print}' ${prefix}_dcc.filtered > ${prefix}_dcc.bed - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_dcc.bed > ${prefix}_dcc_circs.bed - """ -} diff --git a/modules/local/dcc/dcc/main.nf b/modules/local/dcc/main.nf similarity index 73% rename from modules/local/dcc/dcc/main.nf rename to modules/local/dcc/main.nf index 60cb8b71a..429d7dc59 100644 --- a/modules/local/dcc/dcc/main.nf +++ b/modules/local/dcc/main.nf @@ -2,10 +2,10 @@ process DCC { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::circtools=1.2.1" : null) + conda "bioconda::circtools=1.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/circtools:1.2.1--pyh7cba7a3_0' : - 'quay.io/biocontainers/circtools:1.2.1--pyh7cba7a3_0' }" + 'biocontainers/circtools:1.2.1--pyh7cba7a3_0' }" input: tuple val(meta), path(pairs), path(mate1), path(mate2) @@ -22,16 +22,18 @@ process DCC { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def strandedness = meta.strandedness ?: 'auto' + def strand_args = strandedness == 'auto' || strandedness == 'unstranded' ? '-N' : strandedness == 'forward' ? '' : '-ss' if(meta.single_end){ """ sed -i 's/^chr//g' $gtf mkdir ${prefix} && mv ${prefix}.Chimeric.out.junction ${prefix} && printf "${prefix}/${prefix}.Chimeric.out.junction" > samplesheet - DCC @samplesheet -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus} + DCC @samplesheet -D -an $gtf -F -M -Nr 1 1 -A $fasta $strand_args -T ${task.cpus} awk '{print \$6}' CircCoordinates >> strand - paste CircRNACount strand | tail -n +2 | awk -v OFS="\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt + paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -46,10 +48,10 @@ process DCC { mkdir ${prefix}_mate1 && mv ${prefix}_mate1.Chimeric.out.junction ${prefix}_mate1 && printf "${prefix}_mate1/${prefix}_mate1.Chimeric.out.junction" > mate1file mkdir ${prefix}_mate2 && mv ${prefix}_mate2.Chimeric.out.junction ${prefix}_mate2 && printf "${prefix}_mate2/${prefix}_mate2.Chimeric.out.junction" > mate2file - DCC @samplesheet -mt1 @mate1file -mt2 @mate2file -D -an $gtf -Pi -ss -F -M -Nr 1 1 -fg -A $fasta -N -T ${task.cpus} + DCC @samplesheet -mt1 @mate1file -mt2 @mate2file -D -an $gtf -Pi -F -M -Nr 1 1 -A $fasta $strand_args -T ${task.cpus} awk '{print \$6}' CircCoordinates >> strand - paste CircRNACount strand | tail -n +2 | awk -v OFS="\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt + paste CircRNACount strand | tail -n +2 | awk -v OFS="\\t" '{print \$1,\$2,\$3,\$5,\$4}' >> ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/deseq2/differential_expression/main.nf b/modules/local/deseq2/differential_expression/main.nf deleted file mode 100644 index 83bf6bca6..000000000 --- a/modules/local/deseq2/differential_expression/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process DESEQ2_DIFFERENTIAL_EXPRESSION { - label 'process_medium' - - conda (params.enable_conda ? "r-base=3.6.3 conda-forge::r-argparser=0.6 conda-forge::r-dplyr=1.0.5 conda-forge::r-ggplot2=3.3.3 r-ggpubr=0.4.0 conda-forge::r-gplots=3.1.1 conda-forge::r-pheatmap=1.0.12 r-plyr=1.8.6 r-pvclust=2.2_0 r-rcolorbrewer=1.1_2 conda-forge::r-circlize=0.4.12 bioconductor-biomart=2.42.0 bioconductor-complexheatmap=2.2.0 bioconductor-deseq2=1.26.0 bioconductor-enhancedvolcano=1.4.0 bioconductor-ihw=1.14.0 bioconductor-org.hs.eg.db=3.10.0 bioconductor-pcatools=1.2.0 bioconductor-tximport=1.14.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-04b2ef814c9c6ab8c196c3e372521b88160dc260:e0cb4046baee3fd35fdbf883ba8af34e3e8af2e8-0' : - 'quay.io/biocontainers/mulled-v2-04b2ef814c9c6ab8c196c3e372521b88160dc260:e0cb4046baee3fd35fdbf883ba8af34e3e8af2e8-0' }" - - input: - path(gene_matrix) - path(phenotype) - path(circrna_matrix) - val(species) - path(biomart_keys) - - output: - path "circRNA" , emit: circular_results - path "RNA-Seq" , emit: linear_results - path "DESeq2_QC" , emit: deseq2_qc - path "*.RData" , emit: rsession - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - ## prepDE && circRNA counts headers are sorted such that uppercase preceedes lowercase i.e Z before a - ## reformat the phenotype file to match the order of the samples. - head -n 1 $phenotype > header - tail -n +2 $phenotype | LC_COLLATE=C sort > sorted_pheno - cat header sorted_pheno > tmp && rm phenotype.csv && mv tmp phenotype.csv - - DEA.R $gene_matrix $phenotype $circrna_matrix $species ensembl_database_map.txt - mv boxplots/ circRNA/ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - argparser: \$(Rscript -e "library(argparser); cat(as.character(packageVersion('argparser')))") - biomart: \$(Rscript -e "library(biomaRt); cat(as.character(packageVersion('biomaRt')))") - deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") - dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") - enhancedvolcano: \$(Rscript -e "library(EnhancedVolcano); cat(as.character(packageVersion('EnhancedVolcano')))") - gplots: \$(Rscript -e "library(gplots); cat(as.character(packageVersion('gplots')))") - ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") - ggpubr: \$(Rscript -e "library(ggpubr); cat(as.character(packageVersion('ggpubr')))") - ihw: \$(Rscript -e "library(IHW); cat(as.character(packageVersion('IHW')))") - pvclust: \$(Rscript -e "library(pvclust); cat(as.character(packageVersion('pvclust')))") - pcatools: \$(Rscript -e "library(PCAtools); cat(as.character(packageVersion('PCAtools')))") - pheatmap: \$(Rscript -e "library(pheatmap); cat(as.character(packageVersion('pheatmap')))") - rcolorbrewer: \$(Rscript -e "library(RColorBrewer); cat(as.character(packageVersion('RColorBrewer')))") - END_VERSIONS - """ -} diff --git a/modules/local/deseq2/normalization/environment.yml b/modules/local/deseq2/normalization/environment.yml new file mode 100644 index 000000000..8eb117c31 --- /dev/null +++ b/modules/local/deseq2/normalization/environment.yml @@ -0,0 +1,7 @@ +name: deseq2_normalization +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bioconductor-deseq2=1.34.0 diff --git a/modules/local/deseq2/normalization/main.nf b/modules/local/deseq2/normalization/main.nf new file mode 100644 index 000000000..74cb8b5a3 --- /dev/null +++ b/modules/local/deseq2/normalization/main.nf @@ -0,0 +1,32 @@ +process DESEQ2_NORMALIZATION { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bioconductor-deseq2:1.34.0--r41hc247a5b_3' : + 'biocontainers/bioconductor-deseq2:1.34.0--r41hc247a5b_3' }" + + input: + tuple val(meta), path(counts) + + output: + tuple val(meta), path("${meta.id}.normalized_counts.tsv"), emit: normalized + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'deseq_normalization.R' + + stub: + """ + touch ${meta.id}.normalized_counts.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") + END_VERSIONS + """ +} diff --git a/modules/local/deseq2/normalization/templates/deseq_normalization.R b/modules/local/deseq2/normalization/templates/deseq_normalization.R new file mode 100644 index 000000000..91b366dc5 --- /dev/null +++ b/modules/local/deseq2/normalization/templates/deseq_normalization.R @@ -0,0 +1,50 @@ +#!/usr/bin/env Rscript + +library(DESeq2) + +raw_counts <- read.table("$counts", sep = "\\t", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) +samples <- colnames(raw_counts)[-c(1)] + +row.names(raw_counts) <- raw_counts\$miRNA +data <- raw_counts[, -1] +mirna_names <- data.frame(miRNA = raw_counts\$miRNA, order = seq_len(nrow(raw_counts))) + +# normalize using DeSeq2, Library Size Estimation +meta_data <- data.frame(samples) +row.names(meta_data) <- meta_data\$samples +all(colnames(data) %in% rownames(meta_data)) +all(colnames(data) == rownames(meta_data)) + +dds <- DESeqDataSetFromMatrix(countData = data, colData = meta_data, design = ~ 1) +dds <- estimateSizeFactors(dds) +sizeFactors(dds) +normalized_counts <- DESeq2::counts(dds, normalized = TRUE) + +# add miRNA IDs back to counts table +merged_data <- merge(mirna_names, normalized_counts, + by.x = "miRNA", by.y = "row.names") +merged_data <- merged_data[order(merged_data\$order), ] + +norm_data <- subset(merged_data, select = -c(order)) + +write.table(norm_data, paste0("${meta.id}.normalized_counts.tsv"), quote = FALSE, sep = "\\t", row.names = FALSE) + +# TODO: (Can be done later) Add support for Samplesheet so that we can eliminate batch effects + + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +deseq2.version <- as.character(packageVersion('DESeq2')) + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' bioconductor-deseq2:', deseq2.version) + ), +'versions.yml') diff --git a/modules/local/fail_on_empty/main.nf b/modules/local/fail_on_empty/main.nf new file mode 100644 index 000000000..aa97b9e83 --- /dev/null +++ b/modules/local/fail_on_empty/main.nf @@ -0,0 +1,19 @@ +process FAIL_ON_EMPTY { + tag "$meta.id" + + input: + tuple val(meta), path(bed) + path(waitFor, stageAs: 'waitFor*.txt') + + exec: + if (!bed) { + log.error ((params.tool_filter <= 1 ? + "No circular RNAs were found by any tool in any sample.\n" : + "No circular RNAs were found by at least ${params.tool_filter} tools in any sample.\n") + + "Feel free to check the preliminary results in '${params.outdir}'\n" + + (params.save_intermediates ? 
"" : + "You can enable saving intermediate files by setting the parameter 'save_intermediates' to 'true'.")) + + exit 1 + } +} diff --git a/modules/local/fasta/main.nf b/modules/local/fasta/main.nf deleted file mode 100644 index 58c3f5061..000000000 --- a/modules/local/fasta/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process FASTA { - tag "${meta.id}:${meta.tool}" - label 'process_single' - - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_2': - 'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2' }" - - input: - tuple val(meta), path(bed) - path fasta - - output: - tuple val(meta), path("${prefix}.fa"), emit: analysis_fasta - path("${prefix}.fasta") , emit: publish_fasta - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' - """ - ## FASTA sequences (bedtools does not like the extra annotation info - split will not work properly) - cut -d\$'\t' -f1-12 ${prefix}.bed > bed12.tmp - bedtools getfasta -fi $fasta -bed bed12.tmp -s -split -name > circ_seq.tmp - - ## clean fasta header - grep -A 1 '>' circ_seq.tmp | cut -d: -f1,2,3 > ${prefix}.fa && rm circ_seq.tmp - - ## add backsplice sequence for miRanda Targetscan, publish canonical FASTA to results. - rm $fasta - bash ${workflow.projectDir}/bin/backsplice_gen.sh ${prefix}.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} - - diff --git a/modules/local/find_circ/anchors/main.nf b/modules/local/find_circ/anchors/main.nf index d357c45eb..9bccb403b 100644 --- a/modules/local/find_circ/anchors/main.nf +++ b/modules/local/find_circ/anchors/main.nf @@ -2,10 +2,10 @@ process FIND_CIRC_ANCHORS { tag "$meta.id" label "process_high" - conda (params.enable_conda ? "find_circ=1.2" : null) + conda "bioconda::find_circ=1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/find_circ%3A1.2--hdfd78af_0' : - 'quay.io/biocontainers/find_circ:1.2--hdfd78af_0' }" + 'biocontainers/find_circ:1.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/local/find_circ/filter/main.nf b/modules/local/find_circ/filter/main.nf deleted file mode 100644 index c22208844..000000000 --- a/modules/local/find_circ/filter/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process FIND_CIRC_FILTER { - tag "$meta.id" - label "process_low" - - conda (params.enable_conda ? "find_circ=1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/find_circ%3A1.2--hdfd78af_0' : - 'quay.io/biocontainers/find_circ:1.2--hdfd78af_0' }" - - input: - tuple val(meta), path(bed) - val bsj_reads - - output: - tuple val(meta), path("${prefix}_find_circ_circs.bed"), emit: results - tuple val(meta), path("${prefix}_find_circ.bed") , emit: matrix - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.2' - """ - grep CIRCULAR $bed | \ - grep -v chrM | \ - awk '\$5>=${bsj_reads}' | \ - grep UNAMBIGUOUS_BP | grep ANCHOR_UNIQUE | \ - maxlength.py 100000 \ - > ${prefix}.txt - - tail -n +2 ${prefix}.txt | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6,\$5}' > ${prefix}_find_circ.bed - - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_find_circ.bed > ${prefix}_find_circ_circs.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - find_circ: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/find_circ/find_circ/main.nf b/modules/local/find_circ/find_circ/main.nf index 970c87e50..8a4ca128c 100644 --- a/modules/local/find_circ/find_circ/main.nf +++ b/modules/local/find_circ/find_circ/main.nf @@ -2,10 +2,10 @@ process FIND_CIRC { tag "$meta.id" label "process_high" - conda (params.enable_conda ? "find_circ=1.2 bowtie2" : null) + conda "bioconda::find_circ=1.2 bioconda::bowtie2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c27e472038a09e49d9147bc52903e12836302c12:60ffb3b15a2c40c669f8d38382b1e6e4b065f5e4-0' : - 'quay.io/biocontainers/mulled-v2-c27e472038a09e49d9147bc52903e12836302c12:60ffb3b15a2c40c669f8d38382b1e6e4b065f5e4-0' }" + 'biocontainers/mulled-v2-c27e472038a09e49d9147bc52903e12836302c12:60ffb3b15a2c40c669f8d38382b1e6e4b065f5e4-0' }" input: tuple val(meta), path(anchors) @@ -23,6 +23,9 @@ process FIND_CIRC { script: prefix = task.ext.prefix ?: "${meta.id}" + args = task.ext.args ?: "" + args2 = task.ext.args2 ?: "" + def strand_arg = meta.strandedness && (meta.strandedness == 'forward' || meta.strandedness == 'reverse') ? "--stranded" : "" def VERSION = '1.2' """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` @@ -37,8 +40,9 @@ process FIND_CIRC { --score-min=C,-15,0 \\ -q \\ -x \$INDEX \\ + $args \\ -U $anchors | \\ - find_circ.py --genome=$fasta --prefix=${prefix} --stats=${prefix}.sites.log --reads=${prefix}.sites.reads > ${prefix}.sites.bed + find_circ.py --genome=$fasta $strand_arg $args2 --prefix=${prefix} --stats=${prefix}.sites.log --reads=${prefix}.sites.reads > ${prefix}.sites.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/majority_vote/main.nf b/modules/local/majority_vote/main.nf new file mode 100644 index 000000000..5161ca6d4 --- /dev/null +++ b/modules/local/majority_vote/main.nf @@ -0,0 +1,35 @@ +process MAJORITY_VOTE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+        'biocontainers/pandas:1.5.2' }"
+
+    input:
+    tuple val(meta), path(bindingsites)
+
+    output:
+    tuple val(meta), path("${meta.id}.majority.tsv"), emit: tsv
+    tuple val(meta), path("${meta.id}.targets.tsv") , emit: targets
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'majority.py'
+
+    stub:
+    """
+    touch ${meta.id}.majority.tsv
+    touch ${meta.id}.targets.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python -c "import pandas; print(pandas.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/majority_vote/templates/majority.py b/modules/local/majority_vote/templates/majority.py
new file mode 100644
index 000000000..70c5dd91b
--- /dev/null
+++ b/modules/local/majority_vote/templates/majority.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+import pandas as pd
+import platform
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+
+df = pd.read_csv("$bindingsites",
+    sep="\\t", header=0, names=['mirna', 'target', 'start', 'end', 'tool'])
+df = df.groupby(['mirna', 'target'])['tool'].apply(set).reset_index()
+
+# perform the majority vote, keeping only miRNA binding sites that meet the required number of votes
+min_tools = int("${params.mirna_tool_filter}")
+df = df[df['tool'].apply(len) >= min_tools].copy()
+df = df.drop('tool', axis=1)
+
+df.to_csv('${meta.id}.majority.tsv', sep='\\t', index=False)
+
+df = df.groupby('mirna')['target'].apply(lambda x: ','.join(x)).reset_index()
+df.to_csv('${meta.id}.targets.tsv', sep='\\t', index=False, header=False)
+
+# Create version file
+versions = {
+    "${task.process}" : {
+        "python": platform.python_version(),
+        "pandas": pd.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))
diff --git a/modules/local/mapsplice/align/main.nf b/modules/local/mapsplice/align/main.nf
index 4aa91035f..a90930d6e 100644
--- a/modules/local/mapsplice/align/main.nf
+++ b/modules/local/mapsplice/align/main.nf
@@ -2,15 +2,15 @@ process MAPSPLICE_ALIGN {
     tag "$meta.id"
     label 'process_high'

-    conda (params.enable_conda ? "bioconda::mapsplice=2.2.1" : null)
+    conda "bioconda::mapsplice=2.2.1"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mapsplice:2.2.1--py27h07887db_0':
-        'quay.io/biocontainers/mapsplice:2.2.1--py27h07887db_0' }"
+        'biocontainers/mapsplice:2.2.1--py27h07887db_0' }"

     input:
     tuple val(meta), path(reads)
     path bowtie_index
-    path chromosomes
+    tuple val(meta2), path(chromosomes, stageAs: 'chromosomes/*')
     path gtf

     output:
@@ -26,13 +26,13 @@
     def VERSION = 'v2.2.1'
     def gtf_prefix = gtf.toString() - ~/.gtf/
     if(meta.single_end){
-        def handleGzip_R1 = reads[0].toString().endsWith('.gz') ? "gzip -d -f ${reads[0]}" : ''
-        def read1 = reads[0].toString().endsWith('.gz') ? reads[0].toString() - ~/.gz/ : reads[0]
+        def handleGzip_R1 = reads[0].getExtension() == 'gz' ? "gzip -d -f ${reads[0]}" : ''
+        def read1 = reads[0].getExtension() == 'gz' ? reads[0].toString() - ~/.gz/ : reads[0]
        """
        $handleGzip_R1

        mapsplice.py \\
-            -c $chromosomes \\
+            -c chromosomes \\
            -x $gtf_prefix \\
            -1 ${read1} \\
            -p ${task.cpus} \\
@@ -46,17 +46,17 @@
            mapsplice: $VERSION
        END_VERSIONS
        """
-    }else{
-        def handleGzip_R1 = reads[0].toString().endsWith('.gz') ? "gzip -d -f ${reads[0]}" : ''
-        def handleGzip_R2 = reads[1].toString().endsWith('.gz') ? "gzip -d -f ${reads[1]}" : ''
-        def read1 = reads[0].toString().endsWith('.gz') ? reads[0].toString() - ~/.gz/ : reads[0]
-        def read2 = reads[1].toString().endsWith('.gz') ? reads[1].toString() - ~/.gz/ : reads[1]
+    } else {
+        def handleGzip_R1 = reads[0].getExtension() == 'gz' ? "gzip -d -f ${reads[0]}" : ''
+        def handleGzip_R2 = reads[1].getExtension() == 'gz' ? "gzip -d -f ${reads[1]}" : ''
+        def read1 = reads[0].getExtension() == 'gz' ? reads[0].toString() - ~/.gz/ : reads[0]
+        def read2 = reads[1].getExtension() == 'gz' ? reads[1].toString() - ~/.gz/ : reads[1]
        """
        $handleGzip_R1
        $handleGzip_R2

        mapsplice.py \\
-            -c $chromosomes \\
+            -c chromosomes \\
            -x $gtf_prefix \\
            -1 ${read1} \\
            -2 ${read2} \\
diff --git a/modules/local/mirna_filtering/main.nf b/modules/local/mirna_filtering/main.nf
new file mode 100644
index 000000000..03efeea48
--- /dev/null
+++ b/modules/local/mirna_filtering/main.nf
@@ -0,0 +1,24 @@
+process MIRNA_FILTERING {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/r-base:4.2.1' :
+        'biocontainers/r-base:4.2.1' }"
+
+    input:
+    tuple val(meta), path(normalized_counts)
+    val(mirna_min_sample_percentage)
+    val(mirna_min_reads)
+
+    output:
+    tuple val(meta), path("${meta.id}.normalized_counts_filtered.tsv"), emit: filtered
+    path "versions.yml"                                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'mirna_filtering.R'
+}
diff --git a/modules/local/mirna_filtering/templates/mirna_filtering.R b/modules/local/mirna_filtering/templates/mirna_filtering.R
new file mode 100644
index 000000000..588eb2601
--- /dev/null
+++ b/modules/local/mirna_filtering/templates/mirna_filtering.R
@@ -0,0 +1,51 @@
+#!/usr/bin/env Rscript
+
+expression_norm <- read.table("$normalized_counts",
+    sep = "\\t",
+    header = TRUE,
+    stringsAsFactors = FALSE,
+    check.names = FALSE
+)
+
+samples <- colnames(expression_norm)[-c(1)]
+
+# filter data: keep rows with counts >= $mirna_min_reads in at least $mirna_min_sample_percentage (fraction) of samples
+if (length(samples) < 5) {
+    stop("Cannot perform filtering on less than 5 samples")
+}
+
+sample_nr_cutoff <- ceiling($mirna_min_sample_percentage * length(samples))
+rows_to_keep <- c()
+
+for (i in seq_len(nrow(expression_norm))) {
+    mirna_per_sample <- 0
+    for (j in 5:ncol(expression_norm)) {
+        if (expression_norm[i, j] >= $mirna_min_reads) {
+            mirna_per_sample <- mirna_per_sample + 1
+        }
+    }
+    if (mirna_per_sample >= sample_nr_cutoff) {
+        rows_to_keep <- append(rows_to_keep, i)
+    }
+}
+
+filtered_data <- expression_norm[rows_to_keep, ]
+
+write.table(filtered_data, paste0("${meta.id}.normalized_counts_filtered.tsv"),
+    quote = FALSE, sep = "\\t",
+    row.names = FALSE)
+
+################################################
+################################################
+##             VERSIONS FILE                  ##
+################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version) + ), +'versions.yml') diff --git a/modules/local/mirna_targets/main.nf b/modules/local/mirna_targets/main.nf index 7a9c0666c..e525a9f5e 100644 --- a/modules/local/mirna_targets/main.nf +++ b/modules/local/mirna_targets/main.nf @@ -2,17 +2,17 @@ process MIRNA_TARGETS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_2': - 'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2' }" + 'biocontainers/bedtools:2.30.0--h7d7f7ad_2' }" input: - tuple val(meta), path(targetscan), path(miranda), path(bed12) + tuple val(meta), path(targetscan), path(miranda) output: tuple val(meta), path("${prefix}.mirna_targets.txt"), emit: results - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,8 +22,8 @@ process MIRNA_TARGETS { prefix = task.ext.prefix ?: "${meta.id}" """ ## reformat and sort miRanda, TargetScan outputs, convert to BED for overlaps. - tail -n +2 $targetscan | sort -k1,1 -k4n | awk -v OFS="\t" '{print \$1, \$2, \$4, \$5, \$9}' | awk -v OFS="\t" '{print \$2, \$3, \$4, \$1, "0", \$5}' > targetscan.bed - tail -n +2 $miranda | sort -k2,2 -k7n | awk -v OFS="\t" '{print \$2, \$1, \$3, \$4, \$7, \$8}' | awk -v OFS="\t" '{print \$2, \$5, \$6, \$1, \$3, \$4}' | sed 's/^[^-]*-//g' > miranda.bed + tail -n +2 $targetscan | sort -k1,1 -k4n | awk -v OFS="\\t" '{print \$1, \$2, \$4, \$5, \$9}' | awk -v OFS="\\t" '{print \$2, \$3, \$4, \$1, "0", \$5}' > targetscan.bed + tail -n +2 $miranda | sort -k2,2 -k7n | awk -v OFS="\\t" '{print \$2, \$1, \$3, \$4, \$7, \$8}' | awk -v OFS="\\t" '{print \$2, \$5, \$6, \$1, \$3, \$4}' > miranda.bed ## intersect, consolidate miRanda, TargetScan information about miRs. ## -wa to output miRanda hits - targetscan makes it difficult to resolve duplicate miRNAs at MRE sites. @@ -32,11 +32,12 @@ process MIRNA_TARGETS { ## remove duplicate miRNA entries at MRE sites. ## strategy: sory by circs, sort by start position, sort by site type - the goal is to take the best site type (i.e rank site type found at MRE site). 
- paste ${prefix}.mirnas.tmp mirna_type | sort -k3,3 -k2n -k7r | awk -v OFS="\t" '{print \$4,\$1,\$2,\$3,\$5,\$6,\$7}' | awk -F "\t" '{if (!seen[\$1,\$2,\$3,\$4,\$5,\$6]++)print}' | sort -k1,1 -k3n > ${prefix}.mirna_targets.tmp - echo -e "circRNA\tmiRNA\tStart\tEnd\tScore\tEnergy_KcalMol\tSite_type" | cat - ${prefix}.mirna_targets.tmp > ${prefix}.mirna_targets.txt + paste ${prefix}.mirnas.tmp mirna_type | sort -k3n -k2n -k7r | awk -v OFS="\\t" '{print \$4,\$1,\$2,\$3,\$5,\$6,\$7}' | awk -F "\\t" '{if (!seen[\$1,\$2,\$3,\$4,\$5,\$6]++)print}' | sort -k1,1 -k3n > ${prefix}.mirna_targets.tmp + echo -e "circRNA\\tmiRNA\\tStart\\tEnd\\tScore\\tEnergy_KcalMol\\tSite_type" | cat - ${prefix}.mirna_targets.tmp > ${prefix}.mirna_targets.txt cat <<-END_VERSIONS > versions.yml "${task.process}": + awk: \$(awk --version | head -n1 | cut -d' ' -f3 | sed 's/,//g' ) bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ diff --git a/modules/local/psirc/index/environment.yml b/modules/local/psirc/index/environment.yml new file mode 100644 index 000000000..e0303603c --- /dev/null +++ b/modules/local/psirc/index/environment.yml @@ -0,0 +1,6 @@ +name: psirc_index +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::psirc=1.0.0 diff --git a/modules/local/psirc/index/main.nf b/modules/local/psirc/index/main.nf new file mode 100644 index 000000000..c0cae5967 --- /dev/null +++ b/modules/local/psirc/index/main.nf @@ -0,0 +1,26 @@ +process PSIRC_INDEX { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/psirc:1.0.0--he1fd2f9_0' : + 'biocontainers/psirc:1.0.0--he1fd2f9_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("psirc.index"), emit: index + path "versions.yml", emit: versions + + script: + """ + psirc-quant index -i psirc.index --make-unique $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + psirc-quant: \$(psirc-quant version | sed -n 's/^psirc-quant, version \\([0-9.]*\\).*\$/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/local/psirc/quant/environment.yml b/modules/local/psirc/quant/environment.yml new file mode 100644 index 000000000..222b4e893 --- /dev/null +++ b/modules/local/psirc/quant/environment.yml @@ -0,0 +1,6 @@ +name: psirc_quant +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::psirc=1.0.0 diff --git a/modules/local/psirc/quant/main.nf b/modules/local/psirc/quant/main.nf new file mode 100644 index 000000000..862e9ccdb --- /dev/null +++ b/modules/local/psirc/quant/main.nf @@ -0,0 +1,33 @@ +process PSIRC_QUANT { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/psirc:1.0.0--he1fd2f9_0' : + 'biocontainers/psirc:1.0.0--he1fd2f9_0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) + tuple val(meta4), path(chrom_sizes) + val(bootstrap_samples) + + output: + tuple val(meta), path("${meta.id}"), emit: directory + path "versions.yml" , emit: versions + + script: + def single_end = meta.single_end ? "--single -l 76 -s 20" : "" + def genomebam = gtf ? "--genomebam -g $gtf" : "" + def chromosomes = chrom_sizes ? 
"-c $chrom_sizes" : "" + """ + psirc-quant quant -t $task.cpus -i $index -o $meta.id $single_end $reads -b $bootstrap_samples $genomebam $chromosomes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + psirc-quant: \$(psirc-quant version | sed -n 's/^psirc-quant, version \\([0-9.]*\\).*\$/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/local/quantification/merge_experiments/environment.yml b/modules/local/quantification/merge_experiments/environment.yml new file mode 100644 index 000000000..07f95f055 --- /dev/null +++ b/modules/local/quantification/merge_experiments/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "fishpond_swish" +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::bioconductor-rtracklayer=1.62.0" diff --git a/modules/local/quantification/merge_experiments/main.nf b/modules/local/quantification/merge_experiments/main.nf new file mode 100644 index 000000000..32fadcd87 --- /dev/null +++ b/modules/local/quantification/merge_experiments/main.nf @@ -0,0 +1,32 @@ +process MERGE_EXPERIMENTS { + tag "$meta.id" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-rtracklayer:1.62.0--r43ha9d7317_0' : + 'biocontainers/bioconductor-rtracklayer:1.62.0--r43ha9d7317_0' }" + + input: + tuple val(meta), path(experiments) + tuple val(meta2), path(phenotype) + tuple val(meta3), path(gtf) + tuple val(meta4), path(tpm) + + output: + tuple val(meta), path("${meta.id}.merged.rds"), emit: merged + path "versions.yml" , emit: versions + + script: + template "merge_experiments.r" + + stub: + """ + touch ${meta.id}.merged.rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-summarizedexperiment: \$(Rscript -e "library(SummarizedExperiment); cat(as.character(packageVersion('SummarizedExperiment')))") + END_VERSIONS + """ +} diff --git a/modules/local/quantification/merge_experiments/templates/merge_experiments.r b/modules/local/quantification/merge_experiments/templates/merge_experiments.r new file mode 100644 index 000000000..4e82e57da --- /dev/null +++ b/modules/local/quantification/merge_experiments/templates/merge_experiments.r @@ -0,0 +1,62 @@ +#!/usr/bin/env Rscript --vanilla + +library(SummarizedExperiment) + +paths <- c('${experiments.join("\', \'")}') +experiments <- lapply(paths, readRDS) + +annotation <- rtracklayer::import('${gtf}') +tpm <- read.table('${tpm}', header=TRUE, row.names=1)[, -1] + +se_assays <- list() + +for (se in experiments) { + assays <- assays(se) + # Iterate over named list of assays + for (assay_name in names(assays)) { + assay <- assays[[assay_name]] + + # Add assay to se_assays for its name + if (is.null(se_assays[[assay_name]])) { + se_assays[[assay_name]] <- assay + } else { + se_assays[[assay_name]] <- cbind(se_assays[[assay_name]], assay) + } + } +} + +se_cbind <- do.call(SummarizedExperiment::cbind, experiments) +se <- SummarizedExperiment(assays = se_assays, colData = colData(se_cbind), rowData = rowData(se_cbind)) + +# Join phenotype data +phenotype_path <- '${phenotype}' +if (file.exists(phenotype_path)) { + phenotype <- read.csv(phenotype_path, stringsAsFactors = FALSE) + colData(se) <- merge(colData(se), phenotype, by.x="names", by.y=colnames(phenotype)[1]) +} + +# Convert string columns to factors +for 
(col in colnames(colData(se))) { + if (is.character(colData(se)[[col]]) && !(col == "names")) { + colData(se)[[col]] <- as.factor(colData(se)[[col]]) + } +} + +rownames(colData(se)) <- colData(se)\$names +colData(se)\$names <- NULL + +# Add transcript annotation +annotation <- annotation[match(rownames(se), annotation\$transcript_id),] +rowData(se) <- annotation + +# Add TPM +assay(se, "tpm", withDimnames = FALSE) <- tpm[rownames(se), rownames(colData(se))] + +saveRDS(se, '${meta.id}.merged.rds') + +writeLines( + c( + '"${task.process}":', + paste(' bioconductor-summarizedexperiment:', packageVersion('SummarizedExperiment')) + ), +'versions.yml') diff --git a/modules/local/quantification/split_types/environment.yml b/modules/local/quantification/split_types/environment.yml new file mode 100644 index 000000000..0c6dab505 --- /dev/null +++ b/modules/local/quantification/split_types/environment.yml @@ -0,0 +1,6 @@ +name: split_types +channels: + - conda-forge + - bioconda +dependencies: + - gawk=5.1.0 diff --git a/modules/local/quantification/split_types/main.nf b/modules/local/quantification/split_types/main.nf new file mode 100644 index 000000000..a95b18af6 --- /dev/null +++ b/modules/local/quantification/split_types/main.nf @@ -0,0 +1,43 @@ +process SPLIT_TYPES { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("linear.tsv") , emit: linear + tuple val(meta), path("circular.tsv"), emit: circular + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + awk -F'\\t' \\ + 'NR==1 {print > "circular.tsv"; print > "linear.tsv"} \\ + NR>1 {if (\$1 ~ /^circ_/) print > "circular.tsv"; else print > "linear.tsv"}' ${input} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + """ + touch linear.tsv + touch circular.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/local/quantification/transcriptome/environment.yml b/modules/local/quantification/transcriptome/environment.yml new file mode 100644 index 000000000..b07ac9022 --- /dev/null +++ b/modules/local/quantification/transcriptome/environment.yml @@ -0,0 +1,6 @@ +name: transcriptome +channels: + - conda-forge + - bioconda +dependencies: + - gffread=0.12.1 diff --git a/modules/local/quantification/transcriptome/main.nf b/modules/local/quantification/transcriptome/main.nf new file mode 100644 index 000000000..747942c07 --- /dev/null +++ b/modules/local/quantification/transcriptome/main.nf @@ -0,0 +1,38 @@ +process TRANSCRIPTOME { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + tuple val(meta), path(gff) + tuple val(meta2), path(genome) + + output: + tuple val(meta), path("$outfile"), emit: transcriptome + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = task.ext.extension ?: "fasta" + outfile = "${prefix}.${extension}" + """ + gffread \\ + -g $genome \\ + -w $outfile \\ + $args \\ + $gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 33f77b6e7..000000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/circrna/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/segemehl/filter/main.nf b/modules/local/segemehl/filter/main.nf deleted file mode 100644 index 8ea156037..000000000 --- a/modules/local/segemehl/filter/main.nf +++ /dev/null @@ -1,26 +0,0 @@ -process SEGEMEHL_FILTER{ - tag "$meta.id" - label 'process_single' - - input: - tuple val(meta), path(results) - val(bsj_reads) - - output: - tuple val(meta), path("${prefix}_segemehl_circs.bed"), emit: results - tuple val(meta), path("${prefix}_segemehl.bed") , emit: matrix - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - grep ';C;' ${prefix}.sngl.bed | awk -v OFS="\t" '{print \$1,\$2,\$3,\$6}' | sort | uniq -c | awk -v OFS="\t" '{print \$2,\$3,\$4,\$5,\$1}' > ${prefix}_collapsed.bed - - awk -v OFS="\t" -v BSJ=${bsj_reads} '{if(\$5>=BSJ) print \$0}' ${prefix}_collapsed.bed > ${prefix}_segemehl.bed - - awk -v OFS="\t" '{print \$1, \$2, \$3, \$1":"\$2"-"\$3":"\$4, \$5, \$4}' ${prefix}_segemehl.bed > ${prefix}_segemehl_circs.bed - """ -} diff --git a/modules/local/seqkit/split/environment.yml b/modules/local/seqkit/split/environment.yml new file mode 100644 index 000000000..d557b8b31 --- /dev/null +++ b/modules/local/seqkit/split/environment.yml @@ -0,0 +1,7 @@ +name: seqkit_split +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::seqkit=2.8.0 diff --git a/modules/local/seqkit/split/main.nf b/modules/local/seqkit/split/main.nf new file mode 100644 index 000000000..1fbc0c8d6 --- /dev/null +++ b/modules/local/seqkit/split/main.nf @@ -0,0 +1,36 @@ +process SEQKIT_SPLIT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.8.0--h9ee0642_0' : + 'biocontainers/seqkit:2.8.0--h9ee0642_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}/*"), emit: split + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + seqkit \\ + split \\ + $args \\ + --threads $task.cpus \\ + $fasta \\ + --out-dir ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/star/sjdb/main.nf b/modules/local/star/sjdb/main.nf index 602d0da96..835f4e212 100644 --- a/modules/local/star/sjdb/main.nf +++ b/modules/local/star/sjdb/main.nf @@ -1,18 +1,32 @@ process SJDB { + tag "$meta.id" label 'process_single' + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + input: - path(sjdb) + tuple val(meta), path(sjdb) val(bsj_reads) output: - path("dataset.SJ.out.tab"), emit: sjtab + tuple val(meta), path("dataset.SJ.out.tab"), emit: sjtab + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def VERSION = '1.3.4' """ - cat *.tab | awk -v BSJ=${bsj_reads} '(\$7 >= BSJ && \$6==0)' | cut -f1-6 | sort | uniq > dataset.SJ.out.tab + mkdir tmp + cat *.tab | awk -v BSJ=${bsj_reads} '(\$7 >= BSJ && \$6==0)' | cut -f1-6 | sort -T ./tmp/ | uniq > dataset.SJ.out.tab + rm -rf tmp + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mawk: $VERSION + END_VERSIONS """ } diff --git a/modules/local/stringtie/prepde/main.nf b/modules/local/stringtie/prepde/main.nf index e069cddd9..38f274340 100644 --- a/modules/local/stringtie/prepde/main.nf +++ b/modules/local/stringtie/prepde/main.nf @@ -1,10 +1,10 @@ process STRINGTIE_PREPDE { label 'process_low' - conda (params.enable_conda ? "bioconda::stringtie=2.2.1" : null) + conda "bioconda::stringtie=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : - 'quay.io/biocontainers/stringtie:2.2.1--hecb563c_2' }" + 'biocontainers/stringtie:2.2.1--hecb563c_2' }" input: path gtf @@ -12,14 +12,20 @@ process STRINGTIE_PREPDE { output: path "transcript_count_matrix.csv" , emit: transcript_matrix path "gene_count_matrix.csv" , emit: gene_matrix + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: """ - for file in \$(ls *.gtf); do sample_id=\${file%".transcripts.gtf"}; touch samples.txt; printf "\$sample_id\t\$file\n" >> samples.txt ; done + for file in \$(ls *.gtf); do sample_id=\${file%".transcripts.gtf"}; touch samples.txt; printf "\$sample_id\\t\$file\\n" >> samples.txt ; done prepDE.py -i samples.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS """ } diff --git a/modules/local/targetscan/database/main.nf b/modules/local/targetscan/database/main.nf index 0581aa4cf..8ba3ef71c 100644 --- a/modules/local/targetscan/database/main.nf +++ b/modules/local/targetscan/database/main.nf @@ -1,18 +1,30 @@ process TARGETSCAN_DATABASE { - tag "$mature" + tag "$meta.id" label 'process_low' + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + input: - path(mature) + tuple val(meta), path(mature) output: - path("mature.txt"), emit: mature_txt + tuple val(meta), path("mature.txt") , emit: mature_txt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def VERSION = '1.3.4' """ - bash ${workflow.projectDir}/bin/targetscan_format.sh $mature + targetscan_format.sh $mature + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mawk: $VERSION + END_VERSIONS """ } diff --git a/modules/local/targetscan/predict/main.nf b/modules/local/targetscan/predict/main.nf index ae3de117d..450611c2d 100644 --- a/modules/local/targetscan/predict/main.nf +++ b/modules/local/targetscan/predict/main.nf @@ -2,14 +2,14 @@ process TARGETSCAN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::targetscan=7.0" : null) + conda "bioconda::targetscan=7.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/targetscan:7.0--pl5321hdfd78af_0' : - 'quay.io/biocontainers/targetscan:7.0--pl5321hdfd78af_0' }" + 'biocontainers/targetscan:7.0--pl5321hdfd78af_0' }" input: tuple val(meta), path(fasta) - path(mature_txt) + tuple val(meta2), path(mature_txt) output: tuple val(meta), path("${prefix}.txt"), emit: txt @@ -25,12 +25,13 @@ process TARGETSCAN { ##format for targetscan cat $fasta | grep ">" | sed 's/>//g' > id cat $fasta | grep -v ">" > seq - paste id seq | awk -v OFS="\t" '{print \$1, "0000", \$2}' > ${prefix}_ts.txt + paste id seq | awk -v OFS="\\t" '{print \$1, "0000", \$2}' > ${prefix}_ts.txt # run targetscan targetscan_70.pl mature.txt ${prefix}_ts.txt ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": + awk: \$(awk --version | head -n1 | cut -d' ' -f3 | sed 's/,//g' ) targetscan: $VERSION END_VERSIONS """ diff --git a/modules/local/tximeta/tximeta/environment.yml b/modules/local/tximeta/tximeta/environment.yml new file mode 100644 index 000000000..be4bcd30b --- /dev/null +++ b/modules/local/tximeta/tximeta/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "tximeta_tximeta" +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::bioconductor-tximeta=1.20.1" diff --git a/modules/local/tximeta/tximeta/main.nf b/modules/local/tximeta/tximeta/main.nf new file mode 100644 index 000000000..c1be1d37d --- /dev/null +++ b/modules/local/tximeta/tximeta/main.nf @@ -0,0 +1,34 @@ +process TXIMETA_TXIMETA { + tag "$meta.id" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-tximeta%3A1.20.1--r43hdfd78af_1' : + 'biocontainers/bioconductor-tximeta:1.20.1--r43hdfd78af_1' }" + + input: + tuple val(meta), path("quants/*") + val quant_type + + output: + tuple val(meta), path("*.rds"), emit: se + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: meta.id + template 'tximeta.r' + + stub: + """ + touch ${meta.id}.rds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-tximeta: \$(Rscript -e "library(tximeta); cat(as.character(packageVersion('tximeta')))") + END_VERSIONS + """ +} diff --git a/modules/local/tximeta/tximeta/templates/tximeta.r b/modules/local/tximeta/tximeta/templates/tximeta.r new file mode 100755 index 000000000..878ada020 --- /dev/null +++ b/modules/local/tximeta/tximeta/templates/tximeta.r @@ -0,0 +1,66 @@ +#!/usr/bin/env Rscript --vanilla + +# Script for importing and processing transcript-level quantifications. +# Written by Lorena Pantano, later modified by Jonathan Manning, and released +# under the MIT license. 
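+#
+# Orientation sketch (an assumption, inferred from the pattern matching below):
+# quantifier outputs are staged one directory per sample under quants/, e.g.
+#   quants/sampleA/quant.sf      (salmon)
+#   quants/sampleB/abundance.h5  (kallisto)
+# and '$quant_type' selects which file name is collected.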
+ +# Loading required libraries +library(tximeta) + +################################################ +################################################ +## Main script starts here ## +################################################ +################################################ + +# Define pattern for file names based on quantification type +pattern <- ifelse('$quant_type' == "kallisto", + ifelse(length(list.files('quants', pattern = "abundance.h5", recursive = T, full.names = T)) != 0, + "abundance.h5", + "abundance.tsv"), + "quant.sf") + +fns <- list.files('quants', pattern = pattern, recursive = T, full.names = T) +names <- basename(dirname(fns)) +names(fns) <- names + +coldata <- data.frame(files = fns, names = names) +rownames(coldata) <- coldata[["names"]] + +# Import transcript-level quantifications +se <- tximeta(coldata, type = '$quant_type', txOut = TRUE) + +# Save summarized experiment to file +saveRDS(se, file = paste0('$prefix', '.rds')) + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +sink(paste("R_sessionInfo.log", sep = '.')) +citation("tximeta") +print(sessionInfo()) +sink() + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +tximeta.version <- as.character(packageVersion('tximeta')) + +writeLines( + c( + '"${task.process}":', + paste(' bioconductor-tximeta:', tximeta.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/local/upset/main.nf b/modules/local/upset/main.nf new file mode 100644 index 000000000..08babf765 --- /dev/null +++ b/modules/local/upset/main.nf @@ -0,0 +1,22 @@ +process UPSET { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3 conda-forge::numpy=1.20.* conda-forge::pandas=1.2.* conda-forge::upsetplot=0.4.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' : + 'biocontainers/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' }" + input: + tuple val(meta), val(tools), path(beds) + + when: + task.ext.when == null || task.ext.when + + output: + tuple val(meta), path("*.png"), emit: plot + path "*.upset_mqc.json" , emit: multiqc + path "versions.yml" , emit: versions + + script: + template "upset.py" +} diff --git a/modules/local/upset/templates/upset.py b/modules/local/upset/templates/upset.py new file mode 100644 index 000000000..f0378bf87 --- /dev/null +++ b/modules/local/upset/templates/upset.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +import pandas as pd +import platform +import upsetplot +import matplotlib +import matplotlib.pyplot as plt +import distutils.version +import base64 +import json + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. 
+ indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +df_tools = pd.DataFrame( + { + "tool": "${tools.join(' ')}".split(" "), + "file": "${beds.join(' ')}".split(" ") + } +) + +tool_files = df_tools.groupby("tool").agg(lambda x: x.tolist())["file"].to_dict() +tool_ids = {} + +for tool, files in tool_files.items(): + df_tool = pd.concat([pd.read_csv(f, sep="\\t", header=None) for f in files]) + tool_ids[tool] = set(df_tool[3].unique()) + +dataset = upsetplot.from_contents(tool_ids) + +upsetplot.plot(dataset, orientation='horizontal', show_counts=True) +plot_file = "${meta.id}.upset.png" +plt.savefig(plot_file) + +image_string = base64.b64encode(open(plot_file, "rb").read()).decode("utf-8") +image_html = f'
<img src="data:image/png;base64,{image_string}">
    ' + +multiqc = { + 'id': "${meta.id}_upset", + 'parent_id': "upset_plots", + 'parent_name': 'UpSet Plots', + 'parent_description': 'UpSet plots showing the overlap between tools for each sample', + 'section_name': 'UpSet: ${meta.id}', + 'description': 'UpSet plot showing the overlap between tools for sample ${meta.id}', + 'plot_type': 'image', + 'data': image_html +} + +with open("${meta.id}.upset_mqc.json", "w") as f: + f.write(json.dumps(multiqc, indent=4)) + +# Create version file +versions = { + "${task.process}" : { + "python": platform.python_version(), + "pandas": pd.__version__, + "upsetplot": upsetplot.__version__, + "matplotlib": matplotlib.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml new file mode 100644 index 000000000..a89401f2a --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_getfasta +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf new file mode 100644 index 000000000..b316117d4 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -0,0 +1,50 @@ +process BEDTOOLS_GETFASTA { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path fasta + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$fasta" == "${prefix}.fa") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + getfasta \\ + $args \\ + -fi $fasta \\ + -bed $bed \\ + -fo ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$fasta" == "${prefix}.fa") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/getfasta/meta.yml b/modules/nf-core/bedtools/getfasta/meta.yml new file mode 100644 index 000000000..41917fe3f --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/meta.yml @@ -0,0 +1,46 @@ +name: bedtools_getfasta +description: extract sequences in a FASTA file based on intervals defined in a feature file. +keywords: + - bed + - fasta + - getfasta +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Bed feature file + pattern: "*.{bed}" + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Output fasta file with extracted sequences + pattern: "*.{fa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/getfasta/tests/main.nf.test b/modules/nf-core/bedtools/getfasta/tests/main.nf.test new file mode 100644 index 000000000..4da7552c8 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process BEDTOOLS_GETFASTA" + script "../main.nf" + process "BEDTOOLS_GETFASTA" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/getfasta" + + test("sarscov2 - bed - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + ] + + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bed - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + ] + + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap b/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap new file mode 100644 index 000000000..69bf33f74 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bed - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,41c3a45a57a16c04f828d8f8bb52df70" + ] + ], + "1": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,41c3a45a57a16c04f828d8f8bb52df70" + ] + ], + "versions": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T14:16:19.383758985" + }, + "sarscov2 - bed - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T14:16:47.47010536" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/getfasta/tests/tags.yml b/modules/nf-core/bedtools/getfasta/tests/tags.yml new file mode 100644 index 000000000..42ec3026c --- /dev/null +++ 
b/modules/nf-core/bedtools/getfasta/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/getfasta: + - "modules/nf-core/bedtools/getfasta/**" diff --git a/modules/nf-core/bedtools/groupby/environment.yml b/modules/nf-core/bedtools/groupby/environment.yml new file mode 100644 index 000000000..dab99ea1f --- /dev/null +++ b/modules/nf-core/bedtools/groupby/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_groupby +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/groupby/main.nf b/modules/nf-core/bedtools/groupby/main.nf new file mode 100644 index 000000000..063e7ba2a --- /dev/null +++ b/modules/nf-core/bedtools/groupby/main.nf @@ -0,0 +1,50 @@ +process BEDTOOLS_GROUPBY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + val(summary_col) + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.grouped" + def summary_col = task.ext.summary_col ? "-c ${task.ext.summary_col}" : "-c 5" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + groupby \\ + -i $bed \\ + ${summary_col} \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/groupby/meta.yml b/modules/nf-core/bedtools/groupby/meta.yml new file mode 100644 index 000000000..bcbc561a0 --- /dev/null +++ b/modules/nf-core/bedtools/groupby/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: bedtools_groupby +description: Groups features in a BED file by given column(s) and computes summary statistics for each group to another column. +keywords: + - bed + - groupby + - bedtools +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/groupby.html + homepage: https://bedtools.readthedocs.io/en/latest/ + doi: 10.1093/bioinformatics/btq033 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" + - summary_column: + type: integer + description: Column to be summarized (1-based) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - bed: + type: file + description: Grouped by bed file with combined features + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mashehu" +maintainers: + - "@mashehu" diff --git a/modules/nf-core/bedtools/intersect/environment.yml b/modules/nf-core/bedtools/intersect/environment.yml new file mode 100644 index 000000000..2a3430508 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_intersect +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf new file mode 100644 index 000000000..d9e79e7fa --- /dev/null +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -0,0 +1,59 @@ +process BEDTOOLS_INTERSECT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals1), path(intervals2) + tuple val(meta2), path(chrom_sizes) + + output: + tuple val(meta), path("*.${extension}"), emit: intersect + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + //Extension of the output file. It is set by the user via "ext.suffix" in the config. Corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). + extension = task.ext.suffix ?: "${intervals1.extension}" + def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' + if ("$intervals1" == "${prefix}.${extension}" || + "$intervals2" == "${prefix}.${extension}") + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + intersect \\ + -a $intervals1 \\ + -b $intervals2 \\ + $args \\ + $sizes \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "bed" + if ("$intervals1" == "${prefix}.${extension}" || + "$intervals2" == "${prefix}.${extension}") + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml new file mode 100644 index 000000000..0939cb54a --- /dev/null +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -0,0 +1,59 @@ +name: bedtools_intersect +description: Allows one to screen for overlaps between two sets of genomic features. +keywords: + - bed + - intersect + - overlap +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. 
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals1: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - intervals2: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - meta2: + type: map + description: | + Groovy Map containing reference chromosome sizes + e.g. [ id:'test' ] + - chrom_sizes: + type: file + description: Chromosome sizes file + pattern: "*{.sizes,.txt}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intersect: + type: file + description: File containing the description of overlaps found between the two features + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" diff --git a/modules/nf-core/bedtools/sort/environment.yml b/modules/nf-core/bedtools/sort/environment.yml new file mode 100644 index 000000000..87b2e4252 --- /dev/null +++ b/modules/nf-core/bedtools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf new file mode 100644 index 000000000..b833150a1 --- /dev/null +++ b/modules/nf-core/bedtools/sort/main.nf @@ -0,0 +1,54 @@ +process BEDTOOLS_SORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals) + path genome_file + + output: + tuple val(meta), path("*.${extension}"), emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genome_cmd = genome_file ? "-g $genome_file" : "" + extension = task.ext.suffix ?: intervals.extension + if ("$intervals" == "${prefix}.${extension}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + } + """ + bedtools \\ + sort \\ + -i $intervals \\ + $genome_cmd \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: intervals.extension + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml new file mode 100644 index 000000000..7c915f5f9 --- /dev/null +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -0,0 +1,54 @@ +name: bedtools_sort +description: Sorts a feature file by chromosome and other criteria. 
+keywords: + - bed + - sort + - bedtools + - chromosome +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BED/BEDGRAPH + pattern: "*.{bed|bedGraph}" + - genome_file: + type: file + description: | + Optional reference genome 2 column file that defines the expected chromosome order. + pattern: "*.{fai,txt,chromsizes}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sorted: + type: file + description: Sorted output file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test b/modules/nf-core/bedtools/sort/tests/main.nf.test new file mode 100644 index 000000000..b1f36dd91 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process BEDTOOLS_SORT" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/sort" + + test("test_bedtools_sort") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("test_bedtools_sort_with_genome") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test.snap b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap new file mode 100644 index 000000000..f10e8b984 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test_bedtools_sort_with_genome": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:13:11.830452" + }, + "test_bedtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + 
"versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:16:40.535947" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/nextflow.config b/modules/nf-core/bedtools/sort/tests/nextflow.config new file mode 100644 index 000000000..f203c99c5 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: BEDTOOLS_SORT { + ext.prefix = { "${meta.id}_out" } + ext.suffix = "testtext" + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/tags.yml b/modules/nf-core/bedtools/sort/tests/tags.yml new file mode 100644 index 000000000..47c85eead --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/sort: + - "modules/nf-core/bedtools/sort/**" diff --git a/modules/nf-core/bioawk/bioawk.diff b/modules/nf-core/bioawk/bioawk.diff new file mode 100644 index 000000000..1303738f2 --- /dev/null +++ b/modules/nf-core/bioawk/bioawk.diff @@ -0,0 +1,24 @@ +Changes in module 'nf-core/bioawk' +--- modules/nf-core/bioawk/main.nf ++++ modules/nf-core/bioawk/main.nf +@@ -20,15 +20,15 @@ + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + prefix = task.ext.prefix ?: "${meta.id}" ++ suffix = task.ext.suffix ?: input.extension ++ file_name = "${prefix}.${suffix}" + + def VERSION = '1.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + bioawk \\ + $args \\ + $input \\ +- > ${prefix} +- +- gzip ${prefix} ++ > ${file_name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/bioawk/environment.yml b/modules/nf-core/bioawk/environment.yml new file mode 100644 index 000000000..5fdfd4176 --- /dev/null +++ b/modules/nf-core/bioawk/environment.yml @@ -0,0 +1,7 @@ +name: bioawk +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bioawk=1.0 diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf new file mode 100644 index 000000000..0fded517a --- /dev/null +++ b/modules/nf-core/bioawk/main.nf @@ -0,0 +1,37 @@ +process BIOAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioawk:1.0--h5bf99c6_6': + 'biocontainers/bioawk:1.0--h5bf99c6_6' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: input.extension + + def VERSION = '1.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + bioawk \\ + $args \\ + $input \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioawk: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml new file mode 100644 index 000000000..c9d001118 --- /dev/null +++ b/modules/nf-core/bioawk/meta.yml @@ -0,0 +1,46 @@ +name: "bioawk" +description: Bioawk is an extension to Brian Kernighan's awk, adding the support of several common biological data formats. +keywords: + - bioawk + - fastq + - fasta + - sam + - file manipulation + - awk +tools: + - "bioawk": + description: "BWK awk modified for biological data" + homepage: "https://github.com/lh3/bioawk" + documentation: "https://github.com/lh3/bioawk" + tool_dev_url: "https://github.com/lh3/bioawk" + licence: "['Free software license (https://github.com/lh3/bioawk/blob/master/README.awk#L1)']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Input sequence biological sequence file (optionally gzipped) to be manipulated via program specified in `$args`. + pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: | + Manipulated and gzipped version of input sequence file following program specified in `args`. + File name will be what is specified in `$prefix`. Do not include `.gz` suffix in `$prefix`! Output files` will be gzipped for you! + pattern: "*.gz" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/bowtie/align/environment.yml b/modules/nf-core/bowtie/align/environment.yml new file mode 100644 index 000000000..2617e6f0a --- /dev/null +++ b/modules/nf-core/bowtie/align/environment.yml @@ -0,0 +1,7 @@ +name: bowtie_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie=1.3.0 diff --git a/modules/nf-core/bowtie/align/main.nf b/modules/nf-core/bowtie/align/main.nf index 950c495e8..29e9cd533 100644 --- a/modules/nf-core/bowtie/align/main.nf +++ b/modules/nf-core/bowtie/align/main.nf @@ -2,10 +2,10 @@ process BOWTIE_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::bowtie=1.3.0 bioconda::samtools=1.16.1' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:c84c7c55c45af231883d9ff4fe706ac44c479c36-0' : - 'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:c84c7c55c45af231883d9ff4fe706ac44c479c36-0' }" + 'biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:c84c7c55c45af231883d9ff4fe706ac44c479c36-0' }" input: tuple val(meta), path(reads) @@ -36,7 +36,7 @@ process BOWTIE_ALIGN { $unaligned \\ $args \\ $endedness \\ - 2> ${prefix}.out \\ + 2> >(tee ${prefix}.out >&2) \\ | samtools view $args2 -@ $task.cpus -bS -o ${prefix}.bam - if [ -f ${prefix}.unmapped.fastq ]; then diff --git a/modules/nf-core/bowtie/align/meta.yml b/modules/nf-core/bowtie/align/meta.yml index 0d9250096..89eaedd6c 100644 --- a/modules/nf-core/bowtie/align/meta.yml +++ b/modules/nf-core/bowtie/align/meta.yml @@ -46,3 +46,5 @@ output: pattern: "*.fastq.gz" authors: - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/bowtie/build/environment.yml b/modules/nf-core/bowtie/build/environment.yml new file mode 100644 index 000000000..0907b0f84 --- /dev/null +++ b/modules/nf-core/bowtie/build/environment.yml @@ -0,0 +1,7 @@ +name: bowtie_build +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie=1.3.0 diff --git a/modules/nf-core/bowtie/build/main.nf b/modules/nf-core/bowtie/build/main.nf index e01d9855d..05e22fe8c 100644 --- a/modules/nf-core/bowtie/build/main.nf +++ b/modules/nf-core/bowtie/build/main.nf @@ -2,10 +2,10 @@ process BOWTIE_BUILD { tag "$fasta" label 'process_high' - conda (params.enable_conda ? 'bioconda::bowtie=1.3.0' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie:1.3.0--py38hed8969a_1' : - 'quay.io/biocontainers/bowtie:1.3.0--py38hed8969a_1' }" + 'biocontainers/bowtie:1.3.0--py38hed8969a_1' }" input: path fasta diff --git a/modules/nf-core/bowtie/build/meta.yml b/modules/nf-core/bowtie/build/meta.yml index 0c41bbacb..262855f42 100644 --- a/modules/nf-core/bowtie/build/meta.yml +++ b/modules/nf-core/bowtie/build/meta.yml @@ -30,3 +30,6 @@ output: authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 000000000..d2796359a --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,9 @@ +name: bowtie2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index 983a3ad3b..809525ad3 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -1,23 +1,28 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label "process_high" + label 'process_high' - conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : - 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" input: tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true - path "versions.yml" , emit: versions + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,6 +43,11 @@ } def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` @@ -50,8 +60,8 @@ --threads $task.cpus \\ $unaligned \\ $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -68,4 +78,40 @@ pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) END_VERSIONS """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 =~ extension_pattern).getCount() > 0 ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" // find-style match, as in the script block above; a full-string match (==~) would miss e.g. '--output-fmt cram --write-index' + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + def reference = fasta && extension=="cram" ?
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml index c8e9a0012..38610e0ed 100644 --- a/modules/nf-core/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -36,6 +36,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" - save_unaligned: type: boolean description: | @@ -46,22 +55,41 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: + - sam: + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" - bam: type: file description: Output BAM file containing read alignments - pattern: "*.{bam}" - - versions: + pattern: "*.bam" + - cram: type: file - description: File containing software versions - pattern: "versions.yml" - - fastq: + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: type: file - description: Unaligned FastQ files - pattern: "*.fastq.gz" + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + type: file + description: Output CRAM index + pattern: "*.crai" - log: type: file description: Aligment log pattern: "*.log" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 000000000..03f1d5e51 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 000000000..fdc1c59dd --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 000000000..03aeaf9ee --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag 
"bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = true //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = true //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = true //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - 
[fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = true //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 000000000..028e7da68 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') END_VERSIONS """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml index 0240224d5..2d6879919 100644 ---
a/modules/nf-core/bowtie2/build/meta.yml +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -41,3 +41,6 @@ output: authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 000000000..163760257 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "modules/nf-core/bowtie2/build/main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 000000000..6875e0213 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ] + } + ], + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 000000000..81aa61dab --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/modules/nf-core/bwa/index/environment.yml b/modules/nf-core/bwa/index/environment.yml new file mode 100644 index 000000000..126e00344 --- /dev/null +++ b/modules/nf-core/bwa/index/environment.yml @@ -0,0 +1,7 @@ +name: bwa_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.18 diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index 6d70fc159..2e48b6caa 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -2,10 +2,10 @@ process BWA_INDEX { tag "$fasta" label 'process_single' - conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'https://depot.galaxyproject.org/singularity/bwa:0.7.18--he4a0461_0' : + 'biocontainers/bwa:0.7.18--he4a0461_0' }" input: tuple val(meta), path(fasta) @@ -18,13 +18,14 @@ process BWA_INDEX { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' """ mkdir bwa bwa \\ index \\ $args \\ - -p bwa/${fasta.baseName} \\ + -p bwa/${prefix} \\ $fasta cat <<-END_VERSIONS > versions.yml @@ -34,14 +35,15 @@ process BWA_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" """ mkdir bwa - touch bwa/genome.amb - touch bwa/genome.ann - touch bwa/genome.bwt - touch bwa/genome.pac - touch bwa/genome.sa + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml index 2c6cfcd79..6bbc87a64 100644 --- a/modules/nf-core/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -11,7 +11,7 @@ tools: BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html + documentation: https://bio-bwa.sourceforge.net/bwa.shtml arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: @@ -40,3 +40,7 @@ output: authors: - "@drpatelh" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" + - "@gallvp" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 000000000..af33e73ca --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 000000000..7c8f04657 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions": [ + "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-05-16T11:40:09.925307" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml new file mode 100644 index 000000000..28bb483c4 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 000000000..17a04ef23 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 000000000..adbdbd7ba --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,79 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? 
match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 000000000..00a8db0bc --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 000000000..fcee2d19f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,178 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 000000000..423571ba2 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab 
polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 000000000..ec26b0fdc --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 000000000..fbc79783d --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 000000000..37b578f52 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 000000000..8c69b121f --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::coreutils=8.30 diff --git 
a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index 4fa365d33..f132b2adc 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,10 +2,10 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -76,5 +76,4 @@ """ } } - } diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index c836598e4..db4ac3c79 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -1,6 +1,7 @@ name: cat_fastq description: Concatenates fastq files keywords: + - cat - fastq - concatenate tools: @@ -16,7 +17,7 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list + type: file description: | List of input FastQ files to be concatenated. output: @@ -33,7 +34,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 000000000..a71dcb8df --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,140 @@ +// NOTE The version snaps may not be consistent +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 000000000..43dfe28fc --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,169 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:30:39.816981" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:35.229332" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:34:00.058829" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + 
"test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:33:33.031555" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:02.270935" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 000000000..6ac436140 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/circexplorer2/annotate/environment.yml b/modules/nf-core/circexplorer2/annotate/environment.yml new file mode 100644 index 000000000..def886e93 --- /dev/null +++ b/modules/nf-core/circexplorer2/annotate/environment.yml @@ -0,0 +1,7 @@ +name: circexplorer2_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circexplorer2=2.3.8 diff --git a/modules/nf-core/circexplorer2/annotate/main.nf b/modules/nf-core/circexplorer2/annotate/main.nf index bf836bbfa..0e9fa0a02 100644 --- a/modules/nf-core/circexplorer2/annotate/main.nf +++ b/modules/nf-core/circexplorer2/annotate/main.nf @@ -2,10 +2,10 @@ process CIRCEXPLORER2_ANNOTATE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::circexplorer2=2.3.8" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/circexplorer2:2.3.8--pyh864c0ab_1': - 'quay.io/biocontainers/circexplorer2:2.3.8--pyh864c0ab_1' }" + 'biocontainers/circexplorer2:2.3.8--pyh864c0ab_1' }" input: tuple val(meta), path(junctions) diff --git a/modules/nf-core/circexplorer2/annotate/meta.yml b/modules/nf-core/circexplorer2/annotate/meta.yml index ba388581c..e11df81c2 100644 --- a/modules/nf-core/circexplorer2/annotate/meta.yml +++ b/modules/nf-core/circexplorer2/annotate/meta.yml @@ -10,7 +10,6 @@ tools: documentation: "https://circexplorer2.readthedocs.io/en/latest/" doi: "10.1101/gr.202895.115" licence: "['MIT License']" - input: - meta: type: map @@ -29,7 +28,6 @@ input: type: file description: Reformatted GTF file for CIRCexplorer2 pattern: "*.{txt}" - output: - meta: type: map @@ -44,6 +42,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@BarryDigby" +maintainers: + - "@BarryDigby" diff --git a/modules/nf-core/circexplorer2/parse/environment.yml b/modules/nf-core/circexplorer2/parse/environment.yml new file mode 100644 index 000000000..52e172e7e --- /dev/null +++ b/modules/nf-core/circexplorer2/parse/environment.yml @@ -0,0 +1,7 @@ +name: circexplorer2_parse +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circexplorer2=2.3.8 diff --git a/modules/nf-core/circexplorer2/parse/main.nf b/modules/nf-core/circexplorer2/parse/main.nf index 8d9dbb805..db7a0063f 100644 --- a/modules/nf-core/circexplorer2/parse/main.nf +++ b/modules/nf-core/circexplorer2/parse/main.nf @@ -2,10 +2,10 @@ process CIRCEXPLORER2_PARSE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::circexplorer2=2.3.8" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/circexplorer2:2.3.8--pyh864c0ab_1': - 'quay.io/biocontainers/circexplorer2:2.3.8--pyh864c0ab_1' }" + 'biocontainers/circexplorer2:2.3.8--pyh864c0ab_1' }" input: tuple val(meta), path(fusions) diff --git a/modules/nf-core/circexplorer2/parse/meta.yml b/modules/nf-core/circexplorer2/parse/meta.yml index 61e5657cc..ef3ebf856 100644 --- a/modules/nf-core/circexplorer2/parse/meta.yml +++ b/modules/nf-core/circexplorer2/parse/meta.yml @@ -11,7 +11,6 @@ tools: documentation: "https://circexplorer2.readthedocs.io/en/latest/" doi: "10.1101/gr.202895.115" licence: "['MIT License']" - input: - meta: type: map @@ -22,7 +21,6 @@ input: type: file description: BAM (BWA), BED (Segemehl), TXT (MapSplice), or Junction (STAR) file. Aligner will be autodetected based on file suffix. 
pattern: "*.{bam,junction,bed,txt}" - output: - meta: type: map @@ -37,6 +35,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@BarryDigby" +maintainers: + - "@BarryDigby" diff --git a/modules/nf-core/csvtk/join/environment.yml b/modules/nf-core/csvtk/join/environment.yml new file mode 100644 index 000000000..5b6c6468f --- /dev/null +++ b/modules/nf-core/csvtk/join/environment.yml @@ -0,0 +1,7 @@ +name: csvtk_join +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::csvtk=0.30.0 diff --git a/modules/nf-core/csvtk/join/main.nf b/modules/nf-core/csvtk/join/main.nf new file mode 100644 index 000000000..5f3afeeae --- /dev/null +++ b/modules/nf-core/csvtk/join/main.nf @@ -0,0 +1,49 @@ +process CSVTK_JOIN { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/csvtk:0.30.0--h9ee0642_0': + 'biocontainers/csvtk:0.30.0--h9ee0642_0' }" + + input: + tuple val(meta), path(csv) + + output: + tuple val(meta), path("${prefix}.${out_extension}"), emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + out_extension = args.contains('--out-delimiter "\t"') || args.contains('-D "\t"') || args.contains("-D \$'\t'") ? "tsv" : "csv" + """ + csvtk \\ + join \\ + $args \\ + --num-cpus $task.cpus \\ + --out-file ${prefix}.${out_extension} \\ + $csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + out_extension = args.contains('--out-delimiter "\t"') || args.contains('-D "\t"') || args.contains("-D \$'\t'") ? "tsv" : "csv" + """ + touch ${prefix}.${out_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/csvtk/join/meta.yml b/modules/nf-core/csvtk/join/meta.yml new file mode 100644 index 000000000..a75ec40f0 --- /dev/null +++ b/modules/nf-core/csvtk/join/meta.yml @@ -0,0 +1,41 @@ +name: csvtk_join +description: Join two or more CSV (or TSV) tables by selected fields into a single table +keywords: + - join + - tsv + - csv +tools: + - csvtk: + description: A cross-platform, efficient, practical CSV/TSV toolkit + homepage: http://bioinf.shenwei.me/csvtk + documentation: http://bioinf.shenwei.me/csvtk + tool_dev_url: https://github.com/shenwei356/csvtk + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "version.yml" + - csv: + type: file + description: Joined CSV/TSV file + pattern: "*.{csv,tsv}" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/nf-core/csvtk/join/tests/main.nf.test b/modules/nf-core/csvtk/join/tests/main.nf.test new file mode 100644 index 000000000..3cf178c4f --- /dev/null +++ b/modules/nf-core/csvtk/join/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process CSVTK_JOIN" + script "../main.nf" + process "CSVTK_JOIN" + + tag "modules" + tag "modules_nfcore" + tag "csvtk" + tag "csvtk/join" + + test("join - csv") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_hybrid.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_short.csv", checkIfExists: true), + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("join - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_hybrid.csv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/bacass_short.csv", checkIfExists: true), + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/csvtk/join/tests/main.nf.test.snap b/modules/nf-core/csvtk/join/tests/main.nf.test.snap new file mode 100644 index 000000000..b124788bb --- /dev/null +++ b/modules/nf-core/csvtk/join/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "join - csv": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,d0ad82ca096c7e05eb9f9a04194c9e30" + ] + ], + "1": [ + "versions.yml:md5,e76147e4eca968d23543e7007522f1d3" + ], + "csv": [ + [ + { + "id": "test" + }, + "test.csv:md5,d0ad82ca096c7e05eb9f9a04194c9e30" + ] + ], + "versions": [ + "versions.yml:md5,e76147e4eca968d23543e7007522f1d3" + ] + } + ], + "timestamp": "2024-05-21T15:45:44.045434" + }, + "join - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e76147e4eca968d23543e7007522f1d3" + ], + "csv": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e76147e4eca968d23543e7007522f1d3" + ] + } + ], + "timestamp": "2024-05-21T15:45:55.59201" + } +} \ No newline at end of file diff --git a/modules/nf-core/csvtk/join/tests/nextflow.config b/modules/nf-core/csvtk/join/tests/nextflow.config new file mode 100644 index 000000000..1b14393a9 --- /dev/null +++ b/modules/nf-core/csvtk/join/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CSVTK_JOIN { + ext.args = "--fields 'ID;ID' -p -e -d \"\t\" -D \",\"" + } +} diff --git a/modules/nf-core/csvtk/join/tests/tags.yml b/modules/nf-core/csvtk/join/tests/tags.yml new file mode 100644 index 000000000..6c3a0fa6b --- /dev/null +++ b/modules/nf-core/csvtk/join/tests/tags.yml @@ -0,0 +1,2 @@ +csvtk/join: + - "modules/nf-core/csvtk/join/**" diff --git a/modules/nf-core/csvtk/split/environment.yml b/modules/nf-core/csvtk/split/environment.yml new file mode 100644 index 000000000..ec08bb439 --- /dev/null +++ 
b/modules/nf-core/csvtk/split/environment.yml @@ -0,0 +1,7 @@ +name: csvtk_split +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::csvtk=0.30.0 diff --git a/modules/nf-core/csvtk/split/main.nf b/modules/nf-core/csvtk/split/main.nf new file mode 100644 index 000000000..1b7d5dd15 --- /dev/null +++ b/modules/nf-core/csvtk/split/main.nf @@ -0,0 +1,56 @@ +process CSVTK_SPLIT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/csvtk:0.30.0--h9ee0642_0' : + 'biocontainers/csvtk:0.30.0--h9ee0642_0' }" + + input: + tuple val(meta), path(csv) + val in_format + val out_format + + output: + tuple val(meta), path("*.${out_extension}"), emit: split_csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def delimiter = in_format == "tsv" ? "--tabs" : (in_format == "csv" ? "--delimiter ',' " : in_format) + def out_delimiter = out_format == "tsv" ? "--out-tabs" : (out_format == "csv" ? "--out-delimiter ',' " : out_format) + out_extension = out_format == "tsv" ? 'tsv' : 'csv' + """ + sed -i.bak '/^##/d' $csv + csvtk \\ + split \\ + $args \\ + --num-cpus $task.cpus \\ + $delimiter \\ + $out_delimiter \\ + $csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e 's/csvtk v//g' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + out_extension = args.contains('--out-delimiter "\t"') || args.contains('-D "\t"') || args.contains("-D \$'\t'") ? "tsv" : "csv" + """ + touch ${prefix}.${out_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/csvtk/split/meta.yml b/modules/nf-core/csvtk/split/meta.yml new file mode 100644 index 000000000..6ff78aa01 --- /dev/null +++ b/modules/nf-core/csvtk/split/meta.yml @@ -0,0 +1,49 @@ +name: csvtk_split +description: Splits CSV/TSV into multiple files according to column values +keywords: + - split + - csv + - tsv +tools: + - csvtk: + description: CSVTK is a cross-platform, efficient and practical CSV/TSV toolkit that allows rapid data investigation and manipulation. + homepage: https://bioinf.shenwei.me/csvtk/ + documentation: https://bioinf.shenwei.me/csvtk/ + tool_dev_url: https://github.com/shenwei356/csvtk + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - csv: + type: file + description: CSV/TSV file + pattern: "*.{csv,tsv}" + - in_format: + type: string + description: Input format (csv, tab, or a delimiting character) + pattern: "*" + - out_format: + type: string + description: Output format (csv, tab, or a delimiting character) + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - split_csv: + type: file + description: Split CSV/TSV file + pattern: "*.{csv,tsv}" +authors: + - "@SusiJo" +maintainers: + - "@SusiJo" diff --git a/modules/nf-core/csvtk/split/tests/main.nf.test b/modules/nf-core/csvtk/split/tests/main.nf.test new file mode 100644 index 000000000..f3c499266 --- /dev/null +++ b/modules/nf-core/csvtk/split/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process CSVTK_SPLIT" + script "../main.nf" + process "CSVTK_SPLIT" + + tag "modules" + tag "modules_nfcore" + tag "csvtk" + tag "csvtk/split" + + test("split - csv") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) ] + ] + input[1] = "tsv" + input[2] = "tsv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("split - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) ] + ] + input[1] = "tsv" + input[2] = "tsv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/csvtk/split/tests/main.nf.test.snap b/modules/nf-core/csvtk/split/tests/main.nf.test.snap new file mode 100644 index 000000000..f0ec9def0 --- /dev/null +++ b/modules/nf-core/csvtk/split/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "split - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b17a61b0c41b19f7df3740979d68a8a0" + ], + "split_csv": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b17a61b0c41b19f7df3740979d68a8a0" + ] + } + ], + "timestamp": "2024-05-22T10:02:46.053585" + }, + "split - csv": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test-1.tsv:md5,2827284f1a6f41dd14ef82fb6a36ebad", + "test-11.tsv:md5,6c5555d689c4e685d35d6e394ad6e1e6", + "test-2.tsv:md5,589a2add7f0b8e998d4959e5d883e7d5", + "test-4.tsv:md5,e51cd0bfc35f5353d1fb75f723772ed0", + "test-NA.tsv:md5,20afd42832c6cf5821f9862d285c9350" + ] + ] + ], + "1": [ + "versions.yml:md5,b17a61b0c41b19f7df3740979d68a8a0" + ], + "split_csv": [ + [ + { + "id": "test" + }, + [ + "test-1.tsv:md5,2827284f1a6f41dd14ef82fb6a36ebad", + "test-11.tsv:md5,6c5555d689c4e685d35d6e394ad6e1e6", + "test-2.tsv:md5,589a2add7f0b8e998d4959e5d883e7d5", + "test-4.tsv:md5,e51cd0bfc35f5353d1fb75f723772ed0", + "test-NA.tsv:md5,20afd42832c6cf5821f9862d285c9350" + ] + ] + ], + "versions": [ + "versions.yml:md5,b17a61b0c41b19f7df3740979d68a8a0" + ] + } + ], + "timestamp": "2024-05-22T10:02:35.8578" + } +} \ No newline at end of file diff --git a/modules/nf-core/csvtk/split/tests/nextflow.config b/modules/nf-core/csvtk/split/tests/nextflow.config new file mode 100644 index 000000000..8f5a6f7ee --- /dev/null +++ b/modules/nf-core/csvtk/split/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CSVTK_SPLIT { + ext.args = "-C \'&\' --fields \'first_name\' " + } +} diff --git a/modules/nf-core/csvtk/split/tests/tags.yml b/modules/nf-core/csvtk/split/tests/tags.yml new file mode 100644 index 000000000..0d7dc029d --- 
/dev/null +++ b/modules/nf-core/csvtk/split/tests/tags.yml @@ -0,0 +1,2 @@ +csvtk/split: + - "modules/nf-core/csvtk/split/**" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 000000000..b48ced269 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e058..105f9265a 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a01..5f15a5fde 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: @@ -14,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -28,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 000000000..b1e1630bb --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 000000000..5f59a936d --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + [first lines of the mqc_yml MultiQC versions HTML table; angle-bracket markup lost in extraction] + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 000000000..405aa24ae --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/custom/gtffilter/environment.yml b/modules/nf-core/custom/gtffilter/environment.yml new file mode 100644 index 000000000..115f41235 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "custom_gtffilter" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "conda-forge::python=3.9.5" diff --git a/modules/nf-core/custom/gtffilter/main.nf b/modules/nf-core/custom/gtffilter/main.nf new file mode 100644 index 000000000..b682ff8c5 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/main.nf @@ -0,0 +1,37 @@ +process CUSTOM_GTFFILTER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'biocontainers/python:3.9--1' }" + + input: + tuple val(meta), path(gtf) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "gtf" + (gtf.extension == 'gz' ? '.gz' : '') + template 'gtffilter.py' + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "gtf" + (gtf.extension == 'gz' ?
'.gz' : '') + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/gtffilter/meta.yml b/modules/nf-core/custom/gtffilter/meta.yml new file mode 100644 index 000000000..2c8692218 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/meta.yml @@ -0,0 +1,51 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_gtffilter" +description: Filter a gtf file to keep only regions that are located on a chromosome represented in a given fasta file +keywords: + - gtf + - fasta + - filter +tools: + - "gtffilter": + description: "Filter a gtf file to keep only regions that are located on a chromosome represented in a given fasta file" + tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/gtffilter/main.nf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + + - gtf: + type: file + description: GTF file + pattern: "*.{gtf}" + + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fasta,fa}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + + - gtf: + type: file + description: Filtered GTF file + pattern: "*.{gtf}" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@nictru" +maintainers: + - "@nictru" diff --git a/modules/nf-core/custom/gtffilter/templates/gtffilter.py b/modules/nf-core/custom/gtffilter/templates/gtffilter.py new file mode 100644 index 000000000..764ec2eff --- /dev/null +++ b/modules/nf-core/custom/gtffilter/templates/gtffilter.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python + +# Written by Olga Botvinnik with subsequent reworking by Jonathan Manning and Nico Trummer. + +# MIT License + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import logging +import re +import gzip +import statistics +import platform +from typing import Set + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger("fasta_gtf_filter") +logger.setLevel(logging.INFO) + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. 
+ indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +def extract_fasta_seq_names(fasta_name: str) -> Set[str]: + """Extracts the sequence names from a FASTA file.""" + + is_gz = fasta_name.endswith(".gz") + open_fn = gzip.open if is_gz else open + + with open_fn(fasta_name) as fasta: + sequences = set() + for line in fasta: + line = line.decode("utf-8") if is_gz else line + if line.startswith(">"): + sequences.add(line[1:].split(None, 1)[0]) + + return sequences + + +def tab_delimited(file: str) -> float: + """Check if file is tab-delimited and return median number of tabs.""" + with open(file, "r") as f: + data = f.read(102400) + return statistics.median(line.count("\\t") for line in data.split("\\n")) + + +def filter_gtf( + fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool +) -> None: + """Filter GTF file based on FASTA sequence names.""" + if tab_delimited(gtf_in) != 8: + raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") + + seq_names_in_genome = extract_fasta_seq_names(fasta) + logger.info(f"Extracted chromosome sequence names from {fasta}") + logger.debug( + "All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome)) + ) + + seq_names_in_gtf = set() + try: + is_gz = gtf_in.endswith(".gz") + open_fn = gzip.open if is_gz else open + with open_fn(gtf_in) as gtf, open_fn(filtered_gtf_out, "wb" if is_gz else "w") as out: + line_count = 0 + for line in gtf: + line = line.decode("utf-8") if is_gz else line + seq_name = line.split("\\t")[0] + seq_names_in_gtf.add(seq_name) # Add sequence name to the set + + if seq_name in seq_names_in_genome: + if skip_transcript_id_check or re.search( + r'transcript_id "([^"]+)"', line + ): + out.write(line.encode() if is_gz else line) + line_count += 1 + + if line_count == 0: + raise ValueError("All GTF lines removed by filters") + + except IOError as e: + logger.error(f"File operation failed: {e}") + return + + logger.debug("All sequence IDs from GTF: " + ", ".join(sorted(seq_names_in_gtf))) + logger.info( + f"Extracted {line_count} matching sequences from {gtf_in} into {filtered_gtf_out}" + ) + + +filter_gtf("${fasta}", "${gtf}", "${prefix}.${suffix}", False) + +# Versions + +versions = {"${task.process}": {"python": platform.python_version()}} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/nf-core/custom/gtffilter/tests/main.nf.test b/modules/nf-core/custom/gtffilter/tests/main.nf.test new file mode 100644 index 000000000..252d11a16 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_process { + + name "Test Process CUSTOM_GTFFILTER" + script "../main.nf" + process "CUSTOM_GTFFILTER" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/gtffilter" + + test("test_custom_gtffilter") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + input[1] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot(process.out).match() } + ) + } + } + + test("test_custom_gtffilter_gzip") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + input[1] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_custom_gtffilter - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + input[1] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_custom_gtffilter_gzip - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + input[1] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/gtffilter/tests/main.nf.test.snap b/modules/nf-core/custom/gtffilter/tests/main.nf.test.snap new file mode 100644 index 000000000..787dd42e1 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "test_custom_gtffilter_gzip": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,aa8b2aa1e0b5fbbba3b04d471e1b0535" + ] + ], + "1": [ + "versions.yml:md5,39c43040514c93566d2e3dca39e54cf2" + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,aa8b2aa1e0b5fbbba3b04d471e1b0535" + ] + ], + "versions": [ + "versions.yml:md5,39c43040514c93566d2e3dca39e54cf2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T14:23:11.091273747" + }, + "test_custom_gtffilter": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,aa8b2aa1e0b5fbbba3b04d471e1b0535" + ] + ], + "1": [ + "versions.yml:md5,39c43040514c93566d2e3dca39e54cf2" + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,aa8b2aa1e0b5fbbba3b04d471e1b0535" + ] + ], + "versions": [ + "versions.yml:md5,39c43040514c93566d2e3dca39e54cf2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T14:23:03.654104046" + }, + "test_custom_gtffilter_gzip - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,4547ffaa530b6d65b2dd1f607d7f85e3" + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4547ffaa530b6d65b2dd1f607d7f85e3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T14:23:24.216284615" + }, + "test_custom_gtffilter - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,4547ffaa530b6d65b2dd1f607d7f85e3" + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4547ffaa530b6d65b2dd1f607d7f85e3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T14:23:17.765499066" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/gtffilter/tests/tags.yml b/modules/nf-core/custom/gtffilter/tests/tags.yml new file mode 100644 index 000000000..34dda2178 --- /dev/null +++ b/modules/nf-core/custom/gtffilter/tests/tags.yml @@ -0,0 +1,2 @@ +custom/gtffilter: + - "modules/nf-core/custom/gtffilter/**" diff --git a/modules/nf-core/custom/tx2gene/environment.yml b/modules/nf-core/custom/tx2gene/environment.yml new file mode 100644 index 000000000..a859dc881 --- /dev/null +++ b/modules/nf-core/custom/tx2gene/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "custom_tx2gene" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python=3.9.5 diff --git a/modules/nf-core/custom/tx2gene/main.nf b/modules/nf-core/custom/tx2gene/main.nf new file mode 100644 index 000000000..99c00aa06 --- /dev/null +++ b/modules/nf-core/custom/tx2gene/main.nf @@ -0,0 +1,36 @@ +process CUSTOM_TX2GENE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'biocontainers/python:3.9--1' }" + + input: + tuple val(meta), path(gtf) + tuple val(meta2), path ("quants/*") + val quant_type + val id + val extra + + output: + tuple val(meta), path("*tx2gene.tsv"), emit: tx2gene + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'tx2gene.py' + + stub: + """ + touch ${meta.id}.tx2gene.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/tx2gene/meta.yml b/modules/nf-core/custom/tx2gene/meta.yml new file mode 100644 index 000000000..d991bf1be --- /dev/null +++ b/modules/nf-core/custom/tx2gene/meta.yml @@ -0,0 +1,65 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_tx2gene" +description: Make a transcript/gene mapping from a GTF and cross-reference with transcript quantifications. +keywords: + - gene + - gtf + - pseudoalignment + - transcript +tools: + - "custom": + description: | + "Custom module to create a transcript to gene mapping from a GTF and + check it against transcript quantifications" + tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tx2gene/main.nf" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information related to the GTF file + e.g. `[ id:'yeast' ]` + - gtf: + type: file + description: An annotation file of the reference genome in GTF format + pattern: "*.gtf" + - meta2: + type: map + description: | + Groovy Map containing information related to the experiment as a whole + e.g. 
`[ id:'SRP123456' ]` + - quants: + type: directory + description: Paths to subdirectories corresponding to + sample-wise runs of Salmon or Kallisto + - quant_type: + type: string + description: Quantification type, 'kallisto' or 'salmon' + - id: + type: string + description: Gene ID attribute in the GTF file (default= gene_id) + - extra: + type: string + description: Extra gene attribute in the GTF file (default= gene_name) + +output: + - meta: + type: map + description: | + Groovy Map containing reference information related to the GTF file + e.g. `[ id:'yeast' ]` + - tx2gene: + type: file + description: A transcript/ gene mapping table in TSV format + pattern: "*.tx2gene.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/modules/nf-core/custom/tx2gene/templates/tx2gene.py b/modules/nf-core/custom/tx2gene/templates/tx2gene.py new file mode 100755 index 000000000..7fd0de64e --- /dev/null +++ b/modules/nf-core/custom/tx2gene/templates/tx2gene.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 + +# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license. + +import logging +import argparse +import glob +import os +import platform +import re +from collections import Counter, defaultdict, OrderedDict +from collections.abc import Set +from typing import Dict + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]: + """ + Read the top 100 transcripts from the quantification file. + + Parameters: + quant_dir (str): Directory where quantification files are located. + file_pattern (str): Pattern to match quantification files. + + Returns: + set: A set containing the top 100 transcripts. + """ + try: + # Find the quantification file within the directory + quant_file_path = glob.glob(os.path.join(quant_dir, "*", file_pattern))[0] + with open(quant_file_path, "r") as file_handle: + # Read the file and extract the top 100 transcripts + return {line.split()[0] for i, line in enumerate(file_handle) if i > 0 and i <= 100} + except IndexError: + # Log an error and raise a FileNotFoundError if the quant file does not exist + logger.error("No quantification files found.") + raise FileNotFoundError("Quantification file not found.") + + +def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str: + """ + Discover the attribute in the GTF that corresponds to transcripts, prioritizing 'transcript_id'. + + Parameters: + gtf_file (str): Path to the GTF file. + transcripts (Set[str]): A set of transcripts to match in the GTF file. + + Returns: + str: The attribute name that corresponds to transcripts in the GTF file. 
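+ + Example (illustrative, not from the module's tests): if the quantified transcripts include "ENST00000361390" and the GTF attribute strings carry transcript_id "ENST00000361390", the 'transcript_id' key collects the most votes and is returned.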
+ """ + + votes = Counter() + with open(gtf_file) as inh: + # Read GTF file, skipping header lines + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\\t") + + # Use regular expression to correctly split the attributes string + attributes_str = cols[8] + attributes = dict(re.findall(r'(\\S+) "(.*?)(?<!\\\\)"', attributes_str)) + + # Count each attribute key whose value matches one of the top transcripts + votes.update(key for key, value in attributes.items() if value in transcripts) + + if not votes: + # Default to 'transcript_id' if no attribute matched the transcripts + return "transcript_id" + + # Return the attribute with the most votes + return votes.most_common(1)[0][0] + + +def parse_attributes(attributes_text: str) -> Dict[str, str]: + """ + Parse the attributes column of a GTF file. + + :param attributes_text: The attributes column as a string. + :return: A dictionary of the attributes. + """ + # Split the attributes string by semicolon and strip whitespace + attributes = attributes_text.strip().split(";") + attr_dict = OrderedDict() + + # Iterate over each attribute pair + for attribute in attributes: + # Split the attribute into key and value, ensuring there are two parts + parts = attribute.strip().split(" ", 1) + if len(parts) == 2: + key, value = parts + # Remove any double quotes from the value + value = value.replace('"', "") + attr_dict[key] = value + + return attr_dict + + +def map_transcripts_to_gene( + quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str +) -> bool: + """ + Map transcripts to gene names and write the output to a file. + + Parameters: + quant_type (str): The quantification method used (e.g., 'salmon'). + gtf_file (str): Path to the GTF file. + quant_dir (str): Directory where quantification files are located. + gene_id (str): The gene ID attribute in the GTF file. + extra_id_field (str): Additional ID field in the GTF file. + output_file (str): The output file path. + + Returns: + bool: True if the operation was successful, False otherwise. + """ + # Read the top transcripts based on quantification type + transcripts = read_top_transcripts(quant_dir, "quant.sf" if quant_type == "salmon" else "abundance.tsv") + # Discover the attribute that corresponds to transcripts in the GTF + transcript_attribute = discover_transcript_attribute(gtf_file, transcripts) + + # Open GTF and output file to write the mappings + # Initialize the set to track seen combinations + seen = set() + + with open(gtf_file) as inh, open(output_file, "w") as output_handle: + output_handle.write(f"{transcript_attribute}\\t{gene_id}\\t{extra_id_field}\\n") + # Parse each line of the GTF, mapping transcripts to genes + for line in filter(lambda x: not x.startswith("#"), inh): + cols = line.split("\\t") + attr_dict = parse_attributes(cols[8]) + if gene_id in attr_dict and transcript_attribute in attr_dict: + # Create a unique identifier for the transcript-gene combination + transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id]) + + # Check if the combination has already been seen + if transcript_gene_pair not in seen: + # If it's a new combination, write it to the output and add to the seen set + extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id]) + output_handle.write(f"{attr_dict[transcript_attribute]}\\t{attr_dict[gene_id]}\\t{extra_id}\\n") + seen.add(transcript_gene_pair) + + return True + + +# Main function to parse arguments and call the mapping function +if __name__ == "__main__": + if '${task.ext.prefix}' != "null": + prefix = "${task.ext.prefix}." + elif '$meta.id' != "null": + prefix = '${meta.id}.'
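+ # neither ext.prefix nor meta.id is set, so fall back to a bare tx2gene.tsv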
+ else: + prefix = '' + + if not map_transcripts_to_gene('$quant_type', '$gtf', 'quants', '$id', '$extra', f"{prefix}tx2gene.tsv"): + logger.error("Failed to map transcripts to genes.") + + # Write the versions + versions_this_module = {} + versions_this_module["${task.process}"] = {"python": platform.python_version()} + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions_this_module)) diff --git a/modules/nf-core/custom/tx2gene/tests/main.nf.test b/modules/nf-core/custom/tx2gene/tests/main.nf.test new file mode 100644 index 000000000..b15592798 --- /dev/null +++ b/modules/nf-core/custom/tx2gene/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process CUSTOM_TX2GENE" + script "../main.nf" + process "CUSTOM_TX2GENE" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/tx2gene" + tag "untar" + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/kallisto_results.tar.gz', checkIfExists: true) + ]) + """ + } + } + } + + test("saccharomyces_cerevisiae - gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf', checkIfExists: true) + ]) + input[1] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[2] = 'kallisto' + input[3] = 'gene_id' + input[4] = 'gene_name' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tx2gene).match('tx2gene') }, + { assert snapshot(process.out.versions).match('versions') } + ) + } + } + + test("saccharomyces_cerevisiae - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf', checkIfExists: true) + ]) + input[1] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[2] = 'kallisto' + input[3] = 'gene_id' + input[4] = 'gene_name' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tx2gene).match('tx2gene - stub') }, + { assert snapshot(process.out.versions).match('versions - stub') } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap b/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap new file mode 100644 index 000000000..1e76e10d6 --- /dev/null +++ b/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,fb8145d7fbc6043ba031249b23ecda50" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T13:14:18.218251" + }, + "tx2gene": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tx2gene.tsv:md5,0e2418a69d2eba45097ebffc2f700bfe" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T13:14:18.21054" + }, + "tx2gene - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tx2gene.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T13:14:25.915434" + }, + "versions - stub": { + "content": [ + [ + "versions.yml:md5,5613eefbca41377128f1d8dc09b9fb60" + ] + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T13:14:25.919243" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/tx2gene/tests/tags.yml b/modules/nf-core/custom/tx2gene/tests/tags.yml new file mode 100644 index 000000000..493fbc3b1 --- /dev/null +++ b/modules/nf-core/custom/tx2gene/tests/tags.yml @@ -0,0 +1,2 @@ +custom/tx2gene: + - "modules/nf-core/custom/tx2gene/**" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 000000000..1787b38a9 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 47fd0e584..d79f1c862 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -25,15 +25,25 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -45,7 +55,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a0..ee5507e06 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 000000000..70edae4d9 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
+ //     <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
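+ // + // That embedded date is why the assertions below only match report file names, check for a stable "<html" marker, and snapshot the versions channel; see: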
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("<html") }, + { assert path(process.out.html[0][1][1]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("<html") }, + { assert path(process.out.html[0][1][1]).text.contains("<html") }, + { assert path(process.out.html[0][1][2]).text.contains("<html") }, + { assert path(process.out.html[0][1][3]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("<html") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 000000000..86f7c3115 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp":
"2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 000000000..7834294ba --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 000000000..3d98a08b0 --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,7 @@ +name: gawk +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 000000000..ca4689297 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,55 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input) + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + + program = program_file ? "-f ${program_file}" : "${args2}" + + """ + awk \\ + ${args} \\ + ${program} \\ + ${input} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 000000000..2b6033b0b --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,50 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. +keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file + pattern: "*" + - program_file: + type: file + description: Optional file containing logic for awk to execute. 
If you don't wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix` + pattern: "*" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test new file mode 100644 index 000000000..fce82ca95 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GAWK" + script "../main.nf" + process "GAWK" + + tag "modules" + tag "modules_nfcore" + tag "gawk" + + test("convert fasta to bed") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("convert fasta to bed with program file") { + config "./nextflow_with_program_file.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = Channel.of('BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap new file mode 100644 index 000000000..4f3a759c6 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "convert fasta to bed with program file": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-17T15:20:02.495430346" + }, + "convert fasta to bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-17T15:19:53.291809648" + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config new file mode 100644 index 000000000..6e5d43a35 --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GAWK { + ext.suffix = "bed" + ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } +} diff --git 
a/modules/nf-core/gawk/tests/nextflow_with_program_file.config b/modules/nf-core/gawk/tests/nextflow_with_program_file.config new file mode 100644 index 000000000..693ad4196 --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow_with_program_file.config @@ -0,0 +1,5 @@ +process { + withName: GAWK { + ext.suffix = "bed" + } +} diff --git a/modules/nf-core/gawk/tests/tags.yml b/modules/nf-core/gawk/tests/tags.yml new file mode 100644 index 000000000..72e4531d2 --- /dev/null +++ b/modules/nf-core/gawk/tests/tags.yml @@ -0,0 +1,2 @@ +gawk: + - "modules/nf-core/gawk/**" diff --git a/modules/nf-core/gnu/sort/environment.yml b/modules/nf-core/gnu/sort/environment.yml new file mode 100644 index 000000000..eb9b77edd --- /dev/null +++ b/modules/nf-core/gnu/sort/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: gnu_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::coreutils=9.3 diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf new file mode 100644 index 000000000..e1167666f --- /dev/null +++ b/modules/nf-core/gnu/sort/main.nf @@ -0,0 +1,51 @@ +process GNU_SORT { + tag "$meta.id" + label "process_low" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/coreutils:9.3': + 'biocontainers/coreutils:9.3' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), file( "${output_file}" ) , emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.extension}" + output_file = "${prefix}.${suffix}" + def VERSION = "9.3" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + sort ${args} ${input} > ${output_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + coreutils: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.extension}" + output_file = "${prefix}.${suffix}" + def VERSION = "9.3" + + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${output_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + coreutils: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml new file mode 100644 index 000000000..9d961750c --- /dev/null +++ b/modules/nf-core/gnu/sort/meta.yml @@ -0,0 +1,41 @@ +name: "gnu_sort" +description: | + Writes a sorted concatenation of file/s +keywords: + - GNU + - sort + - merge compare +tools: + - sort: + description: "Writes a sorted concatenation of file/s" + homepage: "https://github.com/vgl-hub/gfastats" + documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" + licence: ["GPL"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: Draft assembly file + pattern: "*.{txt,bed,interval,genome,bins}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sorted: + type: file + description: The sorted txt file generated by sort + pattern: "*.{txt,bed,interval,genome,bins}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test b/modules/nf-core/gnu/sort/tests/main.nf.test new file mode 100644 index 000000000..e40301871 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_process { + + name "Test Process GNU_SORT" + script "modules/nf-core/gnu/sort/main.nf" + process "GNU_SORT" + + tag "modules" + tag "modules_nfcore" + tag "gnu" + tag "gnu/sort" + + test("unsorted_genome_sort") { + config "./sort_simple_bed.config" + + when { + process { + """ + input[0] = [ + [id:'genome_test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['genome_file'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("genome_sort") + } + ) + } + + } + + test("unsorted_intervals_sort") { + config "./sort_simple_bed.config" + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['intervals'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("interval_sort") + } + ) + } + + } + + test("unsorted_csv_sort") { + config "./sort_complex.config" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("csv_sort") + } + ) + } + + } + + test("unsorted_csv_sort_stub") { + config "./sort_complex.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + +} diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test.snap b/modules/nf-core/gnu/sort/tests/main.nf.test.snap new file mode 100644 index 000000000..63891bc4b --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "unsorted_csv_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:44.714632791" + }, + "interval_sort": { + "content": [ + 
"test.bed.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:37.962807086" + }, + "unsorted_csv_sort_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:51.456258705" + }, + "csv_sort": { + "content": [ + "test.csv.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:44.725431761" + }, + "unsorted_genome_sort": { + "content": [ + { + "0": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:31.041778719" + }, + "genome_sort": { + "content": [ + "genome_test.bed.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:31.060201722" + }, + "unsorted_intervals_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:37.951397547" + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_complex.config b/modules/nf-core/gnu/sort/tests/sort_complex.config new file mode 100644 index 000000000..103eaaf6f --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_complex.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-t ';' -g -k 1,1 -k 2,2" } + ext.suffix = { "csv.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_bed.config b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config new file mode 100644 index 000000000..d7d52e0f2 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "bed.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_genome.config b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config new file mode 100644 index 000000000..4dcec3855 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "genome.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/tags.yml b/modules/nf-core/gnu/sort/tests/tags.yml new file mode 100644 index 000000000..ac40e376d --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/tags.yml @@ -0,0 +1,2 @@ +gnu/sort: + - 
"modules/nf-core/gnu/sort/**" diff --git a/modules/nf-core/hisat2/align/environment.yml b/modules/nf-core/hisat2/align/environment.yml new file mode 100644 index 000000000..0c1415f94 --- /dev/null +++ b/modules/nf-core/hisat2/align/environment.yml @@ -0,0 +1,8 @@ +name: hisat2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hisat2=2.2.1 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/hisat2/align/main.nf b/modules/nf-core/hisat2/align/main.nf index 697177295..2289a9fc0 100644 --- a/modules/nf-core/hisat2/align/main.nf +++ b/modules/nf-core/hisat2/align/main.nf @@ -3,15 +3,15 @@ process HISAT2_ALIGN { label 'process_high' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::hisat2=2.2.1 bioconda::samtools=1.16.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" + 'biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" input: tuple val(meta), path(reads) - path index - path splicesites + tuple val(meta2), path(index) + tuple val(meta3), path(splicesites) output: tuple val(meta), path("*.bam") , emit: bam @@ -33,6 +33,7 @@ process HISAT2_ALIGN { } else if (meta.strandedness == 'reverse') { strandedness = meta.single_end ? '--rna-strandness R' : '--rna-strandness RF' } + ss = "$splicesites" ? "--known-splicesite-infile $splicesites" : '' def seq_center = params.seq_center ? "--rg-id ${prefix} --rg SM:$prefix --rg CN:${params.seq_center.replaceAll('\\s','_')}" : "--rg-id ${prefix} --rg SM:$prefix" if (meta.single_end) { def unaligned = params.save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' @@ -42,7 +43,7 @@ process HISAT2_ALIGN { -x \$INDEX \\ -U $reads \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ @@ -65,7 +66,7 @@ process HISAT2_ALIGN { -1 ${reads[0]} \\ -2 ${reads[1]} \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ diff --git a/modules/nf-core/hisat2/align/meta.yml b/modules/nf-core/hisat2/align/meta.yml index 7550aefab..b23eab75b 100644 --- a/modules/nf-core/hisat2/align/meta.yml +++ b/modules/nf-core/hisat2/align/meta.yml @@ -5,7 +5,6 @@ keywords: - fasta - genome - reference - tools: - hisat2: description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. @@ -13,7 +12,6 @@ tools: documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" licence: ["MIT"] - input: - meta: type: map @@ -25,15 +23,24 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - index: type: file description: HISAT2 genome index file pattern: "*.ht2" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - splicesites: type: file description: Splices sites in gtf file pattern: "*.{txt}" - output: - meta: type: map @@ -52,6 +59,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@ntoda03" + - "@ramprasadn" +maintainers: + - "@ntoda03" + - "@ramprasadn" diff --git a/modules/nf-core/hisat2/align/tests/main.nf.test b/modules/nf-core/hisat2/align/tests/main.nf.test new file mode 100644 index 000000000..3a520e9a0 --- /dev/null +++ b/modules/nf-core/hisat2/align/tests/main.nf.test @@ -0,0 +1,218 @@ +nextflow_process { + + name "Test Process HISAT2_ALIGN" + script "../main.nf" + process "HISAT2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "hisat2" + tag "hisat2/align" + tag "hisat2/build" + tag "hisat2/extractsplicesites" + + test("Single-End") { + + setup { + run("HISAT2_EXTRACTSPLICESITES") { + script "../../extractsplicesites/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + """ + } + } + + run("HISAT2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = Channel.of([ [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + input[2] = HISAT2_EXTRACTSPLICESITES.out.txt + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = HISAT2_BUILD.out.index + input[2] = HISAT2_EXTRACTSPLICESITES.out.txt + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.summary).match("se_summary") }, + { assert snapshot(process.out.fastq).match("se_fastq") }, + { assert snapshot(process.out.versions).match("se_versions") } + ) + } + } + + test("Paired-End") { + + setup { + run("HISAT2_EXTRACTSPLICESITES") { + script "../../extractsplicesites/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + """ + } + } + + run("HISAT2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = Channel.of([ [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + input[2] = HISAT2_EXTRACTSPLICESITES.out.txt + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = HISAT2_BUILD.out.index + input[2] = 
HISAT2_EXTRACTSPLICESITES.out.txt + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.summary).match("pe_summary") }, + { assert snapshot(process.out.fastq).match("pe_fastq") }, + { assert snapshot(process.out.versions).match("pe_versions") } + ) + } + } + + test("Single-End No Splice Sites") { + + setup { + run("HISAT2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[:],[]] + input[2] = [[:],[]] + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = HISAT2_BUILD.out.index + input[2] = [[:],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.summary).match("se_no_ss_summary") }, + { assert snapshot(process.out.fastq).match("se_no_ss_fastq") }, + { assert snapshot(process.out.versions).match("se_no_ss_versions") } + ) + } + } + + test("Paired-End No Splice Sites") { + + setup { + run("HISAT2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[:],[]] + input[2] = [[:],[]] + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = HISAT2_BUILD.out.index + input[2] = [[:],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.summary).match("pe_no_ss_summary") }, + { assert snapshot(process.out.fastq).match("pe_no_ss_fastq") }, + { assert snapshot(process.out.versions).match("pe_no_ss_versions") } + ) + } + } +} diff --git a/modules/nf-core/hisat2/align/tests/main.nf.test.snap b/modules/nf-core/hisat2/align/tests/main.nf.test.snap new file mode 100644 index 000000000..a80fa3c50 --- /dev/null +++ b/modules/nf-core/hisat2/align/tests/main.nf.test.snap @@ -0,0 +1,122 @@ +{ + "se_versions": { + "content": [ + [ + "versions.yml:md5,ceb638f44ebdaf09ba1f5c5c409585e2" + ] + ], + "timestamp": "2023-10-16T15:14:50.269895296" + }, + "se_no_ss_summary": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.hisat2.summary.log:md5,7b8a9e61b7646da1089b041333c41a87" + ] + ] + ], + "timestamp": "2023-10-16T15:15:22.897386626" + }, + "pe_no_ss_versions": { + "content": [ + [ + "versions.yml:md5,ceb638f44ebdaf09ba1f5c5c409585e2" + ] + ], + "timestamp": "2023-10-16T15:15:42.583699978" + }, + "se_no_ss_versions": { + "content": [ + [ + "versions.yml:md5,ceb638f44ebdaf09ba1f5c5c409585e2" + ] + ], + "timestamp": "2023-10-16T15:15:22.909407356" + }, + "pe_no_ss_summary": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.hisat2.summary.log:md5,9839b31db795958cc4b70711a3414e9c" + ] + ] + ], + "timestamp": "2023-10-16T15:15:42.569775538" + }, + "pe_no_ss_fastq": { + "content": [ + [ + + ] 
+ ], + "timestamp": "2023-10-16T15:15:42.576881608" + }, + "se_summary": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.hisat2.summary.log:md5,7b8a9e61b7646da1089b041333c41a87" + ] + ] + ], + "timestamp": "2023-10-16T15:14:50.252466896" + }, + "pe_summary": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.hisat2.summary.log:md5,9839b31db795958cc4b70711a3414e9c" + ] + ] + ], + "timestamp": "2023-10-16T15:15:09.881690889" + }, + "pe_fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-10-16T15:15:09.888696129" + }, + "se_no_ss_fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-10-16T15:15:22.904010016" + }, + "se_fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-10-16T15:14:50.264366105" + }, + "pe_versions": { + "content": [ + [ + "versions.yml:md5,ceb638f44ebdaf09ba1f5c5c409585e2" + ] + ], + "timestamp": "2023-10-16T15:15:09.894683308" + } +} \ No newline at end of file diff --git a/modules/nf-core/hisat2/align/tests/tags.yml b/modules/nf-core/hisat2/align/tests/tags.yml new file mode 100644 index 000000000..3a46cc896 --- /dev/null +++ b/modules/nf-core/hisat2/align/tests/tags.yml @@ -0,0 +1,4 @@ +hisat2/align: + - modules/nf-core/hisat2/align/** + - modules/nf-core/hisat2/build/** + - modules/nf-core/hisat2/extractsplicesites/** diff --git a/modules/nf-core/hisat2/build/environment.yml b/modules/nf-core/hisat2/build/environment.yml new file mode 100644 index 000000000..2e67cd3ea --- /dev/null +++ b/modules/nf-core/hisat2/build/environment.yml @@ -0,0 +1,7 @@ +name: hisat2_build +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hisat2=2.2.1 diff --git a/modules/nf-core/hisat2/build/main.nf b/modules/nf-core/hisat2/build/main.nf index fee4064ce..766e8731d 100644 --- a/modules/nf-core/hisat2/build/main.nf +++ b/modules/nf-core/hisat2/build/main.nf @@ -4,19 +4,19 @@ process HISAT2_BUILD { label 'process_high_memory' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? 'bioconda::hisat2=2.2.1' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' : - 'quay.io/biocontainers/hisat2:2.2.1--h1b792b2_3' }" + 'biocontainers/hisat2:2.2.1--h1b792b2_3' }" input: - path fasta - path gtf - path splicesites + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + tuple val(meta3), path(splicesites) output: - path "hisat2" , emit: index - path "versions.yml" , emit: versions + tuple val(meta), path("hisat2") , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,9 +37,9 @@ process HISAT2_BUILD { def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 if (avail_mem >= hisat2_build_memory) { log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" - extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" - ss = "--ss $splicesites" - exon = "--exon ${gtf.baseName}.exons.txt" + extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : "" + ss = splicesites ? "--ss $splicesites" : "" + exon = gtf ? 
"--exon ${gtf.baseName}.exons.txt" : "" } else { log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." diff --git a/modules/nf-core/hisat2/build/meta.yml b/modules/nf-core/hisat2/build/meta.yml index a2e1fd67e..6c28eb21c 100644 --- a/modules/nf-core/hisat2/build/meta.yml +++ b/modules/nf-core/hisat2/build/meta.yml @@ -13,30 +13,49 @@ tools: documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" licence: ["MIT"] - input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Reference fasta file pattern: "*.{fa,fasta,fna}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - gtf: type: file description: Reference gtf annotation file pattern: "*.{gtf}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - splicesites: type: file description: Splices sites in gtf file pattern: "*.{txt}" - output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - index: type: file description: HISAT2 genome index file pattern: "*.ht2" - + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ntoda03" +maintainers: + - "@ntoda03" diff --git a/modules/nf-core/hisat2/build/tests/main.nf.test b/modules/nf-core/hisat2/build/tests/main.nf.test new file mode 100644 index 000000000..5b31debc4 --- /dev/null +++ b/modules/nf-core/hisat2/build/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process HISAT2_BUILD" + script "../main.nf" + process "HISAT2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "hisat2" + tag "hisat2/build" + tag "hisat2/extractsplicesites" + + test("Should run without failures") { + + setup { + run("HISAT2_EXTRACTSPLICESITES") { + script "../../extractsplicesites/main.nf" + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + """ + } + } + } + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = Channel.of([ [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + input[2] = HISAT2_EXTRACTSPLICESITES.out.txt + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hisat2/build/tests/main.nf.test.snap b/modules/nf-core/hisat2/build/tests/main.nf.test.snap new file mode 100644 index 000000000..c7d364dbc --- /dev/null +++ b/modules/nf-core/hisat2/build/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "genome" + }, + [ + "genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803", + "genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777", + 
"genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.5.ht2:md5,91198831aaba993acac1734138c5f173", + "genome.6.ht2:md5,265e1284ce85686516fae5d35540994a", + "genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f", + "genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb" + ] + ] + ], + "1": [ + "versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0" + ], + "index": [ + [ + { + "id": "genome" + }, + [ + "genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803", + "genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.5.ht2:md5,91198831aaba993acac1734138c5f173", + "genome.6.ht2:md5,265e1284ce85686516fae5d35540994a", + "genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f", + "genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb" + ] + ] + ], + "versions": [ + "versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0" + ] + } + ], + "timestamp": "2023-10-16T14:42:22.381609786" + } +} \ No newline at end of file diff --git a/modules/nf-core/hisat2/build/tests/tags.yml b/modules/nf-core/hisat2/build/tests/tags.yml new file mode 100644 index 000000000..a7faecb27 --- /dev/null +++ b/modules/nf-core/hisat2/build/tests/tags.yml @@ -0,0 +1,3 @@ +hisat2/build: + - modules/nf-core/hisat2/build/** + - modules/nf-core/hisat2/extractsplicesites/** diff --git a/modules/nf-core/hisat2/extractsplicesites/environment.yml b/modules/nf-core/hisat2/extractsplicesites/environment.yml new file mode 100644 index 000000000..4b03e5e46 --- /dev/null +++ b/modules/nf-core/hisat2/extractsplicesites/environment.yml @@ -0,0 +1,7 @@ +name: hisat2_extractsplicesites +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hisat2=2.2.1 diff --git a/modules/nf-core/hisat2/extractsplicesites/main.nf b/modules/nf-core/hisat2/extractsplicesites/main.nf index 1423f25cd..b0c8513aa 100644 --- a/modules/nf-core/hisat2/extractsplicesites/main.nf +++ b/modules/nf-core/hisat2/extractsplicesites/main.nf @@ -3,17 +3,17 @@ process HISAT2_EXTRACTSPLICESITES { label 'process_medium' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? 'bioconda::hisat2=2.2.1' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' : - 'quay.io/biocontainers/hisat2:2.2.1--h1b792b2_3' }" + 'biocontainers/hisat2:2.2.1--h1b792b2_3' }" input: - path gtf + tuple val(meta), path(gtf) output: - path "*.splice_sites.txt", emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path("*.splice_sites.txt"), emit: txt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/hisat2/extractsplicesites/meta.yml b/modules/nf-core/hisat2/extractsplicesites/meta.yml index 7dc1bac81..40d77ce00 100644 --- a/modules/nf-core/hisat2/extractsplicesites/meta.yml +++ b/modules/nf-core/hisat2/extractsplicesites/meta.yml @@ -5,7 +5,6 @@ keywords: - gtf - genome - reference - tools: - hisat2: description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. 
@@ -13,14 +12,22 @@ tools: documentation: https://daehwankimlab.github.io/hisat2/manual/ doi: "10.1038/s41587-019-0201-4" licence: ["MIT"] - input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - gtf: type: file description: Reference gtf annotation file pattern: "*.{gtf}" - output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - versions: type: file description: File containing software versions @@ -29,6 +36,9 @@ output: type: file description: Splices sites in gtf file pattern: "*.{splice_sites.txt}" - authors: - "@ntoda03" + - "@ramprasadn" +maintainers: + - "@ntoda03" + - "@ramprasadn" diff --git a/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test b/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test new file mode 100644 index 000000000..72eb6d53b --- /dev/null +++ b/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process HISAT2_EXTRACTSPLICESITES" + script "../main.nf" + process "HISAT2_EXTRACTSPLICESITES" + tag "modules" + tag "modules_nfcore" + tag "hisat2" + tag "hisat2/extractsplicesites" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path("${process.out.txt[0][1]}").exists() }, + { assert snapshot(process.out.versions).match() } + ) + } + } +} diff --git a/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test.snap b/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test.snap new file mode 100644 index 000000000..17f1c8ebf --- /dev/null +++ b/modules/nf-core/hisat2/extractsplicesites/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,eeea7231fe197810659b8bad4133aff2" + ] + ], + "timestamp": "2024-01-18T20:56:30.71763" + } +} \ No newline at end of file diff --git a/modules/nf-core/hisat2/extractsplicesites/tests/tags.yml b/modules/nf-core/hisat2/extractsplicesites/tests/tags.yml new file mode 100644 index 000000000..4b0ed4010 --- /dev/null +++ b/modules/nf-core/hisat2/extractsplicesites/tests/tags.yml @@ -0,0 +1,2 @@ +hisat2/extractsplicesites: + - modules/nf-core/hisat2/extractsplicesites/** diff --git a/modules/nf-core/miranda/environment.yml b/modules/nf-core/miranda/environment.yml new file mode 100644 index 000000000..a04ca7f84 --- /dev/null +++ b/modules/nf-core/miranda/environment.yml @@ -0,0 +1,7 @@ +name: miranda +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::miranda=3.3a diff --git a/modules/nf-core/miranda/main.nf b/modules/nf-core/miranda/main.nf index a050c4b62..47a98253e 100644 --- a/modules/nf-core/miranda/main.nf +++ b/modules/nf-core/miranda/main.nf @@ -2,10 +2,10 @@ process MIRANDA { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::miranda=3.3a" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/miranda:3.3a--h779adbc_3': - 'quay.io/biocontainers/miranda:3.3a--h779adbc_3' }" + 'biocontainers/miranda:3.3a--h779adbc_3' }" input: tuple val(meta), path(query) diff --git a/modules/nf-core/miranda/meta.yml b/modules/nf-core/miranda/meta.yml index 1aca25783..d3950fd19 100644 --- a/modules/nf-core/miranda/meta.yml +++ b/modules/nf-core/miranda/meta.yml @@ -11,7 +11,6 @@ tools: documentation: "https://cbio.mskcc.org/miRNA2003/miranda.html" doi: "10.1186/gb-2003-5-1-r1" licence: "GNU Public License" - input: - meta: type: map @@ -26,7 +25,6 @@ input: type: file description: FASTA file containing the sequence(s) to be scanned pattern: "*.{fa,fasta}" - output: - meta: type: map @@ -41,6 +39,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@BarryDigby" +maintainers: + - "@BarryDigby" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 000000000..ca39fb67e --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index a8159a57b..47ac352f9 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
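
The `logo` line added in this MultiQC hunk uses a Groovy slashy string so the embedded quotes reach the shell unescaped; when a logo is staged, MultiQC receives an extra inline config entry. A stand-alone sketch of the interpolation (a plain string stands in for the staged path, and the file name is hypothetical):

```groovy
// What the new $logo variable expands to
def multiqc_logo = 'my_logo.png'   // a staged path(multiqc_logo) in the real module
def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
assert logo == '--cl-config \'custom_logo: "my_logo.png"\''
// i.e. the command becomes:
//   multiqc --force <args> <configs> --cl-config 'custom_logo: "my_logo.png"' .
```
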
cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b279..45a9bc35e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,4 @@ -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -12,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -30,14 +29,13 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -53,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 000000000..f1c4242ef --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 000000000..bfebd8029 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 
+1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 000000000..bea6c0d37 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 000000000..f8450fa56 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +name: samtools_faidx + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 000000000..bdcdbc954 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 000000000..f3c25de20 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,65 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
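
Every output of the SAMTOOLS_FAIDX module above is optional because one module covers two uses: plain indexing emits a `.fai` (plus a `.gzi` for bgzipped input), while region extraction emits a FASTA whenever `-o`/`--output` appears in `ext.args`. The stub therefore recovers the would-be output name by matching `task.ext.args` with a regex. The same logic in isolation, with an assumed args value taken from the module's test config further below:

```groovy
// Stand-alone illustration of the stub's -o/--output parsing (same regex as above)
def args = 'MT192765.1 -o extract.fa'                 // assumed; from the test nextflow.config below
def match = (args =~ /-o(?:utput)?\s(.*)\s?/).findAll()
def fastacmd = match[0] ? "touch ${match[0][1]}" : '' // no match -> match[0] is null -> no FASTA touched
assert fastacmd == 'touch extract.fa'
```
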
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fa: + type: file + description: FASTA file + pattern: "*.{fa}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 000000000..17244ef2e --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], 
// meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 000000000..3223b72bc --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:14.779784761" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:20.256633877" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:25.632577273" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:31.058424849" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + 
"3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:36.479929617" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 000000000..f76a3ba09 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 000000000..33ebbd5df --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 000000000..e4a839481 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 000000000..68b81558e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 000000000..754d84b73 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 000000000..97991358e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 000000000..3b648a37d --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 000000000..23989c612 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "BAM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:28.002887" + }, + "BAM": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:13.330971" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 000000000..2d2b7255e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git 
a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 000000000..eb6c88099 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 000000000..2ea2a5ccd --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 000000000..344e92a3f --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,52 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 000000000..5fd1fc78e --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + }} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 000000000..a5ac8104e --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:56.180093" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "1": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "versions": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:41.408704" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 000000000..d3057c61f --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git 
a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 000000000..260d516be --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8577dc9df..e002585b9 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index e5cadbc24..01a4ee03e 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 000000000..0ed260efa --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 000000000..ca34fb5cd --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 000000000..799d199ce --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:53.9057" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:45.931558" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:34.807525" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:52:55.688799" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:17.609533" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, 
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:51:04.16585" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 000000000..e0f58a7a3 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 000000000..36a12eab0 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index ac43e67a3..8e019099c 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,17 +2,20 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: - tuple val(meta), path(bam) + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam - tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -21,9 +24,25 @@ process SAMTOOLS_SORT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools cat \\ + --threads $task.cpus \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') @@ -31,9 +50,20 @@ process SAMTOOLS_SORT { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? 
"cram" : + "bam" """ - touch ${prefix}.bam + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 092897512..341a7d0eb 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,8 +23,18 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM/CRAM/SAM file + description: BAM/CRAM/SAM file(s) pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true output: - meta: type: map @@ -33,16 +43,29 @@ output: e.g. [ id:'test', single_end:false ] - bam: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: + description: Sorted BAM file + pattern: "*.{bam}" + - cram: type: file - description: File containing software versions - pattern: "versions.yml" + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" - csi: type: file description: BAM index file (optional) pattern: "*.csi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 000000000..c2ea9c72a --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,128 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 000000000..da38d5d15 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,192 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:19:37.196205" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-22T15:54:46.580756" + }, + "cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:57:30.505698" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:54:25.872954" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 000000000..f642771f5 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 000000000..3a8c0188b --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 000000000..cd63ea208 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 000000000..1cc83bd95 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 000000000..982bc28e7 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 000000000..735ff8122 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,63 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 000000000..28a77db28 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,112 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 000000000..3828f3788 --- /dev/null +++ 
b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,c9d39b38c22de2057fc2f89949090975" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,c9d39b38c22de2057fc2f89949090975" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:20:24.885816" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:20:39.310713" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:21:04.771199" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d522a1fa016b259d6a55620ae53dcd63" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d522a1fa016b259d6a55620ae53dcd63" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:19:06.645466" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 000000000..7c28e30f3 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 000000000..150c37777 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 314c8b729..dc611448c 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,24 +2,26 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) - path fasta + tuple val(meta2), path(fasta) path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -27,13 +29,13 @@ process SAMTOOLS_VIEW { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -53,10 +55,19 @@ process SAMTOOLS_VIEW { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + def index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + """ - touch ${prefix}.bam - touch ${prefix}.cram + touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index a52e4f8d8..27be60d08 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -26,12 +26,17 @@ input: description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - index: - type: optional file - description: BAM.BAI/CRAM.CRAI file - pattern: "*.{.bai,.crai}" + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" - qname: type: file @@ -68,6 +73,15 @@ output: type: file description: optional CRAM file index pattern: "*.{crai}" + # unselected and unselected_index are created when passing a qname + - unselected: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: type: file description: File containing software versions @@ -77,3 +91,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 000000000..c10d10811 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 000000000..771ae033a --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 000000000..37b81a916 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
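+                    // test files resolve against params.modules_testdata_base_path, which conventionally points at the nf-core/test-datasets repository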
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 
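+                    // a reference FASTA is supplied here because samtools needs it to decode the CRAM input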
'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 000000000..6bcce9fea --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,508 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:20.390692583" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:15.007493874" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:09.472376824" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:04.080050906" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:52.978954857" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:58.400776109" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 000000000..4fdf1dd12 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/segemehl/align/environment.yml b/modules/nf-core/segemehl/align/environment.yml new file mode 100644 index 000000000..e7dbc6628 --- /dev/null +++ b/modules/nf-core/segemehl/align/environment.yml @@ -0,0 +1,7 @@ +name: segemehl_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - 
bioconda::segemehl=0.3.4 diff --git a/modules/nf-core/segemehl/align/main.nf b/modules/nf-core/segemehl/align/main.nf index 2253efac3..fa829a73a 100644 --- a/modules/nf-core/segemehl/align/main.nf +++ b/modules/nf-core/segemehl/align/main.nf @@ -2,10 +2,10 @@ process SEGEMEHL_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::segemehl=0.3.4" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/segemehl:0.3.4--hc2ea5fd_5': - 'quay.io/biocontainers/segemehl:0.3.4--hc2ea5fd_5' }" + 'biocontainers/segemehl:0.3.4--hc2ea5fd_5' }" input: tuple val(meta), path(reads) @@ -13,8 +13,11 @@ process SEGEMEHL_ALIGN { path(index) output: - tuple val(meta), path("${prefix}/*"), emit: results - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}/${prefix}.${suffix}"), emit: alignment + tuple val(meta), path("${prefix}/${prefix}.trns.txt") , emit: trans_alignments, optional: true + tuple val(meta), path("${prefix}/${prefix}.mult.bed") , emit: multi_bed, optional: true + tuple val(meta), path("${prefix}/${prefix}.sngl.bed") , emit: single_bed, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,7 +26,7 @@ process SEGEMEHL_ALIGN { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def reads = meta.single_end ? "-q ${reads}" : "-q ${reads[0]} -p ${reads[1]}" - def suffix = ( args.contains("-b") || args.contains("--bamabafixoida") ) ? "bam" : "sam" + suffix = ( args.contains("-b") || args.contains("--bamabafixoida") ) ? "bam" : "sam" """ mkdir -p $prefix @@ -43,7 +46,7 @@ process SEGEMEHL_ALIGN { stub: prefix = task.ext.prefix ?: "${meta.id}" - def suffix = ( args.contains("-b") || args.contains("--bamabafixoida") ) ? "bam" : "sam" + suffix = ( args.contains("-b") || args.contains("--bamabafixoida") ) ? "bam" : "sam" """ mkdir -p $prefix touch ${prefix}/${prefix}.${suffix} diff --git a/modules/nf-core/segemehl/align/meta.yml b/modules/nf-core/segemehl/align/meta.yml index bb5ac49ff..fc8e43bab 100644 --- a/modules/nf-core/segemehl/align/meta.yml +++ b/modules/nf-core/segemehl/align/meta.yml @@ -11,8 +11,7 @@ tools: homepage: "https://www.bioinf.uni-leipzig.de/Software/segemehl/" documentation: "https://www.bioinf.uni-leipzig.de/Software/segemehl/" doi: "10.1186/gb-2014-15-2-r34" - licence: "GPL v3" - + licence: ["GPL v3"] input: - meta: type: map @@ -31,25 +30,44 @@ input: type: file description: Segemehl Index file from SEGEMEHL_INDEX pattern: "*.idx" - output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - results: - type: folder + - alignment: + type: file description: | - Directory containing genomic alignments in SAM format + File containing genomic alignments in SAM format (please add "-b" flag to task.ext.args for BAM) - In addition to split-read alignments files when -S parameter used. - [ *.{sam,bam}, *.trns.txt, *.mult.bed, *.sngl.bed ] - pattern: "${meta.id}*" + pattern: "*.{sam,bam}" + - trans_alignments: + type: file + description: | + Custom text file containing all single split alignments predicted to be in trans + (optional, only if -S flag is set in task.ext.args) + pattern: "*.trns.txt" + - single_bed: + type: file + description: | + Bed file containing all single splice events predicted + in the split read alignments. 
+        (optional, only if -S flag is set in task.ext.args)
+      pattern: "*.sngl.bed"
+  - multi_bed:
+      type: file
+      description: |
+        Bed file containing all multi splice events predicted
+        in the split read alignments.
+        (optional, only if -S flag is set in task.ext.args)
+      pattern: "*.mult.bed"
   - versions:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-
 authors:
   - "@BarryDigby"
+  - "@nictru"
+maintainers:
+  - "@nictru"
diff --git a/modules/nf-core/segemehl/align/tests/main.nf.test b/modules/nf-core/segemehl/align/tests/main.nf.test
new file mode 100644
index 000000000..c1b4921e7
--- /dev/null
+++ b/modules/nf-core/segemehl/align/tests/main.nf.test
@@ -0,0 +1,140 @@
+nextflow_process {
+
+    name "Test Process SEGEMEHL_ALIGN"
+    script "../main.nf"
+    process "SEGEMEHL_ALIGN"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "segemehl"
+    tag "segemehl/align"
+    tag "segemehl/index"
+
+    setup {
+        run("SEGEMEHL_INDEX") {
+            script "../../../segemehl/index/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens - single_end") {
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = SEGEMEHL_INDEX.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.alignment[0][1]).exists() },
+                { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end") {
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = Channel.of([
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = SEGEMEHL_INDEX.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.alignment[0][1]).exists() },
+                { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - split - single_end") {
+        config "./split.config"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = SEGEMEHL_INDEX.out.index
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.alignment[0][1]).exists() },
+                { assert path(process.out.trans_alignments[0][1]).exists() },
+                { assert path(process.out.multi_bed[0][1]).exists() },
+                { assert path(process.out.single_bed[0][1]).exists() },
+                { assert snapshot(process.out.versions).match("homo_sapiens - split - single_end - versions") }
+            )
} + } + + test("homo_sapiens - split - paired_end") { + config "./split.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = SEGEMEHL_INDEX.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.alignment[0][1]).exists() }, + { assert path(process.out.trans_alignments[0][1]).exists() }, + { assert path(process.out.multi_bed[0][1]).exists() }, + { assert path(process.out.single_bed[0][1]).exists() }, + { assert snapshot(process.out.versions).match("homo_sapiens - split - paired_end - versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/segemehl/align/tests/main.nf.test.snap b/modules/nf-core/segemehl/align/tests/main.nf.test.snap new file mode 100644 index 000000000..c914bc3d1 --- /dev/null +++ b/modules/nf-core/segemehl/align/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "homo_sapiens - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,0c6afcd6ae65e27a0ea87f5b42c853eb" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T12:58:05.434115758" + }, + "homo_sapiens - single_end - versions": { + "content": [ + [ + "versions.yml:md5,0c6afcd6ae65e27a0ea87f5b42c853eb" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T12:57:56.488707635" + }, + "homo_sapiens - split - single_end - versions": { + "content": [ + [ + "versions.yml:md5,0c6afcd6ae65e27a0ea87f5b42c853eb" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T13:06:11.217385877" + }, + "homo_sapiens - split - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,0c6afcd6ae65e27a0ea87f5b42c853eb" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-30T13:06:29.757385118" + } +} \ No newline at end of file diff --git a/modules/nf-core/segemehl/align/tests/split.config b/modules/nf-core/segemehl/align/tests/split.config new file mode 100644 index 000000000..d4f6aab83 --- /dev/null +++ b/modules/nf-core/segemehl/align/tests/split.config @@ -0,0 +1,5 @@ +process{ + withName: SEGEMEHL_ALIGN { + ext.args = "-S" + } +} \ No newline at end of file diff --git a/modules/nf-core/segemehl/align/tests/tags.yml b/modules/nf-core/segemehl/align/tests/tags.yml new file mode 100644 index 000000000..6e7bf26ee --- /dev/null +++ b/modules/nf-core/segemehl/align/tests/tags.yml @@ -0,0 +1,2 @@ +segemehl/align: + - modules/nf-core/segemehl/align/** diff --git a/modules/nf-core/segemehl/index/environment.yml b/modules/nf-core/segemehl/index/environment.yml new file mode 100644 index 000000000..c06413305 --- /dev/null +++ b/modules/nf-core/segemehl/index/environment.yml @@ -0,0 +1,7 @@ +name: segemehl_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::segemehl=0.3.4 diff --git a/modules/nf-core/segemehl/index/main.nf b/modules/nf-core/segemehl/index/main.nf index 07cac955b..ea912c6ef 100644 --- a/modules/nf-core/segemehl/index/main.nf +++ 
b/modules/nf-core/segemehl/index/main.nf @@ -2,10 +2,10 @@ process SEGEMEHL_INDEX { tag "$fasta" label 'process_high' - conda (params.enable_conda ? "bioconda::segemehl=0.3.4" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/segemehl:0.3.4--hc2ea5fd_5': - 'quay.io/biocontainers/segemehl:0.3.4--hc2ea5fd_5' }" + 'biocontainers/segemehl:0.3.4--hc2ea5fd_5' }" input: path fasta diff --git a/modules/nf-core/segemehl/index/meta.yml b/modules/nf-core/segemehl/index/meta.yml index a154dad79..f2b9eb225 100644 --- a/modules/nf-core/segemehl/index/meta.yml +++ b/modules/nf-core/segemehl/index/meta.yml @@ -1,5 +1,6 @@ name: "segemehl_index" description: Generate genome indices for segemehl align +keywords: - index - circrna - splicing @@ -11,13 +12,11 @@ tools: documentation: "https://www.bioinf.uni-leipzig.de/Software/segemehl/" doi: "10.1186/gb-2014-15-2-r34" licence: "GPL v3" - input: - fasta: type: file description: Reference genome FASTA file pattern: "*.{fa,fasta}" - output: - index: type: file @@ -27,6 +26,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@BarryDigby" +maintainers: + - "@BarryDigby" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 000000000..8bd58cff5 --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,10 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 8b0f9d89e..8e9c48b1c 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -2,33 +2,37 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: - tuple val(meta), path(reads) - path index - path gtf + tuple val(meta), path(reads, stageAs: "input*/*") + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) val star_ignore_sjdbgtf val seq_platform val seq_center output: - tuple val(meta), path('*d.out.bam') , emit: bam tuple val(meta), path('*Log.final.out') , emit: log_final tuple val(meta), path('*Log.out') , emit: log_out tuple val(meta), path('*Log.progress.out'), emit: log_progress path "versions.yml" , emit: versions + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab tuple val(meta), path('*.out.junction') , optional:true, emit: junction tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph when: task.ext.when == null || task.ext.when @@ -36,20 +40,23 @@ process STAR_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" - def seq_center = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix' $seq_platform " : "--outSAMattrRGline ID:$prefix 'SM:$prefix' $seq_platform " + def seq_center = seq_center ? "'CN:$seq_center'" : "" + def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ STAR \\ --genomeDir $index \\ - --readFilesIn $reads \\ + --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\ --runThreadN $task.cpus \\ --outFileNamePrefix $prefix. 
\\ $out_sam_type \\ $ignore_gtf \\ - $seq_center \\ + $attrRG \\ $args $mv_unsorted_bam @@ -81,11 +88,16 @@ process STAR_ALIGN { touch ${prefix}.sortedByCoord.out.bam touch ${prefix}.toTranscriptome.out.bam touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.Aligned.sortedByCoord.out.bam touch ${prefix}.unmapped_1.fastq.gz touch ${prefix}.unmapped_2.fastq.gz touch ${prefix}.tab + touch ${prefix}.SJ.out.tab + touch ${prefix}.ReadsPerGene.out.tab touch ${prefix}.Chimeric.out.junction touch ${prefix}.out.sam + touch ${prefix}.Signal.UniqueMultiple.str1.out.wig + touch ${prefix}.Signal.UniqueMultiple.str1.out.bg cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index 7ee10f1cc..e80dbb7dd 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -25,10 +25,33 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - index: type: directory description: STAR genome index pattern: "star" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - seq_platform: + type: string + description: Sequencing platform + - seq_center: + type: string + description: Sequencing center output: - bam: type: file @@ -74,8 +97,19 @@ output: type: file description: STAR chimeric junction output file (optional) pattern: "*.out.junction" - + - wig: + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - bedgraph: + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" authors: - "@kevinmenden" - "@drpatelh" - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 000000000..6ecd77863 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,268 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + 
""" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") }, + { assert 
snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") }, + { assert 
snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 000000000..08edb914b --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,769 @@ +{ + "homo_sapiens - paired_end - multiple - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.968225733" + }, + "homo_sapiens - paired_end - multiple - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.857804" + }, + "homo_sapiens - paired_end - arriba - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.347549723" + }, + "homo_sapiens - single_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.24701" + }, + "homo_sapiens - paired_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.383818" + }, + "homo_sapiens - paired_end - arriba - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:56:12.431212643" + }, + "homo_sapiens - paired_end - multiple - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.07119229" + }, + "homo_sapiens - paired_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.368841" + }, + "homo_sapiens - paired_end - arriba - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.102537" + }, + "homo_sapiens - single_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.185369" + }, + "homo_sapiens - paired_end - arriba - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.268388251" + }, + "homo_sapiens - single_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.216183" + }, + "homo_sapiens - paired_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": 
"2023-11-23T13:23:33.327236" + }, + "homo_sapiens - single_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:53:26.664210196" + }, + "homo_sapiens - paired_end - multiple - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:29:01.022176" + }, + "homo_sapiens - paired_end - arriba - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.15277" + }, + "homo_sapiens - paired_end - multiple - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.52923" + }, + "homo_sapiens - paired_end - multiple - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.189486201" + }, + "homo_sapiens - paired_end - starfusion - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:27:55.905883" + }, + "homo_sapiens - paired_end - starfusion - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.192302" + }, + "homo_sapiens - paired_end - multiple - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.661837" + }, + "homo_sapiens - paired_end - multiple - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:29:00.966417" + }, + "homo_sapiens - paired_end - starfusion - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.53235164" + }, + "homo_sapiens - paired_end - arriba - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.202776" + }, + "homo_sapiens - single_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.394863748" + }, + "homo_sapiens - paired_end - arriba - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.251962" + }, + "homo_sapiens - paired_end - starfusion - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.040843" + }, + "homo_sapiens - single_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.154172" + }, + "homo_sapiens - paired_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:11.934832258" + }, + "homo_sapiens - paired_end - arriba - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.998817" + }, + "homo_sapiens - paired_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:23:33.259699" + }, + "homo_sapiens - paired_end - arriba - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:25:06.849451" + }, + "homo_sapiens - paired_end - multiple - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T18:01:20.393705142" + }, + "homo_sapiens - paired_end - starfusion - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.082408" + }, + "homo_sapiens - 
paired_end - starfusion - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.818041322" + }, + "homo_sapiens - single_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.175307" + }, + "homo_sapiens - paired_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.255481058" + }, + "homo_sapiens - paired_end - starfusion - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.155413" + }, + "homo_sapiens - single_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.144852" + }, + "homo_sapiens - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:54:12.343840482" + }, + "homo_sapiens - paired_end - multiple - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.291692062" + }, + "homo_sapiens - single_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.265642675" + }, + "homo_sapiens - paired_end - arriba - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.444214" + }, + "homo_sapiens - paired_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.126063825" + }, + "homo_sapiens - paired_end - arriba - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:25:06.829799" + }, + "homo_sapiens - paired_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.300509" + }, + "homo_sapiens - paired_end - arriba - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.300383" + }, + "homo_sapiens - paired_end - multiple - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.851247126" + }, + "homo_sapiens - paired_end - multiple - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.462257" + }, + "homo_sapiens - single_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.335457371" + }, + "homo_sapiens - paired_end - arriba - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.94699" + }, + "homo_sapiens - paired_end - starfusion - junction": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.641115828" + }, + "homo_sapiens - single_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.580593434" + }, 
+ "homo_sapiens - paired_end - starfusion - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:59:58.907317103" + }, + "homo_sapiens - paired_end - multiple - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.330463" + }, + "homo_sapiens - paired_end - arriba - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:25:06.86866" + }, + "homo_sapiens - paired_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.064121304" + }, + "homo_sapiens - paired_end - starfusion - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.118974" + }, + "homo_sapiens - paired_end - starfusion - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.264699" + }, + "homo_sapiens - paired_end - multiple - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:29:01.076947" + }, + "homo_sapiens - paired_end - arriba - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.050409" + }, + "homo_sapiens - paired_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.002180537" + }, + "homo_sapiens - single_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.50932751" + }, + "homo_sapiens - paired_end - starfusion - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.731699486" + }, + "homo_sapiens - single_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:22:55.126286" + }, + "homo_sapiens - paired_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:23:33.253884" + }, + "homo_sapiens - single_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:22:55.11799" + }, + "homo_sapiens - paired_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.287684" + }, + "homo_sapiens - paired_end - starfusion - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:27:55.971484" + }, + "homo_sapiens - paired_end - multiple - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.264176" + }, + "homo_sapiens - paired_end - multiple - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.596406" + }, + "homo_sapiens - single_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.205936" + }, + "homo_sapiens - paired_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.340653" + }, + "homo_sapiens - paired_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + 
], + "timestamp": "2023-12-04T17:54:12.185730856" + }, + "homo_sapiens - paired_end - starfusion - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.300637" + }, + "homo_sapiens - paired_end - arriba - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.190560178" + }, + "homo_sapiens - single_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.450352138" + }, + "homo_sapiens - paired_end - starfusion - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.422018" + }, + "homo_sapiens - paired_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.429457" + }, + "homo_sapiens - paired_end - starfusion - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:27:55.93945" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 000000000..2324b9e58 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 000000000..c4ac58088 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 000000000..467b64977 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 
--peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 000000000..8beace16e --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 000000000..791f255e5 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,10 @@ +name: star_genomegenerate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 0fe88cbfc..b8855715b 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,26 +2,27 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda (params.enable_conda ? "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: - path fasta - path gtf + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) output: - path "star" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index 8181157a1..1061e1b8d 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -15,14 +15,28 @@ tools: doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Fasta file of the reference genome + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - gtf: type: file description: GTF file of the reference genome - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - index: type: directory description: Folder containing the star index files @@ -31,7 +45,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 000000000..c17c8ba45 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_versions") } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_stub_versions") } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") }, + { assert snapshot(process.out.versions).match("fasta_versions") } + ) + } + + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_stub_versions") } + ) + } + + } + +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 000000000..5653d6e6c --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,90 @@ +{ + 
"fasta_gtf_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.798555" + }, + "fasta_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.521209" + }, + "fasta_gtf_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.478098" + }, + "fasta_gtf_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.491657" + }, + "fasta_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.552329" + }, + "fasta_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.560541" + }, + "fasta_gtf_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.786814" + }, + "fasta_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.517472" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 000000000..79f619bfe --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/stringtie/stringtie/environment.yml b/modules/nf-core/stringtie/stringtie/environment.yml new file mode 100644 index 000000000..7a0eccdb8 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/environment.yml @@ -0,0 +1,7 @@ +name: stringtie_stringtie +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::stringtie=2.2.1 diff --git a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf index b403edee6..6e25ba27d 100644 --- a/modules/nf-core/stringtie/stringtie/main.nf +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -2,10 +2,10 @@ process STRINGTIE_STRINGTIE { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::stringtie=2.2.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : - 'quay.io/biocontainers/stringtie:2.2.1--hecb563c_2' }" + 'biocontainers/stringtie:2.2.1--hecb563c_2' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/stringtie/stringtie/meta.yml b/modules/nf-core/stringtie/stringtie/meta.yml index 75518470b..d8ebdd88a 100644 --- a/modules/nf-core/stringtie/stringtie/meta.yml +++ b/modules/nf-core/stringtie/stringtie/meta.yml @@ -5,7 +5,6 @@ keywords: - assembly - quantification - gtf - tools: - stringtie2: description: | @@ -55,3 +54,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test b/modules/nf-core/stringtie/stringtie/tests/main.nf.test new file mode 100644 index 000000000..00efe8f1a --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process STRINGTIE_STRINGTIE" + script "../main.nf" + process "STRINGTIE_STRINGTIE" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/stringtie" + + test("sarscov2 [bam] - forward strandedness") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("fs_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("fs_abundance") }, + { assert snapshot(process.out.versions).match("fs_versions") } + ) + } + } + + test("sarscov2 [bam] - forward strandedness + reference annotation") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("fs_gtf_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("fs_gtf_abundance") }, + { assert snapshot(process.out.ballgown).match("fs_gtf_ballgown") }, + { assert snapshot(process.out.versions).match("fs_gtf_versions") } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("rs_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("rs_abundance") }, + { assert snapshot(process.out.versions).match("rs_versions") } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = 
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.transcript_gtf).match("rs_gtf_transcript_gtf") }, + { assert snapshot(process.out.abundance).match("rs_gtf_abundance") }, + { assert snapshot(process.out.ballgown).match("rs_gtf_ballgown") }, + { assert snapshot(process.out.versions).match("rs_gtf_versions") } + ) + } + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap new file mode 100644 index 000000000..bf7516364 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap @@ -0,0 +1,186 @@ +{ + "fs_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ] + ], + "timestamp": "2023-11-23T13:55:41.032044613" + }, + "fs_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,569137af5be452413086b50653a97203" + ] + ] + ], + "timestamp": "2023-11-23T13:55:41.017978904" + }, + "rs_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ] + ], + "timestamp": "2023-11-23T13:56:13.601112933" + }, + "fs_gtf_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:00.523797974" + }, + "fs_gtf_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,f56cf8aba2c4a5673bc7963ba5f12d04" + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.475164879" + }, + "rs_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:13.623892691" + }, + "rs_gtf_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,bb346053a8c156b803b055133376c7fa" + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.693599559" + }, + "fs_gtf_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,7d8bce7f2a922e367cedccae7267c22e" + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.482135418" + }, + "rs_gtf_ballgown": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,879b6696029d19c4737b562e9d149218", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.715698347" + }, + "rs_transcript_gtf": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,31c34aec2bf36bb0ea3c16c2afeeeb1f" + ] + ] + ], + "timestamp": "2023-11-23T13:56:13.590054035" + }, + "rs_gtf_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:56:22.725513476" + }, + "fs_gtf_ballgown": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,6b4cf69bc03f3f69890f972a0e8b7471", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + 
"i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ] + ], + "timestamp": "2023-11-23T13:56:00.494299817" + }, + "fs_versions": { + "content": [ + [ + "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + ] + ], + "timestamp": "2023-11-23T13:55:41.049417582" + }, + "rs_gtf_abundance": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,7385b870b955dae2c2ab78a70cf05cce" + ] + ] + ], + "timestamp": "2023-11-23T13:56:22.701059059" + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/nextflow.config b/modules/nf-core/stringtie/stringtie/tests/nextflow.config new file mode 100644 index 000000000..e3aaa0999 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'STRINGTIE_STRINGTIE' { + ext.args = '' + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/tags.yml b/modules/nf-core/stringtie/stringtie/tests/tags.yml new file mode 100644 index 000000000..da9b051c3 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/stringtie: + - modules/nf-core/stringtie/stringtie/** diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml new file mode 100644 index 000000000..0981320c1 --- /dev/null +++ b/modules/nf-core/trimgalore/environment.yml @@ -0,0 +1,10 @@ +name: trimgalore + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::cutadapt=3.4 + - bioconda::trim-galore=0.6.7 diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf index 5b45e4d7f..24ead8714 100644 --- a/modules/nf-core/trimgalore/main.nf +++ b/modules/nf-core/trimgalore/main.nf @@ -2,22 +2,21 @@ process TRIMGALORE { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html , optional: true - tuple val(meta), path("*.zip") , emit: zip , optional: true + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +37,12 @@ process TRIMGALORE { // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } """ [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz trim_galore \\ - $args \\ + ${args_list.join(' ')} \\ --cores $cores \\ --gzip \\ ${prefix}.fastq.gz diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml index 439f566df..e649088ce 100644 --- a/modules/nf-core/trimgalore/meta.yml +++ b/modules/nf-core/trimgalore/meta.yml @@ -36,7 +36,7 @@ output: description: | List of input adapter trimmed FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" + pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" - unpaired: type: file description: | @@ -62,3 +62,7 @@ authors: - "@drpatelh" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test new file mode 100644 index 000000000..43904ac32 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test @@ -0,0 +1,103 @@ +nextflow_process { + + name "Test Process TRIMGALORE" + script "../main.nf" + process "TRIMGALORE" + tag "modules" + tag "modules_nfcore" + tag "trimgalore" + + test("test_trimgalore_single_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1)).getText().contains(report1_line) } + } + }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_trimgalore_paired_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ] + """ + } + } + + then { + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) } + } + }, + { report2_lines.each { report2_line -> + { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) } + } + }, + { assert snapshot(process.out.versions).match() } + ) + } + } +} diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap new file mode 
100644 index 000000000..082c55004 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_trimgalore_single_end": { + "content": [ + [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-29T16:33:20.401347" + }, + "test_trimgalore_paired_end": { + "content": [ + [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-29T16:33:28.960497" + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml new file mode 100644 index 000000000..e9937691a --- /dev/null +++ b/modules/nf-core/trimgalore/tests/tags.yml @@ -0,0 +1,2 @@ +trimgalore: + - modules/nf-core/trimgalore/** diff --git a/modules/nf-core/tximeta/tximport/environment.yml b/modules/nf-core/tximeta/tximport/environment.yml new file mode 100644 index 000000000..24b202222 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "tximeta_tximport" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::bioconductor-tximeta=1.20.1" diff --git a/modules/nf-core/tximeta/tximport/main.nf b/modules/nf-core/tximeta/tximport/main.nf new file mode 100644 index 000000000..b0cce8536 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/main.nf @@ -0,0 +1,47 @@ +process TXIMETA_TXIMPORT { + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bioconductor-tximeta%3A1.20.1--r43hdfd78af_0' : + 'biocontainers/bioconductor-tximeta:1.20.1--r43hdfd78af_0' }" + + input: + tuple val(meta), path("quants/*") + tuple val(meta2), path(tx2gene) + val quant_type + + output: + tuple val(meta), path("*gene_tpm.tsv") , emit: tpm_gene + tuple val(meta), path("*gene_counts.tsv") , emit: counts_gene + tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled + tuple val(meta), path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled + tuple val(meta), path("*gene_lengths.tsv") , emit: lengths_gene + tuple val(meta), path("*transcript_tpm.tsv") , emit: tpm_transcript + tuple val(meta), path("*transcript_counts.tsv") , emit: counts_transcript + tuple val(meta), path("*transcript_lengths.tsv") , emit: lengths_transcript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'tximport.r' + + stub: + """ + touch ${meta.id}.gene_tpm.tsv + touch ${meta.id}.gene_counts.tsv + touch ${meta.id}.gene_counts_length_scaled.tsv + touch ${meta.id}.gene_counts_scaled.tsv + touch ${meta.id}.gene_lengths.tsv + touch ${meta.id}.transcript_tpm.tsv + touch ${meta.id}.transcript_counts.tsv + touch ${meta.id}.transcript_lengths.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-tximeta: \$(Rscript -e "library(tximeta); cat(as.character(packageVersion('tximeta')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/tximeta/tximport/meta.yml b/modules/nf-core/tximeta/tximport/meta.yml new file mode 100644 index 000000000..9ee5fd365 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/meta.yml @@ -0,0 +1,120 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tximeta_tximport" +description: | + Import transcript-level abundances and estimated counts for gene-level + analysis packages +keywords: + - gene + - kallisto + - pseudoalignment + - salmon + - transcript +tools: + - "tximeta": + description: "Transcript Quantification Import with Automatic Metadata" + homepage: "https://bioconductor.org/packages/release/bioc/html/tximeta.html" + documentation: "https://bioconductor.org/packages/release/bioc/vignettes/tximeta/inst/doc/tximeta.html" + tool_dev_url: "https://github.com/thelovelab/tximeta" + doi: "10.1371/journal.pcbi.1007664" + licence: ["GPL-2"] + +input: + - meta: + type: map + description: | + Groovy Map containing information related to the experiment as a whole + e.g. `[ id:'SRP123456' ]` + - quants: + type: directory + description: Paths to subdirectories corresponding to + sample-wise runs of Salmon or Kallisto + - meta2: + type: map + description: | + Groovy Map containing reference information related to the species + reference e.g. `[ id:'yeast' ]` + - tx2gene: + type: file + description: A transcript to gene mapping table such as those generated + by custom/tx2gene + pattern: "*.{csv,tsv}" + - meta3: + type: map + description: | + Groovy Map containing information related to the experiment as a whole + e.g. 
`[ id:'SRP123456' ]` + - coldata: + type: file + description: | + Optional 'coldata' file equivalent to a sample sheet where the first + column corresponds to the sample names (directory names in the input + salmon/ kallisto results) + pattern: "*.{csv,tsv}" + - quant_type: + type: string + description: Quantification type, 'kallisto' or 'salmon' + +output: + - meta: + type: map + description: | + Groovy Map containing information related to the experiment as a whole + e.g. `[ id:'SRP123456' ]` + - tpm_gene: + type: file + description: | + Abundance (TPM) values derived from tximport output after + summarizeToGene(), without a 'countsFromAbundance' specification + pattern: "*gene_tpm.tsv" + - counts_gene: + type: file + description: | + Count values derived from tximport output after + summarizeToGene(), without a 'countsFromAbundance' specification + pattern: "*gene_counts.tsv" + - counts_gene_length_scaled: + type: file + description: | + Count values derived from tximport output after summarizeToGene(), with + a 'countsFromAbundance' specification of 'lengthScaledTPM' + pattern: "*gene_counts_length_scaled.tsv" + - counts_gene_scaled: + type: file + description: | + Count values derived from tximport output after summarizeToGene(), with + a 'countsFromAbundance' specification of 'scaledTPM' + pattern: "*gene_counts_scaled.tsv" + - lengths_gene: + type: file + description: | + Length values derived from tximport output after summarizeToGene(), + without a 'countsFromAbundance' specification + pattern: "*gene_lengths.tsv" + - tpm_transcript: + type: file + description: | + Abundance (TPM) values derived from tximport output without + summarizeToGene(), without a 'countsFromAbundance' specification + pattern: "*transcript_tpm.tsv" + - counts_transcript: + type: file + description: | + Count values derived from tximport output without + summarizeToGene(), without a 'countsFromAbundance' specification + pattern: "*transcript_counts.tsv" + - lengths_transcript: + type: file + description: | + Length values derived from tximport output without summarizeToGene(), + without a 'countsFromAbundance' specification + pattern: "*gene_lengths.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/modules/nf-core/tximeta/tximport/templates/tximport.r b/modules/nf-core/tximeta/tximport/templates/tximport.r new file mode 100755 index 000000000..40d79eb93 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/templates/tximport.r @@ -0,0 +1,218 @@ +#!/usr/bin/env Rscript --vanilla + +# Script for importing and processing transcript-level quantifications. +# Written by Lorena Pantano, later modified by Jonathan Manning, and released +# under the MIT license. + +# Loading required libraries +library(SummarizedExperiment) +library(tximport) + +################################################ +################################################ +## Functions ## +################################################ +################################################ + +#' Build a table from a SummarizedExperiment object +#' +#' This function takes a SummarizedExperiment object and a specific slot name to extract +#' assay data. It then combines the first two columns of the rowData with the specified +#' assay data slot into a new data table. +#' +#' @param se.obj A SummarizedExperiment object from which to build the table. 
+#' @param slot The name of the slot in the assays list from which to extract data. +#' +#' @return A data frame combining the first two columns of the rowData with the assay data from the specified slot. + +build_table <- function(se.obj, slot) { + cbind(rowData(se.obj)[,1:2], assays(se.obj)[[slot]]) +} + +#' Write a table to a file from a SummarizedExperiment object with given parameters +#' +#' This function generates a table from a SummarizedExperiment object using specified parameters +#' and writes the resulting table to a file. The file name is constructed using a prefix and a +#' suffix from the parameters, and the table is written with tab separation, without quoting text, +#' and without row names. +#' +#' @param params A list containing the parameters needed for file generation and table writing. +#' The list should include: +#' - `obj`: A SummarizedExperiment object from which to build the table. +#' - `slot`: The name of the slot in the assays list from which to extract data. +#' - `suffix`: Suffix to use for generating the file name. +#' +#' @return NULL The function is called for its side effect of writing a file and does not return anything. + +write_se_table <- function(params, prefix) { + file_name <- paste0(prefix, ".", params\$suffix) + write.table(build_table(params\$obj, params\$slot), file_name, + sep="\t", quote=FALSE, row.names = FALSE) +} + +#' Read Transcript Metadata from a Given Path +#' +#' This function reads transcript metadata from a specified file path. The file is expected to +#' be a tab-separated values file without headers, containing transcript information. The function +#' checks if the file is empty and stops execution with an error message if so. It reads the file +#' into a data frame, expecting columns for transcript IDs, gene IDs, and gene names. Additional +#' processing is done to ensure compatibility with a predefined data structure (e.g., `txi[[1]]`), +#' including adding missing entries and reordering based on the transcript IDs found in `txi[[1]]`. +#' +#' @param tinfo_path The file path to the transcript information file. +#' +#' @return A list containing three elements: +#' - `transcript`: A data frame with transcript IDs, gene IDs, and gene names, indexed by transcript IDs. +#' - `gene`: A data frame with unique gene IDs and gene names. +#' - `tx2gene`: A data frame mapping transcript IDs to gene IDs. + +read_transcript_info <- function(tinfo_path){ + info <- file.info(tinfo_path) + if (info\$size == 0) { + stop("tx2gene file is empty") + } + + transcript_info <- read.csv(tinfo_path, sep="\t", header = TRUE, + col.names = c("tx", "gene_id", "gene_name")) + + extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] + rownames(transcript_info) <- transcript_info[["tx"]] + + list(transcript = transcript_info, + gene = unique(transcript_info[,2:3]), + tx2gene = transcript_info[,1:2]) +} + +#' Create a SummarizedExperiment Object +#' +#' Constructs a SummarizedExperiment object using provided matrices for counts, abundance, and length, +#' along with metadata for columns and rows. This function facilitates the organization of experimental +#' data (e.g., RNA-seq or other high-throughput data) in a structured format that is convenient for +#' further analyses and visualization. 
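+#'
+#' A hedged usage sketch, not part of the original template: the names below
+#' mirror the call made later in this script, where `txi` comes from tximport()
+#' and `coldata`/`transcript_info` are built from the sample and tx2gene inputs:
+#'
+#'   se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]],
+#'                                      txi[["length"]], DataFrame(coldata),
+#'                                      transcript_info[["transcript"]])
+#'
+#' The resulting object's assays are then exported with write_se_table().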
+#' +#' @param counts A matrix or DataFrame containing counts data, with rows as features (e.g., genes) and +#' columns as samples. +#' @param abundance A matrix or DataFrame containing abundance data (e.g., TPM or FPKM) with the same +#' dimensions and row/column names as the counts data. +#' @param length A matrix or DataFrame containing feature lengths, matching the dimensions and row/column +#' names of the counts data. +#' @param col_data A DataFrame containing sample-level metadata, with rows corresponding to columns in the +#' counts, abundance, and length matrices. +#' @param row_data A DataFrame containing feature-level metadata, with rows corresponding to features in +#' the counts, abundance, and length matrices. +#' +#' @return A SummarizedExperiment object containing the supplied data and metadata. + +create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) { + SummarizedExperiment(assays = list(counts = counts, abundance = abundance, length = length), + colData = col_data, + rowData = row_data) +} + +################################################ +################################################ +## Main script starts here ## +################################################ +################################################ + +# Define pattern for file names based on quantification type +pattern <- ifelse('$quant_type' == "kallisto", "abundance.tsv", "quant.sf") +fns <- list.files('quants', pattern = pattern, recursive = T, full.names = T) +names <- basename(dirname(fns)) +names(fns) <- names +dropInfReps <- '$quant_type' == "kallisto" + +# Import transcript-level quantifications +txi <- tximport(fns, type = '$quant_type', txOut = TRUE, dropInfReps = dropInfReps) + +# Read transcript and sample data +transcript_info <- read_transcript_info('$tx2gene') + +# Make coldata just to appease the summarizedexperiment +coldata <- data.frame(files = fns, names = names) +rownames(coldata) <- coldata[["names"]] + +# Create initial SummarizedExperiment object +se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]], + DataFrame(coldata), transcript_info\$transcript) + +# Setting parameters for writing tables +params <- list( + list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"), + list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"), + list(obj = se, slot = "length", suffix = "transcript_lengths.tsv") +) + +# Process gene-level data if tx2gene mapping is available +if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info\$tx2gene)) { + tx2gene <- transcript_info\$tx2gene + gi <- summarizeToGene(txi, tx2gene = tx2gene) + gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM") + gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM") + + gene_info <- transcript_info\$gene[match(rownames(gi[[1]]), transcript_info\$gene[["gene_id"]]),] + rownames(gene_info) <- gene_info[["tx"]] + + col_data_frame <- DataFrame(coldata) + + # Create gene-level SummarizedExperiment objects + gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]], + col_data_frame, gene_info) + gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]], + col_data_frame, gene_info) + gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]], + col_data_frame, gene_info) + + params <- c(params, list( + list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"), 
+ list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"), + list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"), + list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"), + list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv") + )) +} + +# Writing tables for each set of parameters + +prefix <- '' +if ('$task.ext.prefix' != 'null'){ + prefix = '$task.ext.prefix' +} else if ('$meta.id' != 'null'){ + prefix = '$meta.id' +} + +done <- lapply(params, write_se_table, prefix) + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +sink(paste(prefix, "R_sessionInfo.log", sep = '.')) +citation("tximeta") +print(sessionInfo()) +sink() + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +tximeta.version <- as.character(packageVersion('tximeta')) + +writeLines( + c( + '"${task.process}":', + paste(' bioconductor-tximeta:', tximeta.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/nf-core/tximeta/tximport/tests/main.nf.test b/modules/nf-core/tximeta/tximport/tests/main.nf.test new file mode 100644 index 000000000..5cf6af83e --- /dev/null +++ b/modules/nf-core/tximeta/tximport/tests/main.nf.test @@ -0,0 +1,193 @@ +nextflow_process { + + name "Test Process TXIMETA_TXIMPORT" + script "../main.nf" + process "TXIMETA_TXIMPORT" + + tag "modules" + tag "modules_nfcore" + tag "custom/tx2gene" + tag "tximeta" + tag "tximeta/tximport" + tag "untar" + + test("saccharomyces_cerevisiae - kallisto - gtf") { + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/kallisto_results.tar.gz', checkIfExists: true) + ]) + """ + } + } + run("CUSTOM_TX2GENE") { + script "../../../custom/tx2gene/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf', checkIfExists: true) + ]) + input[1] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[2] = 'kallisto' + input[3] = 'gene_id' + input[4] = 'gene_name' + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[1] = CUSTOM_TX2GENE.out.tx2gene + input[2] = 'kallisto' + """ + } + } + + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.counts_gene).match('counts_gene_kallisto') }, + { assert snapshot(process.out.counts_gene_length_scaled).match('counts_gene_length_scaled_kallisto') }, + { assert snapshot(process.out.counts_gene_scaled).match('counts_gene_scaled_kallisto') }, + { assert snapshot(process.out.counts_transcript).match('counts_transcript_kallisto') }, + { assert snapshot(process.out.lengths_gene).match('lengths_gene_kallisto') }, + { assert 
snapshot(process.out.lengths_transcript).match('lengths_transcript_kallisto') }, + { assert snapshot(process.out.tpm_gene).match('tpm_gene_kallisto') }, + { assert snapshot(process.out.tpm_transcript).match('tpm_transcript_kallisto') }, + { assert snapshot(process.out.versions).match('versions_kallisto') } + ) + } + } + + test("saccharomyces_cerevisiae - kallisto - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ [], [] ]) + input[1] = Channel.of([ [], [] ]) + input[2] = 'kallisto' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.counts_gene).match('counts_gene_kallisto - stub') }, + { assert snapshot(process.out.counts_gene_length_scaled).match('counts_gene_length_scaled_kallisto - stub') }, + { assert snapshot(process.out.counts_gene_scaled).match('counts_gene_scaled_kallisto - stub') }, + { assert snapshot(process.out.counts_transcript).match('counts_transcript_kallisto - stub') }, + { assert snapshot(process.out.lengths_gene).match('lengths_gene_kallisto - stub') }, + { assert snapshot(process.out.lengths_transcript).match('lengths_transcript_kallisto - stub') }, + { assert snapshot(process.out.tpm_gene).match('tpm_gene_kallisto - stub') }, + { assert snapshot(process.out.tpm_transcript).match('tpm_transcript_kallisto - stub') }, + { assert snapshot(process.out.versions).match('versions_kallisto - stub') } + ) + } + + } + test("saccharomyces_cerevisiae - salmon - gtf") { + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/salmon_results.tar.gz', checkIfExists: true) + ]) + """ + } + } + run("CUSTOM_TX2GENE") { + script "../../../custom/tx2gene/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf', checkIfExists: true) + ]) + input[1] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[2] = 'salmon' + input[3] = 'gene_id' + input[4] = 'gene_name' + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[1] = CUSTOM_TX2GENE.out.tx2gene + input[2] = 'salmon' + """ + } + } + + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.counts_gene).match('counts_gene_salmon') }, + { assert snapshot(process.out.counts_gene_length_scaled).match('counts_gene_length_scaled_salmon') }, + { assert snapshot(process.out.counts_gene_scaled).match('counts_gene_scaled_salmon') }, + { assert snapshot(process.out.counts_transcript).match('counts_transcript_salmon') }, + { assert snapshot(process.out.lengths_gene).match('lengths_gene_salmon') }, + { assert snapshot(process.out.lengths_transcript).match('lengths_transcript_salmon') }, + { assert snapshot(process.out.tpm_gene).match('tpm_gene_salmon') }, + { assert snapshot(process.out.tpm_transcript).match('tpm_transcript_salmon') }, + { assert snapshot(process.out.versions).match('versions_salmon') } + ) + } + + } + + test("saccharomyces_cerevisiae - salmon - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ [], [] ]) + input[1] = Channel.of([ [], [] ]) + input[2] = 'salmon' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.counts_gene).match('counts_gene_salmon - stub') }, + { assert snapshot(process.out.counts_gene_length_scaled).match('counts_gene_length_scaled_salmon - stub') }, + { assert snapshot(process.out.counts_gene_scaled).match('counts_gene_scaled_salmon - stub') }, + { assert snapshot(process.out.counts_transcript).match('counts_transcript_salmon - stub') }, + { assert snapshot(process.out.lengths_gene).match('lengths_gene_salmon - stub') }, + { assert snapshot(process.out.lengths_transcript).match('lengths_transcript_salmon - stub') }, + { assert snapshot(process.out.tpm_gene).match('tpm_gene_salmon - stub') }, + { assert snapshot(process.out.tpm_transcript).match('tpm_transcript_salmon - stub') }, + { assert snapshot(process.out.versions).match('versions_salmon - stub') } + ) + } + } +} + diff --git a/modules/nf-core/tximeta/tximport/tests/main.nf.test.snap b/modules/nf-core/tximeta/tximport/tests/main.nf.test.snap new file mode 100644 index 000000000..3cd0ee9e4 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/tests/main.nf.test.snap @@ -0,0 +1,594 @@ +{ + "tpm_transcript_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].transcript_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.683744" + }, + "lengths_gene_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_lengths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.126128" + }, + "counts_gene_scaled_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts_scaled.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.654405" + }, + "counts_gene_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.112898" + }, + "lengths_transcript_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].transcript_lengths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.67148" + }, + "versions_salmon - stub": { + "content": [ + [ + "versions.yml:md5,6ff317cceddc686f84d79cb976e1e28b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.690592" + }, + "counts_gene_length_scaled_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts_length_scaled.tsv:md5,4944841ac711124d29673b6b6ed16ef3" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.621599" + }, + "lengths_transcript_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_lengths.tsv:md5,db6d8ab9f8e1123d5984fd534b4347dc" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.876208" + }, + "counts_transcript_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_counts.tsv:md5,42e0106e75fa97c1c684c6d9060f1724" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.62725" + }, + "counts_transcript_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + 
"[].transcript_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.122852" + }, + "counts_transcript_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_counts.tsv:md5,ff0f5be09ca7a322672c0074ba35da17" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.866731" + }, + "lengths_gene_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_lengths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.6654" + }, + "tpm_gene_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_tpm.tsv:md5,6076364cc78741a4f8bc8935a045d13d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.881193" + }, + "tpm_transcript_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_tpm.tsv:md5,7a334b565e1e865efb1caf615f194ef7" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.886363" + }, + "tpm_gene_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.677538" + }, + "lengths_transcript_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_lengths.tsv:md5,f974b52840431a5dae57bcb615badbf1" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.632822" + }, + "counts_gene_length_scaled_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts_length_scaled.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.11652" + }, + "tpm_gene_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.133742" + }, + "counts_transcript_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].transcript_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.660144" + }, + "counts_gene_scaled_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts_scaled.tsv:md5,39d14e361434978b3cadae901a26a028" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.624732" + }, + "counts_gene_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts.tsv:md5,c14cab7e15cfac73ec0602dc2c404551" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.852188" + }, + "versions_salmon": { + "content": [ + [ + "versions.yml:md5,6ff317cceddc686f84d79cb976e1e28b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.892224" + }, + "counts_gene_length_scaled_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts_length_scaled.tsv:md5,5f92a6784f6edc5e3b336c71c3ee7daf" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-05-28T12:35:32.857451" + }, + "tpm_gene_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_tpm.tsv:md5,85d108269769ae0d841247b9b9ed922d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.636454" + }, + "lengths_transcript_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].transcript_lengths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.129712" + }, + "lengths_gene_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_lengths.tsv:md5,db6becdf807fd164a9c63dd1dd916d9c" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.630042" + }, + "counts_gene_scaled_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts_scaled.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.119638" + }, + "tpm_transcript_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.transcript_tpm.tsv:md5,65862ed9d4a05abfab952e680dc0e49d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.639525" + }, + "lengths_gene_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_lengths.tsv:md5,1691ea2677612805cd699265c83024d7" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.871162" + }, + "counts_gene_length_scaled_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts_length_scaled.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.605613" + }, + "counts_gene_kallisto": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts.tsv:md5,e89c28692ea214396b2d4cb702a804c3" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.61832" + }, + "versions_kallisto": { + "content": [ + [ + "versions.yml:md5,6ff317cceddc686f84d79cb976e1e28b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:34:59.642751" + }, + "counts_gene_salmon - stub": { + "content": [ + [ + [ + [ + + ], + "[].gene_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:50.598457" + }, + "versions_kallisto - stub": { + "content": [ + [ + "versions.yml:md5,6ff317cceddc686f84d79cb976e1e28b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.141689" + }, + "tpm_transcript_kallisto - stub": { + "content": [ + [ + [ + [ + + ], + "[].transcript_tpm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:16.137716" + }, + "counts_gene_scaled_salmon": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.gene_counts_scaled.tsv:md5,fdfb3d23aaf5d4316d81247ec4664ca0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T12:35:32.862272" + } +} \ No newline at end of file diff --git a/modules/nf-core/tximeta/tximport/tests/tags.yml b/modules/nf-core/tximeta/tximport/tests/tags.yml new file mode 100644 
index 000000000..fc96a89e0 --- /dev/null +++ b/modules/nf-core/tximeta/tximport/tests/tags.yml @@ -0,0 +1,2 @@ +tximeta/tximport: + - "modules/nf-core/tximeta/tximport/**" diff --git a/nextflow.config b/nextflow.config index 2120cb545..ff6b0bbcb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,37 +7,33 @@ */ // Global default params, used in configs -// TODO: tidy up after workflow is running smoothly params { // Input options input = null - outdir = './results' + outdir = null phenotype = null + annotation = null // workflow options - module = 'circrna_discovery' - tool = 'circexplorer2' - bsj_reads = 0 - tool_filter = 0 - duplicates_fun = 'mean' - exon_boundary = 200 - save_intermediates = true + tools = 'circexplorer2' + bsj_reads = 1 + tool_filter = 1 + exon_boundary = 0 + save_intermediates = false - // reference genome options + // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false bowtie = null bowtie2 = null bwa = null + star = null hisat2 = null hisat2_build_memory = '200.GB' segemehl = null - star = null - gtf = null - mature = null - species = null save_reference = true // Trimming @@ -57,6 +53,7 @@ params { alignSJDBoverhangMin = 10 chimSegmentMin = 10 sjdboverhang = 100 + limitSjdbInsertNsj = 1000000 //> MAPSPLICE seglen = 25 @@ -65,6 +62,17 @@ params { min_map_len = 40 min_fusion_distance = 200 + //> Quantification + bootstrap_samples = 30 + + //> MIRNA processing + mirna_expression = null + mirna_min_reads = 5 + mirna_min_sample_percentage = 0.2 + mirna_tools = 'miranda,targetscan' + mirna_tool_filter = 1 + mirna_correlation = 'pearson' + //> MISC save_unaligned = false @@ -79,8 +87,6 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -89,27 +95,28 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 + max_memory = '300.GB' + max_cpus = 50 max_time = '240.h' + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base,test_data_base' + validationShowHiddenParams = false + validate_params = true } // Load base.config by default for all pipelines @@ -123,84 +130,129 @@ try { } // Load nf-core/circrna custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs!
-// try { -// includeConfig "${params.custom_config_base}/pipeline/circrna.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/circrna profiles: ${params.custom_config_base}/pipeline/circrna.config") -// } - - +try { + includeConfig "${params.custom_config_base}/pipeline/circrna.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/circrna profiles: ${params.custom_config_base}/pipeline/circrna.config") +} profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - params.enable_conda = true - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = 
false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.runOptions = '--no-mount tmp --writable-tmpfs' + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_igenomes { includeConfig 'conf/test_igenomes.config' } + full { includeConfig 'conf/full.config' } + test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -208,8 +260,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -224,22 +274,25 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
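+// A minimal sketch of re-enabling these warnings for a single run without the
+// debug profile (assuming a custom config file supplied via `-c custom.config`):
+//   nextflow.enable.configProcessNamesValidation = true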
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -248,7 +301,7 @@ manifest { homePage = 'https://github.com/nf-core/circrna' description = """Quantification, miRNA target prediction and differential expression analysis of circular RNAs""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' + nextflowVersion = '!>=23.04.0' version = 'dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b1deb4ce..ee8ea89c8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,88 +14,147 @@ "properties": { "input": { "type": "string", - "fa_icon": "fas fa-file-csv", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", - "description": "A 'samplehseet' CSV file containing the absolute paths to FASTQ files.", - "help_text": "An example of a valid CSV sample sheet is provided below, depicting paired-end control samples and single-end treatment samples. The headers `sample`, `fastq_1` and `fastq_2` must be included in the input CSV file.\n\n| sample \t| fastq_1 \t| fastq_2 \t|\n|-----------\t|--------------------------\t|--------------------------\t|\n| control_1 \t| /data/ctrl_1_R1.fastq.gz \t| /data/ctrl_1_R2.fastq.gz \t|\n| control_2 \t| /data/ctrl_2_R1.fastq.gz \t| /data/ctrl_2_R2.fastq.gz \t|\n| control_3 \t| /data/ctrl_3_R1.fastq.gz \t| /data/ctrl_3_R2.fastq.gz \t|\n| treated_1 \t| /data/trt_1_R1.fastq.gz \t| \t|\n| treated_2 \t| /data/trt_2_R1.fastq.gz \t| \t|\n| treated_3 \t| /data/trt_3_R1.fastq.gz \t| \t|" + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/circrna/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open", - "default": "./results" + "default": null }, "phenotype": { "type": "string", - "description": "Phenotype CSV file specifying the experimental design for DESeq2.", - "fa_icon": "fas fa-file-csv", + "format": "file-path", + "exists": true, + "schema": "assets/schema_phenotype.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Phenotype CSV file specifying the experimental design. 
If provided, the pipeline will run CIRCTEST.", + "help_text": "There are two rules for providing the phenotype CSV file. 1) The 'sample' column must match the sample sheet's 'sample' column. 2) The response variable containing the phenotype of primary interest in the experiment must have the column name condition. All other columns included in the file are controlled for in the `DESeq2` design. \n\n| sample \t| condition \t| replicate \t|\n|-----------\t|-----------\t|-----------\t|\n| control_1 \t| ctr \t| 1 \t|\n| control_2 \t| ctr \t| 2 \t|\n| control_3 \t| ctr \t| 3 \t|\n| treated_1 \t| trt \t| 1 \t|\n| treated_2 \t| trt \t| 2 \t|\n| treated_3 \t| trt \t| 3 \t|\n\nThe above phenotype file will identify differentially expressed circRNAs/mRNAs between control and treatment cells, whilst controlling for the effect of variation between replicates: ` ~ replicates + condition`", + "fa_icon": "fas fa-file-csv" + }, + "annotation": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_annotation.json", + "mimetype": "text/csv", - "help_text": "There are two rules for providing the phenotype CSV file. 1) The phenotype 'sample' column must match the sample sheets 'sample' column. 2) The response variable containing the phenotype of primary interest in the experiment must have the column name condition. All other columns included in the file are controlled for in the `DESeq2` design. \n\n| sample \t| condition \t| replicate \t|\n|-----------\t|-----------\t|-----------\t|\n| control_1 \t| ctr \t| 1 \t|\n| control_2 \t| ctr \t| 2 \t|\n| control_3 \t| ctr \t| 3 \t|\n| treated_1 \t| trt \t| 1 \t|\n| treated_2 \t| trt \t| 2 \t|\n| treated_3 \t| trt \t| 3 \t|\n\nThe above phenotype file will identify differentially expressed circRNAs/mRNAs between control and treatment cells, whilst controlling for the effect of variation between replicates: ` ~ replicates + condition`", - "pattern": "\\.csv$" + "pattern": "^\\S+\\.csv$", + "description": "Path to a CSV file containing BED files that should be used for annotation.", + "help_text": "The annotation file should be a CSV file with the following columns: `name`, `file` and `min_overlap`. The `name` column should contain a unique identifier for the annotation, the `file` column should contain the path to the BED file and the `min_overlap` column should contain the minimum overlap required for a circRNA to be considered as overlapping with the annotation. The `min_overlap` column is optional and defaults to 0.9 if not provided.", + "fa_icon": "fas fa-file-csv" + } } }, - "pipeline_options": { - "title": "Pipeline Options", + "mirna_processing_options": { + "title": "miRNA options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define paths and thresholds for miRNA analysis.", + "properties": { + "mirna_expression": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/tsv", + "pattern": "^\\S+\\.tsv$", + "description": "Path to a tab-separated file providing the expression counts of miRNAs, as created by the nf-core 'smrnaseq' pipeline. \n\nmirna \t sample1 \t sample2 \t sample3 \t\nid1\t count_sample1 \t count_sample2 \t count_sample3 \t\nid2 \t ... \t ... \t ...
\t \n", + "fa_icon": "fas fa-file-tsv" + }, + "mirna_min_sample_percentage": { + "type": "number", + "fa_icon": "fas fa-circle-notch", + "description": "Minimum percentage of samples, a miRNA has to be expressed in to pass filtering.", + "help_text": "The mirna_min_percentage parameter sets the minimum percentage of samples in which a miRNA must be expressed to pass filtering. The default value is 0.2, which means a miRNA must be detected in at least 20% of the samples to be included in the analysis.", + "default": 0.2, + "minimum": 0 + }, + "mirna_min_reads": { + "type": "integer", + "fa_icon": "fas fa-circle-notch", + "description": "Minimum number of reads, a miRNA is required to have to pass filtering.", + "help_text": "This parameter determines the minimum number of reads that a miRNA must have to pass filtering. The default is 5, meaning a miRNA must have at least 5 reads across the samples to be considered for analysis.", + "default": 5, + "minimum": 0 + }, + "mirna_correlation": { + "type": "string", + "fa_icon": "fas fa-wrench", + "description": "Specifies the type of correlation to be used when analyzing the relationship between miRNA and transcript expression levels. Valid options are 'pearson' or 'spearman'.", + "help_text": "Select the correlation method to be applied in the correlation analysis of miRNAs.", + "default": "pearson", + "pattern": "^(pearson|spearman)$" + } + } + }, + "discovery_options": { + "title": "Discovery Options", "type": "object", "fa_icon": "fas fa-circle-notch", - "description": "Main workflow parameters.", - "help_text": "Documentation for selecting circRNA quantification tools, analysis modules, outputs etc.", - "required": ["tool", "module"], + "description": "Parameters for circrna discovery.", + "required": ["tools"], "properties": { - "tool": { + "tools": { "type": "string", "fa_icon": "fas fa-wrench", "description": "Comma separated list of circRNA quantification tools to use. Supported tools: ciriquant, circexplorer2, find_circ, circrna_finder, mapsplice, dcc, segemehl", + "pattern": "^(ciriquant|circexplorer2|find_circ|circrna_finder|mapsplice|dcc|segemehl)(,(ciriquant|circexplorer2|find_circ|circrna_finder|mapsplice|dcc|segemehl))*$", "help_text": "Select one or a combination of circRNA quantification tools for the pipeline e.g:\n--tool 'circexplorer2, ciriquant, find_circ'\n\nN.B: Selecting more than one circRNA quantification tool will trigger the circRNA filtering parameter --tool_filter", - "default": "circexplorer2", - "pattern": "^((circexplorer2|circrna_finder|ciriquant|dcc|find_circ|mapsplice|segemehl)?,?)*[^,]+$" - }, - "module": { - "type": "string", - "fa_icon": "fas fa-sliders-h", - "description": "Comma separated list of modules to run: 'circrna_discovery', 'mirna_prediction' & 'differential_expression'.", - "mandatory": "circrna_discovery", - "pattern": "^((circrna_discovery|mirna_prediction|differential_expression)?,?)*[^,]+$", - "help_text": "The 'circrna_discovery' module is mandatory. 
After circRNA quantification, the user can select 'mirna_prediction', 'differential_expression' or both to deploy additional analyses e.g:\n--module 'circrna_discovery, mirna_prediction, differential_expression'\n", - "default": "circrna_discovery" + "default": "circexplorer2" }, "bsj_reads": { "type": "integer", "fa_icon": "fas fa-circle-notch", "description": "Minimum number of reads spanning circRNA back-splice junction required for circRNA to be output by workflow.", - "help_text": "Filter low confidence circRNAs by removing circRNAs with read counts below a specified value. To disable, set the value to 0 (default).", - "default": 0, - "minimum": 0 + "help_text": "Filter low confidence circRNAs by removing circRNAs with read counts below a specified value. Setting the value to 1 (default) effectively disables filtering, since every reported circRNA is supported by at least one read.", + "default": 1, + "minimum": 1 }, "tool_filter": { "type": "integer", "fa_icon": "fas fa-intersection", "description": "Specify the minimum number of tools circRNAs must be called by to be output by the workflow.", - "help_text": "When multiple circRNA quantification tools have been provided to `--tool`, set a filtering method whereby circRNAs are output if they have been called by at least *n* quantification tools.\n\nSetting `--tool_filter` to 0/1 is the same as taking the union, all circRNAs are included in the output.\n\nSetting `--tool_filter` to 2 will output circRNAs that have been called by at least 2 quantification tools and so on.\n\nPlease note this is only reflected in the circRNA count matrix generated.", - "default": 0, - "minimum": 0, - "maximum": 6 - }, - "duplicates_fun": { - "type": "string", - "fa_icon": "fas fa-copy", - "description": "Aggregate function to apply to duplicate circRNAs when more than one quantification tool has been selected. Options: max, mean", - "default": "mean", - "enum": ["mean", "max"] + "help_text": "When multiple circRNA quantification tools have been provided to `--tools`, set a filtering method whereby circRNAs are output if they have been called by at least *n* quantification tools.\n\nSetting `--tool_filter` to 1 is the same as taking the union: all circRNAs are included in the output.\n\nSetting `--tool_filter` to 2 will output circRNAs that have been called by at least 2 quantification tools and so on.", + "default": 1, + "minimum": 1, + "maximum": 7 }, "save_intermediates": { "type": "boolean", "description": "Save intermediate alignment/circRNA files.", - "default": true, + "default": false, "fa_icon": "fas fa-save" }, "exon_boundary": { "type": "integer", "description": "Specify the distance at which the annotation script decides if a candidate is a circRNA or EI-circRNA.", - "help_text": "During annotation, if one of the start or end position of a circular candidate imperfectly overlaps an exon boundary, the script will consider positions within 'exon_boundary' (default 200bp) to be an exonic circRNA. If they fall outside of this range, the candidate is assumed to be an exonic-intronic circRNA, and the entire underlying sequence is taken for miRNA analysis, as opposed to just the exonic sequences for canonical exonic circRNAs. ", - "default": 200 + "help_text": "During annotation, if one of the start or end positions of a circular candidate imperfectly overlaps an exon boundary, the script will consider positions within 'exon_boundary' (default 0bp) to be an exonic circRNA.
If they fall outside of this range, the candidate is assumed to be an exonic-intronic circRNA, and the entire underlying sequence is taken for miRNA analysis, as opposed to just the exonic sequences for canonical exonic circRNAs. ", + "default": 0 + }, + "mirna_tools": { + "type": "string", + "fa_icon": "fas fa-wrench", + "description": "Comma separated list of miRNA binding site prediction tools to use. Supported tools: miranda, targetscan.", + "help_text": "Select one or a combination of miRNA binding site prediction tools for the pipeline e.g:\n--mirna_tools 'miranda,targetscan'", + "default": "miranda,targetscan", + "pattern": "^((miranda|targetscan)?,?)*[^,]+$" + }, + "mirna_tool_filter": { + "type": "integer", + "fa_icon": "fas fa-intersection", + "description": "Specify the number of votes required for a miRNA binding site to be further considered in downstream analysis.", + "help_text": "Controls the number of votes required for a binding site prediction to be considered valid. If a miRNA binding site was predicted by two different tools (e.g., miRanda and TargetScan), it receives two votes. By specifying additional tools for miRNA binding site prediction (using the 'mirna_tools' parameter), you can increase the number of votes a binding site can receive; 'mirna_tool_filter' then sets how many votes are required for a binding site to be considered valid.", + "default": 1, + "minimum": 1, + "maximum": 4 + } } }, @@ -121,6 +180,11 @@ "description": "Minimum overhang for annotated junctions", "default": 10 }, + "limitSjdbInsertNsj": { + "type": "integer", + "description": "Maximum number of junctions to be inserted into the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "default": 1000000 + }, "chimSegmentMin": { "type": "integer", "description": "Minimum length of chimeric segment length. Must be set to a positive value to detect circular junctions.", @@ -156,11 +220,17 @@ "description": "Sequencing center information to be added to read group of BAM files.", "fa_icon": "fas fa-synagogue" }, + "bootstrap_samples": { + "type": "integer", + "description": "Number of bootstrap samples to generate during quantification.", + "default": 30 + }, "save_unaligned": { "type": "boolean", "fa_icon": "fas fa-save", "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", - "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool.", + "default": false } } }, @@ -185,10 +255,11 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "Must be provided if --genome null", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nThis parameter is *mandatory* if `--genome` is not specified.", "fa_icon": "fas fa-book" }, "gtf": { "type": "string", "fa_icon": "fas fa-address-book", "mimetype": "text/plain", "description": "Path to reference GTF file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified.
Needs to contain the following attributes: `gene_id`, `transcript_id` and `gene_name`.", "pattern": "\\.gtf$" }, "mature": { "type": "string", - "description": "Path to FASTA file with mature miRNAs.", + "description": "Path to FASTA file with mature miRNAs. This parameter needs to be specified to perform miRNA interaction analyses.", "mimetype": "text/plain", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.", "fa_icon": "fas fa-wheelchair", - "default": "null" - }, - "species": { - "type": "string", - "fa_icon": "fas fa-dog", - "description": "String identifying species.", - "help_text": "Check the igenomes.config file for species configured to work with nf-core/circrna", - "default": "null" + "default": null }, "bowtie": { "type": "string", "fa_icon": "fas fa-bold", - "description": "Path to Bowtie index files.", - "default": "null" + "description": "Path to Bowtie index files, surrounded by quotes. No glob pattern required.", + "default": null }, "bowtie2": { "type": "string", "fa_icon": "fas fa-bold", - "description": "Path to Bowtie2 index files.", - "default": "null" + "description": "Path to Bowtie2 index files, surrounded by quotes. No glob pattern required.", + "default": null }, "bwa": { "type": "string", "fa_icon": "fas fa-bold", - "description": "Path to BWA index directory.", - "default": "null" + "description": "Path to BWA index directory, surrounded by quotes. No glob pattern required.", + "default": null }, "hisat2": { "type": "string", - "description": "The title to use in the MultiQC report.", + "description": "Path to Hisat2 index directory, surrounded by quotes. No glob pattern required.", "default": null, - "fa_icon": "fab fa-redhat" + "fa_icon": "fab fa-bold" }, "hisat2_build_memory": { "type": "string", @@ -247,22 +312,14 @@ }, "segemehl": { "type": "string", - "default": "None", + "default": null, "fa_icon": "fab fa-stripe-s", - "description": "Path to Segemehl Index file" + "description": "Path to Segemehl Index **file**." }, "star": { "type": "string", "fa_icon": "far fa-star", - "description": "Path to STAR index directory." - }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true + "description": "Path to STAR index directory, surrounded by quotes. No glob pattern required." }, "igenomes_ignore": { "type": "boolean", @@ -283,13 +340,21 @@ "type": "boolean", "description": "Skip the adapter trimming step.", "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", - "fa_icon": "fas fa-fast-forward" + "fa_icon": "fas fa-fast-forward", + "default": false }, "save_trimmed": { "type": "boolean", "description": "Save the trimmed FastQ files in the results directory.", "help_text": "By default, trimmed FastQ files will not be saved to the results directory. 
Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", - "fa_icon": "fas fa-save" + "fa_icon": "fas fa-save", + "default": false + }, + "skip_fastqc": { + "type": "boolean", + "description": "Skip FastQC quality control of the sequencing reads.", + "fa_icon": "fas fa-terminal", + "default": false }, "clip_r1": { "type": "integer", @@ -383,7 +448,7 @@ "max_cpus": { "type": "integer", "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, + "default": 50, "fa_icon": "fas fa-microchip", "hidden": true, "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" @@ -391,7 +456,7 @@ "max_memory": { "type": "string", "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", + "default": "300.GB", "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, @@ -402,7 +467,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -415,11 +480,6 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "skip_fastqc": { - "type": "boolean", - "description": "Skip FastQC quality control of the sequencing reads.", - "fa_icon": "fas fa-terminal" - }, "help": { "type": "boolean", "description": "Display help text.", @@ -443,11 +503,10 @@ }, "email": { "type": "string", - "description": "Phenotype CSV file specifying the experimental design for DESeq2.", - "fa_icon": "fas fa-journal-whills", - "mimetype": "text/csv", - "help_text": "The response variable containing the phenotype of primary interest in the experiment must have the column name condition. An example phenotype file is given below:\n\n| Sample_ID | condition | replicates |\n|---------|-----------|------------|\n| control_rep1 | control | 1 |\n| control_rep2 | control | 2 |\n| control_rep3 | control | 3 |\n| lung_rep1 | lung | 1 |\n| lung_rep2 | lung | 2 |\n| lung_rep3 | lung | 3 |\n| melanoma_rep1 | melanoma | 1 |\n| melanoma_rep2 | melanoma | 2 |\n| melanoma_rep3 | melanoma | 3 |\n\nThis will produce the DESeq2 design formula '~ replicates + condition' i.e all columns not named condition will be controlled for in the linear mixed model.", - "pattern": "\\.csv$" + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, "email_on_fail": { "type": "string", @@ -491,6 +550,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, @@ -506,13 +566,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -520,37 +573,48 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "enable_conda": { + "validationFailUnrecognisedParams": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } }, "allOf": [ { - "$ref": "#/definitions/alignment_options" + "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/input_output_options" + "$ref": "#/definitions/reference_genome_options" }, { - "$ref": "#/definitions/pipeline_options" + "$ref": "#/definitions/read_trimming_options" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/alignment_options" }, { - "$ref": "#/definitions/read_trimming_options" + "$ref": "#/definitions/discovery_options" + }, + { + "$ref": "#/definitions/mirna_processing_options" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6f..000000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf new file mode 100644 index 000000000..dea819a65 --- /dev/null +++ b/subworkflows/local/annotation.nf @@ -0,0 +1,44 @@ +include { BEDTOOLS_INTERSECT as INTERSECT_GTF } from '../../modules/nf-core/bedtools/intersect' +include { GAWK as INGEST_DATABASE_NAMES } from '../../modules/nf-core/gawk' +include { GNU_SORT as COMBINE_DATABASES } from '../../modules/nf-core/gnu/sort' +include { BEDTOOLS_INTERSECT as INTERSECT_DATABASE } from '../../modules/nf-core/bedtools/intersect' +include { ANNOTATION as ANNOTATE } from '../../modules/local/annotation' + +workflow ANNOTATION { + take: + regions + ch_gtf + exon_boundary + ch_annotation + + main: + ch_versions = Channel.empty() + + INTERSECT_GTF( regions.combine(ch_gtf.map{meta, gtf -> gtf}), [[], []] ) + ch_versions = ch_versions.mix(INTERSECT_GTF.out.versions) + + INGEST_DATABASE_NAMES( ch_annotation, [] ) + ch_versions = ch_versions.mix(INGEST_DATABASE_NAMES.out.versions) + + INTERSECT_DATABASE( regions.combine(INGEST_DATABASE_NAMES.out.output) + .map{ meta1, regions, meta2, database -> + [[id: "${meta1.id}-${meta2.id}", + original_meta: meta1, + min_overlap: meta2.min_overlap], regions, database] }, + [[], []]) + ch_versions = ch_versions.mix(INTERSECT_DATABASE.out.versions) + + ANNOTATE( INTERSECT_GTF.out.intersect + .join(INTERSECT_DATABASE.out.intersect + .map{ meta, bed -> [meta.original_meta, bed] } + .groupTuple(), remainder: true) + .map{ meta, gtf_intersection, db_intersections -> [meta, gtf_intersection, db_intersections ?: []]}, + exon_boundary ) + ch_versions = ch_versions.mix(ANNOTATE.out.versions) + + emit: + bed = ANNOTATE.out.bed + gtf = ANNOTATE.out.gtf + + versions = ch_versions +} diff --git a/subworkflows/local/bsj_detection.nf b/subworkflows/local/bsj_detection.nf new file mode 100644 index 000000000..89ddec4ed --- /dev/null +++ b/subworkflows/local/bsj_detection.nf @@ -0,0 +1,239 @@ +// MODULES +include { GAWK as FILTER_BSJS } from '../../modules/nf-core/gawk' +include { GAWK as MASK_SCORES } from '../../modules/nf-core/gawk' +include { GNU_SORT as CONCAT_TOOLS_PER_SAMPLE } from '../../modules/nf-core/gnu/sort' +include { BEDTOOLS_GROUPBY as COUNT_TOOLS } from '../../modules/nf-core/bedtools/groupby' +include { GAWK as FILTER_MIN_TOOLS } from '../../modules/nf-core/gawk' +include { GNU_SORT as CONCAT_SAMPLES } from '../../modules/nf-core/gnu/sort' +include { GAWK as EXTRACT_COUNTS } from '../../modules/nf-core/gawk' +include { CSVTK_JOIN as COMBINE_COUNTS_PER_TOOL } from '../../modules/nf-core/csvtk/join' +include { UPSET as UPSET_SAMPLES } from '../../modules/local/upset' +include { UPSET as UPSET_ALL } from '../../modules/local/upset' +include { BEDTOOLS_GETFASTA as FASTA_COMBINED } from '../../modules/nf-core/bedtools/getfasta' +include { BEDTOOLS_GETFASTA as FASTA_PER_SAMPLE } from '../../modules/nf-core/bedtools/getfasta' +include { BEDTOOLS_GETFASTA as FASTA_PER_SAMPLE_TOOL } from '../../modules/nf-core/bedtools/getfasta' +include { FAIL_ON_EMPTY } from '../../modules/local/fail_on_empty' + +// SUBWORKFLOWS +include { SEGEMEHL } from './detection_tools/segemehl' +include { STAR2PASS } from './detection_tools/star2pass' +include { CIRCEXPLORER2 } from './detection_tools/circexplorer2' +include { CIRCRNA_FINDER } from './detection_tools/circrna_finder' +include { 
FIND_CIRC } from './detection_tools/find_circ' +include { CIRIQUANT } from './detection_tools/ciriquant' +include { DCC } from './detection_tools/dcc' +include { MAPSPLICE } from './detection_tools/mapsplice' +include { ANNOTATION as ANNOTATE_COMBINED } from './annotation' +include { ANNOTATION as ANNOTATE_PER_SAMPLE } from './annotation' +include { ANNOTATION as ANNOTATE_PER_SAMPLE_TOOL } from './annotation' + +workflow BSJ_DETECTION { + + take: + reads + ch_fasta + ch_gtf + ch_annotation + bowtie_index + bowtie2_index + bwa_index + chromosomes + hisat2_index + star_index + bsj_reads + exon_boundary + + main: + ch_versions = Channel.empty() + ch_bsj_bed_per_sample_tool = Channel.empty() + ch_multiqc_files = Channel.empty() + fasta = ch_fasta.map{meta, fasta -> fasta} + gtf = ch_gtf.map{meta, gtf -> gtf} + + // STAR 2-PASS-MODE + star_ignore_sjdbgtf = true + seq_center = params.seq_center ?: '' + seq_platform = '' + STAR2PASS( reads, star_index, ch_gtf, bsj_reads, star_ignore_sjdbgtf, seq_center, seq_platform ) + ch_versions = ch_versions.mix(STAR2PASS.out.versions) + + // + // DISCOVERY TOOLS: + // + tools_selected = params.tools.split(',').collect{it.trim().toLowerCase()} + + if (tools_selected.size() == 0) { + error 'No tools selected for circRNA discovery.' + } + + if (tools_selected.contains('segemehl')) { + SEGEMEHL( reads, fasta, params.segemehl ) + ch_versions = ch_versions.mix(SEGEMEHL.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(SEGEMEHL.out.bed) + } + + if (tools_selected.contains('circexplorer2')) { + CIRCEXPLORER2( gtf, fasta, STAR2PASS.out.junction ) + ch_versions = ch_versions.mix(CIRCEXPLORER2.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(CIRCEXPLORER2.out.bed) + } + + if (tools_selected.contains('circrna_finder')) { + CIRCRNA_FINDER( fasta, STAR2PASS.out.sam, STAR2PASS.out.junction, + STAR2PASS.out.tab ) + ch_versions = ch_versions.mix(CIRCRNA_FINDER.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(CIRCRNA_FINDER.out.bed) + } + + if (tools_selected.contains('find_circ')) { + FIND_CIRC( reads, bowtie2_index, ch_fasta ) + ch_versions = ch_versions.mix(FIND_CIRC.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(FIND_CIRC.out.bed) + } + + if (tools_selected.contains('ciriquant')) { + CIRIQUANT( reads, ch_gtf, ch_fasta, bwa_index, hisat2_index ) + ch_versions = ch_versions.mix(CIRIQUANT.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(CIRIQUANT.out.bed) + } + + if (tools_selected.contains('dcc')) { + DCC( reads, ch_fasta, ch_gtf, star_index, STAR2PASS.out.junction, + star_ignore_sjdbgtf, seq_platform, seq_center, bsj_reads ) + ch_versions = ch_versions.mix(DCC.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(DCC.out.bed) + } + + if (tools_selected.contains('mapsplice')) { + MAPSPLICE( reads, gtf, fasta, bowtie_index, chromosomes, + STAR2PASS.out.junction ) + ch_versions = ch_versions.mix(MAPSPLICE.out.versions) + ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool.mix(MAPSPLICE.out.bed) + } + + // + // QUANTIFY BSJs PER TOOL + // + + EXTRACT_COUNTS( ch_bsj_bed_per_sample_tool, [] ) + ch_versions = ch_versions.mix(EXTRACT_COUNTS.out.versions) + + COMBINE_COUNTS_PER_TOOL( EXTRACT_COUNTS.out.output + .map{ meta, bed -> [[id: meta.tool], bed]} + .groupTuple() ) + ch_versions = ch_versions.mix(COMBINE_COUNTS_PER_TOOL.out.versions) + + // + // APPLY bsj_reads FILTER + // + + ch_bsj_bed_per_sample_tool_filtered = 
FILTER_BSJS( ch_bsj_bed_per_sample_tool, [] ).output + ch_versions = ch_versions.mix(FILTER_BSJS.out.versions) + + + // + // MERGE BED FILES + // + + MASK_SCORES( ch_bsj_bed_per_sample_tool_filtered, [] ) + ch_versions = ch_versions.mix(MASK_SCORES.out.versions) + ch_bsj_bed_per_sample_tool_masked = MASK_SCORES.out.output + .filter{ meta, bed -> !bed.empty } + + CONCAT_TOOLS_PER_SAMPLE( + MASK_SCORES.out.output.map{ meta, bed -> [ [id: meta.id], bed ] }.groupTuple() + ) + ch_versions = ch_versions.mix(CONCAT_TOOLS_PER_SAMPLE.out.versions) + + COUNT_TOOLS( CONCAT_TOOLS_PER_SAMPLE.out.sorted, 5 ) + ch_versions = ch_versions.mix(COUNT_TOOLS.out.versions) + + FILTER_MIN_TOOLS( COUNT_TOOLS.out.bed, [] ) + ch_versions = ch_versions.mix(FILTER_MIN_TOOLS.out.versions) + ch_bsj_bed_per_sample = FILTER_MIN_TOOLS.out.output + .filter{ meta, bed -> bed.size() > 0 } + + CONCAT_SAMPLES( + ch_bsj_bed_per_sample.map{ meta, bed -> [[id: "all"], bed] }.groupTuple() + ) + ch_versions = ch_versions.mix(CONCAT_SAMPLES.out.versions) + ch_bsj_bed_combined = CONCAT_SAMPLES.out.sorted + + // + // UPSET PLOTS + // + + UPSET_SAMPLES( ch_bsj_bed_per_sample_tool_masked + .map{ meta, bed -> [meta.id, meta.tool, bed]} + .groupTuple() + .map{ sample, tools, beds -> [[id: sample], tools, beds]} ) + ch_multiqc_files = ch_multiqc_files.mix(UPSET_SAMPLES.out.multiqc) + ch_versions = ch_versions.mix(UPSET_SAMPLES.out.versions) + + UPSET_ALL( ch_bsj_bed_per_sample_tool_masked + .map{ meta, bed -> ["all", meta.tool, bed] } + .groupTuple() + .map{ sample, tools, beds -> [[id: sample], tools, beds]} ) + ch_multiqc_files = ch_multiqc_files.mix(UPSET_ALL.out.multiqc) + ch_versions = ch_versions.mix(UPSET_ALL.out.versions) + + // + // ANNOTATION + // + + ANNOTATE_COMBINED( ch_bsj_bed_combined, ch_gtf, exon_boundary, ch_annotation ) + ch_versions = ch_versions.mix(ANNOTATE_COMBINED.out.versions) + ch_bsj_bed12_combined = ANNOTATE_COMBINED.out.bed + ch_bsj_gtf_combined = ANNOTATE_COMBINED.out.gtf + + ANNOTATE_PER_SAMPLE( ch_bsj_bed_per_sample, ch_gtf, exon_boundary, ch_annotation ) + ch_versions = ch_versions.mix(ANNOTATE_PER_SAMPLE.out.versions) + ch_bsj_bed12_per_sample = ANNOTATE_PER_SAMPLE.out.bed + ch_bsj_gtf_per_sample = ANNOTATE_PER_SAMPLE.out.gtf + + ANNOTATE_PER_SAMPLE_TOOL( ch_bsj_bed_per_sample_tool_masked, ch_gtf, exon_boundary, ch_annotation ) + ch_versions = ch_versions.mix(ANNOTATE_PER_SAMPLE_TOOL.out.versions) + ch_bsj_bed12_per_sample_tool = ANNOTATE_PER_SAMPLE_TOOL.out.bed + ch_bsj_gtf_per_sample_tool = ANNOTATE_PER_SAMPLE_TOOL.out.gtf + + // + // FASTA WORKFLOW: + // + + FASTA_COMBINED( ch_bsj_bed_combined, fasta ) + ch_versions = ch_versions.mix(FASTA_COMBINED.out.versions) + ch_bsj_fasta_combined = FASTA_COMBINED.out.fasta + + FASTA_PER_SAMPLE( ch_bsj_bed_per_sample, fasta ) + ch_versions = ch_versions.mix(FASTA_PER_SAMPLE.out.versions) + ch_bsj_fasta_per_sample = FASTA_PER_SAMPLE.out.fasta + + FASTA_PER_SAMPLE_TOOL( ch_bsj_bed_per_sample_tool_masked, fasta ) + ch_versions = ch_versions.mix(FASTA_PER_SAMPLE_TOOL.out.versions) + ch_bsj_fasta_per_sample_tool = FASTA_PER_SAMPLE_TOOL.out.fasta + + // STOP PIPELINE IF NO CIRCULAR RNAs WERE FOUND + FAIL_ON_EMPTY( + ch_bsj_bed_combined.ifEmpty([[id: "empty"], []]), + // Make sure to wait for per-sample results + Channel.empty() + .mix(ch_bsj_bed12_combined) + .mix(ch_bsj_bed12_per_sample) + .mix(ch_bsj_bed12_per_sample_tool) + .mix(ch_bsj_fasta_combined) + .mix(ch_bsj_fasta_per_sample) + .mix(ch_bsj_fasta_per_sample_tool) + .mix(UPSET_SAMPLES.out.plot) + .map{ meta, f -> 
f } + .collect() + ) + + emit: + bed = ch_bsj_bed_combined + bed12 = ch_bsj_bed12_combined + gtf = ch_bsj_gtf_combined + fasta = ch_bsj_fasta_combined + + multiqc_files = ch_multiqc_files + versions = ch_versions +} diff --git a/subworkflows/local/circrna_discovery.nf b/subworkflows/local/circrna_discovery.nf deleted file mode 100644 index e6f6571a4..000000000 --- a/subworkflows/local/circrna_discovery.nf +++ /dev/null @@ -1,227 +0,0 @@ - -include { ANNOTATION } from '../../modules/local/annotation/full_annotation/main' -include { BOWTIE2_ALIGN as FIND_CIRC_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { FIND_CIRC_ANCHORS } from '../../modules/local/find_circ/anchors/main' -include { FIND_CIRC } from '../../modules/local/find_circ/find_circ/main' -include { FIND_CIRC_FILTER } from '../../modules/local/find_circ/filter/main' -include { CIRIQUANT_YML } from '../../modules/local/ciriquant/yml/main' -include { CIRIQUANT } from '../../modules/local/ciriquant/ciriquant/main' -include { CIRIQUANT_FILTER } from '../../modules/local/ciriquant/filter/main' -include { CIRCEXPLORER2_REFERENCE } from '../../modules/local/circexplorer2/reference/main' -include { CIRCEXPLORER2_PARSE } from '../../modules/nf-core/circexplorer2/parse/main' -include { CIRCEXPLORER2_ANNOTATE } from '../../modules/nf-core/circexplorer2/annotate/main' -include { CIRCEXPLORER2_FILTER } from '../../modules/local/circexplorer2/filter/main' -include { CIRCRNA_FINDER_FILTER } from '../../modules/local/circrna_finder/filter/main' -include { SEGEMEHL_ALIGN } from '../../modules/nf-core/segemehl/align/main' -include { SEGEMEHL_FILTER } from '../../modules/local/segemehl/filter/main' -include { STAR_ALIGN as STAR_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as STAR_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as STAR_SJDB } from '../../modules/local/star/sjdb/main' -include { STAR_ALIGN as DCC_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as DCC_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as DCC_SJDB } from '../../modules/local/star/sjdb/main' -include { STAR_ALIGN as DCC_MATE1_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as DCC_MATE1_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as DCC_MATE1_SJDB } from '../../modules/local/star/sjdb/main' -include { STAR_ALIGN as DCC_MATE2_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as DCC_MATE2_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as DCC_MATE2_SJDB } from '../../modules/local/star/sjdb/main' -include { DCC } from '../../modules/local/dcc/dcc/main' -include { DCC_FILTER } from '../../modules/local/dcc/filter/main' -include { CIRCEXPLORER2_REFERENCE as MAPSPLICE_REFERENCE } from '../../modules/local/circexplorer2/reference/main' -include { MAPSPLICE_ALIGN } from '../../modules/local/mapsplice/align/main' -include { CIRCEXPLORER2_PARSE as MAPSPLICE_PARSE } from '../../modules/nf-core/circexplorer2/parse/main' -include { CIRCEXPLORER2_ANNOTATE as MAPSPLICE_ANNOTATE } from '../../modules/nf-core/circexplorer2/annotate/main' -include { CIRCEXPLORER2_FILTER as MAPSPLICE_FILTER } from '../../modules/local/circexplorer2/filter/main' -include { FASTA } from '../../modules/local/fasta/main' -include 
{ MERGE_TOOLS } from '../../modules/local/count_matrix/merge_tools/main' -include { COUNTS_COMBINED } from '../../modules/local/count_matrix/combined/main' -include { COUNTS_SINGLE } from '../../modules/local/count_matrix/single/main' - -workflow CIRCRNA_DISCOVERY { - - take: - reads - fasta - gtf - bowtie_index - bowtie2_index - bwa_index - chromosomes - hisat2_index - segemehl_index - star_index - bsj_reads - tool_filter - duplicates_fun - exon_boundary - - main: - ch_versions = Channel.empty() - - // - // SEGEMEHL WORKFLOW: - // - SEGEMEHL_ALIGN( reads, fasta, segemehl_index ) - segemehl_filter = SEGEMEHL_ALIGN.out.results.map{ meta, results -> meta.tool = "segemehl"; return [ meta, results ] } - SEGEMEHL_FILTER( segemehl_filter, bsj_reads ) - - ch_versions = ch_versions.mix(SEGEMEHL_ALIGN.out.versions) - - // - // STAR WORFKLOW: - // - - STAR_1ST_PASS( reads, star_index, gtf, true, '', '' ) - sjdb = STAR_1ST_PASS.out.tab.map{ meta, tab -> return [ tab ] }.collect() - STAR_SJDB( sjdb, bsj_reads ) - STAR_2ND_PASS( reads, star_index, STAR_SJDB.out.sjtab, true, '', '' ) - - ch_versions = ch_versions.mix(STAR_1ST_PASS.out.versions) - - // - // CIRCEXPLORER2 WORKFLOW: - // - - CIRCEXPLORER2_REFERENCE( gtf ) - CIRCEXPLORER2_PARSE( STAR_2ND_PASS.out.junction ) - CIRCEXPLORER2_ANNOTATE( CIRCEXPLORER2_PARSE.out.junction, fasta, CIRCEXPLORER2_REFERENCE.out.txt ) - circexplorer2_filter = CIRCEXPLORER2_ANNOTATE.out.txt.map{ meta, txt -> meta.tool = "circexplorer2"; return [ meta, txt ] } - CIRCEXPLORER2_FILTER( circexplorer2_filter, bsj_reads ) - - ch_versions = ch_versions.mix(CIRCEXPLORER2_REFERENCE.out.versions) - ch_versions = ch_versions.mix(CIRCEXPLORER2_PARSE.out.versions) - ch_versions = ch_versions.mix(CIRCEXPLORER2_ANNOTATE.out.versions) - - // - // CIRCRNA_FINDER WORKFLOW: - // - - circrna_finder_stage = STAR_2ND_PASS.out.sam.join( STAR_2ND_PASS.out.junction).join(STAR_2ND_PASS.out.tab) - circrna_finder_filter = circrna_finder_stage.map{ meta, sam, junction, tab -> meta.tool = "circrna_finder"; return [ meta, sam, junction, tab ] } - CIRCRNA_FINDER_FILTER( circrna_finder_filter, fasta, bsj_reads ) - - ch_versions = ch_versions.mix(CIRCRNA_FINDER_FILTER.out.versions) - - // - // FIND_CIRC WORKFLOW: - // - - FIND_CIRC_ALIGN( reads, bowtie2_index.collect(), false, true ) - SAMTOOLS_INDEX( FIND_CIRC_ALIGN.out.bam ) - SAMTOOLS_VIEW( FIND_CIRC_ALIGN.out.bam.join( SAMTOOLS_INDEX.out.bai ), fasta, [] ) - FIND_CIRC_ANCHORS( SAMTOOLS_VIEW.out.bam ) - FIND_CIRC( FIND_CIRC_ANCHORS.out.anchors, bowtie2_index.collect(), fasta ) - find_circ_filter = FIND_CIRC.out.bed.map{ meta, bed -> meta.tool = "find_circ"; return [ meta, bed ] } - FIND_CIRC_FILTER( find_circ_filter, bsj_reads ) - - ch_versions = ch_versions.mix(FIND_CIRC_ALIGN.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) - ch_versions = ch_versions.mix(FIND_CIRC_ANCHORS.out.versions) - ch_versions = ch_versions.mix(FIND_CIRC_FILTER.out.versions) - - // - // CIRIQUANT WORKFLOW: - // - - CIRIQUANT_YML( gtf, fasta, bwa_index.map{ meta, index -> return index }, hisat2_index ) - CIRIQUANT( reads, CIRIQUANT_YML.out.yml.collect() ) - CIRIQUANT_FILTER( CIRIQUANT.out.gtf.map{ meta, gtf -> meta.tool = "ciriquant"; return [ meta, gtf ] }, bsj_reads ) - - ch_versions = ch_versions.mix(CIRIQUANT.out.versions) - - // - // DCC WORKFLOW - // - - DCC_1ST_PASS( reads, star_index, gtf, true, '', '' ) - DCC_SJDB( DCC_1ST_PASS.out.tab.map{ meta, tab -> return [ tab ] }.collect(), 
bsj_reads ) - DCC_2ND_PASS( reads, star_index, DCC_SJDB.out.sjtab, true, '', '' ) - - mate1 = reads.map{ meta, reads -> return [meta, reads[0] ] } - DCC_MATE1_1ST_PASS( mate1, star_index, gtf, true, '', '' ) - DCC_MATE1_SJDB( DCC_MATE1_1ST_PASS.out.tab.map{ meta, tab -> return [ tab ] }.collect(), bsj_reads ) - DCC_MATE1_2ND_PASS( mate1, star_index, DCC_MATE1_SJDB.out.sjtab, true, '', '' ) - - mate2 = reads.map{ meta, reads -> return [ meta, reads[1] ] } - DCC_MATE2_1ST_PASS( mate2, star_index, gtf, true, '', '' ) - DCC_MATE2_SJDB( DCC_MATE2_1ST_PASS.out.tab.map{ meta, tab -> return [ tab ] }.collect(), bsj_reads ) - DCC_MATE2_2ND_PASS( mate2, star_index, DCC_MATE2_SJDB.out.sjtab, true, '', '' ) - - dcc_stage = DCC_2ND_PASS.out.junction.join( DCC_MATE1_2ND_PASS.out.junction, remainder: true ).join( DCC_MATE2_2ND_PASS.out.junction, remainder: true ) - dcc = dcc_stage.map{ it -> def meta = it[0]; if( meta.single_end ){ return [ it[0], it[1], [], [] ] } else { return it } }.view() - DCC( dcc, fasta, gtf ) - DCC_FILTER( DCC.out.txt.map{ meta, txt -> meta.tool = "dcc"; return [ meta, txt ] }, bsj_reads ) - - ch_versions = ch_versions.mix(DCC_MATE1_1ST_PASS.out.versions) - ch_versions = ch_versions.mix(DCC.out.versions) - - // - // MAPSPLICE WORKFLOW: - // - - MAPSPLICE_REFERENCE( gtf ) - MAPSPLICE_ALIGN( reads, bowtie_index.collect(), chromosomes, gtf ) - MAPSPLICE_PARSE( MAPSPLICE_ALIGN.out.raw_fusions ) - MAPSPLICE_ANNOTATE( MAPSPLICE_PARSE.out.junction, fasta, MAPSPLICE_REFERENCE.out.txt ) - mapsplice_filter = MAPSPLICE_ANNOTATE.out.txt.map{ meta, txt -> meta.tool = "mapsplice"; return [ meta, txt ] } - MAPSPLICE_FILTER( mapsplice_filter, bsj_reads ) - - ch_versions = ch_versions.mix(MAPSPLICE_REFERENCE.out.versions) - ch_versions = ch_versions.mix(MAPSPLICE_ALIGN.out.versions) - ch_versions = ch_versions.mix(MAPSPLICE_PARSE.out.versions) - ch_versions = ch_versions.mix(MAPSPLICE_ANNOTATE.out.versions) - - // - // ANNOTATION WORKFLOW: - // - - circrna_filtered = CIRCEXPLORER2_FILTER.out.results.mix(SEGEMEHL_FILTER.out.results, CIRCRNA_FINDER_FILTER.out.results, FIND_CIRC_FILTER.out.results, CIRIQUANT_FILTER.out.results, DCC_FILTER.out.results, MAPSPLICE_FILTER.out.results ) - ANNOTATION( circrna_filtered, gtf, exon_boundary ) - - ch_versions = ch_versions.mix(ANNOTATION.out.versions) - - // - // FASTA WORKFLOW: - // - - FASTA( ANNOTATION.out.bed, fasta ) - - ch_versions = ch_versions.mix(FASTA.out.versions) - - // - // COUNT MATRIX WORKFLOW: - // - - ch_matrix = CIRCEXPLORER2_FILTER.out.matrix.mix(SEGEMEHL_FILTER.out.matrix, CIRCRNA_FINDER_FILTER.out.matrix, FIND_CIRC_FILTER.out.matrix, CIRIQUANT_FILTER.out.matrix, DCC_FILTER.out.matrix, MAPSPLICE_FILTER.out.matrix ) - tools_selected = params.tool.split(',').collect{it.trim().toLowerCase()} - - if( tools_selected.size() > 1){ - - MERGE_TOOLS( ch_matrix.map{ meta, bed -> var = [:]; var.id = meta.id; return [ var, bed ] }.groupTuple(), tool_filter, duplicates_fun ) - - COUNTS_COMBINED( MERGE_TOOLS.out.merged.map{ meta, bed -> return [ bed ] }.collect() ) - - dea_matrix = COUNTS_COMBINED.out.dea_matrix - clr_matrix = COUNTS_COMBINED.out.clr_matrix - - }else{ - - // TODO: concerned that this does not wait for all files? 
- COUNTS_SINGLE( ch_matrix.map{ meta, bed -> var = [:]; var.tool = meta.tool; return [ var, bed ] }.groupTuple() ) - - dea_matrix = COUNTS_SINGLE.out.dea_matrix - clr_matrix = COUNTS_SINGLE.out.clr_matrix - - } - - emit: - circrna_bed12 = ANNOTATION.out.bed - fasta = FASTA.out.analysis_fasta - versions = ch_versions - dea_matrix - clr_matrix -} diff --git a/subworkflows/local/detection_tools/circexplorer2.nf b/subworkflows/local/detection_tools/circexplorer2.nf new file mode 100644 index 000000000..b3908e91d --- /dev/null +++ b/subworkflows/local/detection_tools/circexplorer2.nf @@ -0,0 +1,30 @@ +include { CIRCEXPLORER2_REFERENCE as REFERENCE } from '../../../modules/local/circexplorer2/reference' +include { CIRCEXPLORER2_PARSE as PARSE } from '../../../modules/nf-core/circexplorer2/parse' +include { CIRCEXPLORER2_ANNOTATE as ANNOTATE } from '../../../modules/nf-core/circexplorer2/annotate' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow CIRCEXPLORER2 { + take: + gtf + fasta + star_junctions + + main: + ch_versions = Channel.empty() + + REFERENCE( gtf ) + PARSE( star_junctions ) + ANNOTATE( PARSE.out.junction, fasta, REFERENCE.out.txt ) + UNIFY( ANNOTATE.out.txt + .map{ meta, txt -> [ meta + [tool: "circexplorer2"], txt ] }, [] ) + + ch_versions = ch_versions.mix(REFERENCE.out.versions) + ch_versions = ch_versions.mix(PARSE.out.versions) + ch_versions = ch_versions.mix(ANNOTATE.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/circrna_finder.nf b/subworkflows/local/detection_tools/circrna_finder.nf new file mode 100644 index 000000000..a45459394 --- /dev/null +++ b/subworkflows/local/detection_tools/circrna_finder.nf @@ -0,0 +1,28 @@ +include { CIRCRNA_FINDER as MAIN } from '../../../modules/local/circrna_finder' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow CIRCRNA_FINDER { + take: + fasta + star_sam + star_junctions + star_tab + + main: + ch_versions = Channel.empty() + + ch_joined = star_sam.join(star_junctions).join(star_tab) + .map{ meta, sam, junction, tab -> + [ meta + [tool: "circrna_finder"], [sam, junction, tab] ] } + + MAIN( ch_joined ) + UNIFY( MAIN.out.results, [] ) + + ch_versions = ch_versions.mix(MAIN.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/ciriquant.nf b/subworkflows/local/detection_tools/ciriquant.nf new file mode 100644 index 000000000..7340e6e12 --- /dev/null +++ b/subworkflows/local/detection_tools/ciriquant.nf @@ -0,0 +1,26 @@ +include { CIRIQUANT as MAIN } from '../../../modules/local/ciriquant' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow CIRIQUANT { + take: + reads + ch_gtf + ch_fasta + bwa_index + hisat2_index + + main: + ch_versions = Channel.empty() + + MAIN( reads, ch_gtf, ch_fasta, bwa_index, hisat2_index ) + UNIFY( MAIN.out.gtf.map{ meta, gtf -> + [ meta + [tool: "ciriquant"], gtf ] }, [] ) + + ch_versions = ch_versions.mix(MAIN.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/dcc.nf b/subworkflows/local/detection_tools/dcc.nf new file mode 100644 index 000000000..4bb1103d6 --- /dev/null +++ b/subworkflows/local/detection_tools/dcc.nf @@ -0,0 +1,67 @@ +include { STAR_ALIGN as 
MATE1_1ST_PASS } from '../../../modules/nf-core/star/align' +include { STAR_ALIGN as MATE1_2ND_PASS } from '../../../modules/nf-core/star/align' +include { SJDB as MATE1_SJDB } from '../../../modules/local/star/sjdb' +include { STAR_ALIGN as MATE2_1ST_PASS } from '../../../modules/nf-core/star/align' +include { STAR_ALIGN as MATE2_2ND_PASS } from '../../../modules/nf-core/star/align' +include { SJDB as MATE2_SJDB } from '../../../modules/local/star/sjdb' +include { DCC as MAIN } from '../../../modules/local/dcc' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow DCC { + take: + reads + ch_fasta + ch_gtf + star_index + star_junction + ignore_sjdbgtf + seq_platform + seq_center + bsj_reads + + main: + ch_versions = Channel.empty() + + mate1 = reads.filter{ meta, reads -> !meta.single_end } + .map{ meta, reads -> return [ [id: meta.id, single_end: true], reads[0] ] } + MATE1_1ST_PASS( mate1, star_index, ch_gtf, ignore_sjdbgtf, seq_platform, seq_center ) + MATE1_SJDB( MATE1_1ST_PASS.out.tab + .map{ meta, tab -> return tab }.collect().map{[[id: "mate1_sjdb"], it]}, bsj_reads ) + MATE1_2ND_PASS( mate1, star_index, MATE1_SJDB.out.sjtab, ignore_sjdbgtf, seq_platform, seq_center ) + + mate2 = reads.filter{ meta, reads -> !meta.single_end } + .map{ meta, reads -> return [ [id: meta.id, single_end: true], reads[1] ] } + MATE2_1ST_PASS( mate2, star_index, ch_gtf, ignore_sjdbgtf, seq_platform, seq_center ) + MATE2_SJDB( MATE2_1ST_PASS.out.tab + .map{ meta, tab -> return tab }.collect().map{[[id: "mate2_sjdb"], it]}, bsj_reads ) + MATE2_2ND_PASS( mate2, star_index, MATE2_SJDB.out.sjtab, ignore_sjdbgtf, seq_platform, seq_center ) + + dcc_stage = star_junction.map{ meta, junction -> return [ meta.id, meta, junction]} + .join( + MATE1_2ND_PASS.out.junction.map{ meta, junction -> return [ meta.id, junction] }, + remainder: true + ) + .join( + MATE2_2ND_PASS.out.junction.map{ meta, junction -> return [ meta.id, junction] }, + remainder: true + ) + .map{ id, meta, junction, mate1, mate2 -> return [ meta, junction, mate1, mate2 ]} + + dcc = dcc_stage.map{ it -> [ it[0], it[1], it[2] ?: [], it[3] ?: [] ] } + MAIN( dcc, ch_fasta.map{ meta, fasta -> fasta }, ch_gtf.map{ meta, gtf -> gtf } ) + UNIFY( MAIN.out.txt.map{ meta, txt -> [ meta + [tool: "dcc"], txt ] }, [] ) + + ch_versions = ch_versions.mix(MATE1_1ST_PASS.out.versions) + ch_versions = ch_versions.mix(MATE1_SJDB.out.versions) + ch_versions = ch_versions.mix(MATE1_2ND_PASS.out.versions) + ch_versions = ch_versions.mix(MATE2_1ST_PASS.out.versions) + ch_versions = ch_versions.mix(MATE2_SJDB.out.versions) + ch_versions = ch_versions.mix(MATE2_2ND_PASS.out.versions) + ch_versions = ch_versions.mix(MAIN.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/find_circ.nf b/subworkflows/local/detection_tools/find_circ.nf new file mode 100644 index 000000000..ce0cc557b --- /dev/null +++ b/subworkflows/local/detection_tools/find_circ.nf @@ -0,0 +1,36 @@ +include { BOWTIE2_ALIGN as ALIGN } from '../../../modules/nf-core/bowtie2/align' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { FIND_CIRC_ANCHORS as ANCHORS } from '../../../modules/local/find_circ/anchors' +include { FIND_CIRC as MAIN } from '../../../modules/local/find_circ/find_circ' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + 
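Stepping back to the `DCC` joins above: `join(..., remainder: true)` emits `null` for keys present on only one side (single-end samples have no per-mate junction files), and the following `map` swaps those `null`s for empty lists so the downstream module always receives the same arity. A hedged miniature of the idiom, with hypothetical sample keys:

```nextflow
ch_junction = Channel.of(['s1', 'chimeric_s1'], ['s2', 'chimeric_s2'])
ch_mate1    = Channel.of(['s1', 'mate1_s1'])       // 's2' is single-end: no mate1 entry

ch_junction
    .join(ch_mate1, remainder: true)               // missing keys come through as null
    .map { id, junction, mate1 -> [ id, junction, mate1 ?: [] ] }  // Elvis: null -> []
    .view()
// => [s1, chimeric_s1, mate1_s1]
//    [s2, chimeric_s2, []]
```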
+workflow FIND_CIRC { + take: + reads + bowtie2_index + ch_fasta + + main: + ch_versions = Channel.empty() + + ALIGN( reads, bowtie2_index, ch_fasta, false, true ) + SAMTOOLS_INDEX( ALIGN.out.bam ) + SAMTOOLS_VIEW( ALIGN.out.bam.join( SAMTOOLS_INDEX.out.bai ), ch_fasta, [] ) + ANCHORS( SAMTOOLS_VIEW.out.bam ) + MAIN( ANCHORS.out.anchors, bowtie2_index, ch_fasta.map{ meta, fasta -> fasta } ) + UNIFY( MAIN.out.bed.map{ meta, bed -> + [ meta + [tool: "find_circ"], bed ] }, [] ) + + ch_versions = ch_versions.mix(ALIGN.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) + ch_versions = ch_versions.mix(ANCHORS.out.versions) + ch_versions = ch_versions.mix(MAIN.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/mapsplice.nf b/subworkflows/local/detection_tools/mapsplice.nf new file mode 100644 index 000000000..f81124265 --- /dev/null +++ b/subworkflows/local/detection_tools/mapsplice.nf @@ -0,0 +1,36 @@ +include { CIRCEXPLORER2_REFERENCE as REFERENCE } from '../../../modules/local/circexplorer2/reference' +include { MAPSPLICE_ALIGN as ALIGN } from '../../../modules/local/mapsplice/align' +include { CIRCEXPLORER2_PARSE as PARSE } from '../../../modules/nf-core/circexplorer2/parse' +include { CIRCEXPLORER2_ANNOTATE as ANNOTATE } from '../../../modules/nf-core/circexplorer2/annotate' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow MAPSPLICE { + take: + reads + gtf + fasta + bowtie_index + chromosomes + star_junctions + + main: + ch_versions = Channel.empty() + + REFERENCE( gtf ) + ALIGN( reads, bowtie_index, chromosomes, gtf ) + PARSE( ALIGN.out.raw_fusions ) + ANNOTATE( PARSE.out.junction, fasta, REFERENCE.out.txt ) + UNIFY( ANNOTATE.out.txt.map{ meta, txt -> + [ meta + [tool: "mapsplice"], txt ] }, [] ) + + ch_versions = ch_versions.mix(REFERENCE.out.versions) + ch_versions = ch_versions.mix(ALIGN.out.versions) + ch_versions = ch_versions.mix(PARSE.out.versions) + ch_versions = ch_versions.mix(ANNOTATE.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/segemehl.nf b/subworkflows/local/detection_tools/segemehl.nf new file mode 100644 index 000000000..9e8e2c0d9 --- /dev/null +++ b/subworkflows/local/detection_tools/segemehl.nf @@ -0,0 +1,37 @@ +include { SEGEMEHL_INDEX as INDEX } from '../../../modules/nf-core/segemehl/index' +include { SEGEMEHL_ALIGN as ALIGN } from '../../../modules/nf-core/segemehl/align' +include { GAWK as EXTRACT } from '../../../modules/nf-core/gawk' +include { GNU_SORT as SORT } from '../../../modules/nf-core/gnu/sort' +include { BEDTOOLS_GROUPBY as GROUP } from '../../../modules/nf-core/bedtools/groupby' +include { GAWK as UNIFY } from '../../../modules/nf-core/gawk' + +workflow SEGEMEHL { + take: + reads + fasta + index + + main: + ch_versions = Channel.empty() + + index = index ?: INDEX( fasta ).index + + ALIGN( reads, fasta, index ) + EXTRACT( ALIGN.out.single_bed + .map{ meta, bed -> [ meta + [tool: "segemehl"], bed ] }, [] ) + + SORT( EXTRACT.out.output ) + GROUP( SORT.out.sorted, 5 ) + UNIFY( GROUP.out.bed, [] ) + + ch_versions = ch_versions.mix(ALIGN.out.versions) + ch_versions = ch_versions.mix(EXTRACT.out.versions) + ch_versions = ch_versions.mix(SORT.out.versions) + ch_versions = 
ch_versions.mix(GROUP.out.versions) + ch_versions = ch_versions.mix(UNIFY.out.versions) + + emit: + bed = UNIFY.out.output + + versions = ch_versions +} diff --git a/subworkflows/local/detection_tools/star2pass.nf b/subworkflows/local/detection_tools/star2pass.nf new file mode 100644 index 000000000..4e216545b --- /dev/null +++ b/subworkflows/local/detection_tools/star2pass.nf @@ -0,0 +1,34 @@ +include { STAR_ALIGN as PASS_1 } from '../../../modules/nf-core/star/align' +include { STAR_ALIGN as PASS_2 } from '../../../modules/nf-core/star/align' +include { SJDB } from '../../../modules/local/star/sjdb' + + +workflow STAR2PASS { + take: + reads + star_index + ch_gtf + bsj_reads + ignore_sjdbgtf + seq_center + seq_platform + + main: + ch_versions = Channel.empty() + + PASS_1( reads, star_index, ch_gtf, ignore_sjdbgtf, seq_platform, seq_center) + sjdb = PASS_1.out.tab.map{ meta, tab -> return tab }.collect().map{[[id: "star_sjdb"], it]} + SJDB( sjdb, bsj_reads ) + PASS_2( reads, star_index, SJDB.out.sjtab, ignore_sjdbgtf, seq_platform, seq_center ) + + ch_versions = ch_versions.mix(PASS_1.out.versions) + ch_versions = ch_versions.mix(SJDB.out.versions) + ch_versions = ch_versions.mix(PASS_2.out.versions) + + emit: + junction = PASS_2.out.junction + sam = PASS_2.out.sam + tab = PASS_2.out.tab + + versions = ch_versions +} diff --git a/subworkflows/local/differential_expression.nf b/subworkflows/local/differential_expression.nf deleted file mode 100644 index ef9c4c596..000000000 --- a/subworkflows/local/differential_expression.nf +++ /dev/null @@ -1,61 +0,0 @@ -include { HISAT2_ALIGN } from '../../modules/nf-core/hisat2/align/main' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie/main' -include { STRINGTIE_PREPDE } from '../../modules/local/stringtie/prepde/main' -include { DESEQ2_DIFFERENTIAL_EXPRESSION } from '../../modules/local/deseq2/differential_expression/main' -include { PARENT_GENE } from '../../modules/local/annotation/parent_gene/main' -include { PREPARE_CLR_TEST } from '../../modules/local/circtest/prepare/main' -include { CIRCTEST } from '../../modules/local/circtest/test/main' - -workflow DIFFERENTIAL_EXPRESSION { - - take: - reads - gtf - fasta - hisat2_index - splice_sites - phenotype - dea_matrix - clr_matrix - species - ensembl_map - exon_boundary - - main: - ch_versions = Channel.empty() - - // - // LINEAR RNA ALIGNEMT WORKFLOW: - // - - HISAT2_ALIGN( reads, hisat2_index, splice_sites ) - SAMTOOLS_SORT( HISAT2_ALIGN.out.bam ) - STRINGTIE_STRINGTIE( SAMTOOLS_SORT.out.bam, gtf ) - STRINGTIE_PREPDE( STRINGTIE_STRINGTIE.out.transcript_gtf.map{ meta, gtf -> return [ gtf ] }.collect() ) - - // - // Circular, Linear Differential Expression - // - - DESEQ2_DIFFERENTIAL_EXPRESSION( STRINGTIE_PREPDE.out.gene_matrix, phenotype, dea_matrix, species, ensembl_map ) - - // - // CircRNA - Host Gene Ratio tests - // - - PARENT_GENE( clr_matrix, gtf, exon_boundary ) - PREPARE_CLR_TEST( STRINGTIE_PREPDE.out.gene_matrix, clr_matrix, PARENT_GENE.out.circ_host_map, gtf ) - CIRCTEST( PREPARE_CLR_TEST.out.circular, PREPARE_CLR_TEST.out.linear, phenotype ) - - ch_versions = ch_versions.mix(HISAT2_ALIGN.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) - ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) - ch_versions = ch_versions.mix(DESEQ2_DIFFERENTIAL_EXPRESSION.out.versions) - ch_versions = ch_versions.mix(PARENT_GENE.out.versions) - ch_versions = 
ch_versions.mix(PREPARE_CLR_TEST.out.versions) - ch_versions = ch_versions.mix(CIRCTEST.out.versions) - - emit: - versions = ch_versions -} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 753a28383..000000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,78 +0,0 @@ -// -// Check input files -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet - ch_phenotype - - main: - phenotype = params.phenotype && params.module.contains('differential_expression') ? examine_phenotype(ch_phenotype) : Channel.empty() - - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - phenotype // Channel: [ pheno ] - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - - // Conduct check here, cannot figure out how to outside of this scope i.e accessing meta.single end for an if else from channel. - if(params.tool.split(',').contains('ciriquant')){ - exit 1, "ERROR: Unfortunately CIRIquant does not support single-end reads. Please select one of the other 6 quantification tools." - } - - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} - - -def examine_phenotype(pheno){ - - Channel - .fromPath(pheno) - .splitCsv(header: true, sep: ',') - .map{ row -> - - def expected_cols = ['condition'] - - if (!row.keySet().containsAll(expected_cols)) exit 1, "[nf-core/circrna] error: 'condition' is not a column name in the phenotype file.\n\nThe primary response variable must be named 'condition', please refer to the usage documentation online" - - def condition = row.condition.matches('NA') ? 'NA' : row.condition - - if(condition == '') exit 1, "[nf-core/circrna] error: Invalid phenotype file, condition column contains empty cells." - if(condition.matches('NA')) exit 1, "[nf-core/circrna] error: NA value in phenotype condition column." 
- - } - .toList() - - return Channel.value(file(pheno)) -} diff --git a/subworkflows/local/mirna/mirna_bindingsites.nf b/subworkflows/local/mirna/mirna_bindingsites.nf new file mode 100644 index 000000000..d81abae59 --- /dev/null +++ b/subworkflows/local/mirna/mirna_bindingsites.nf @@ -0,0 +1,114 @@ +include { BIOAWK as ADD_BACKSPLICE } from '../../../modules/nf-core/bioawk' +include { MIRANDA } from '../../../modules/nf-core/miranda' +include { GAWK as UNIFY_MIRANDA } from '../../../modules/nf-core/gawk' +include { TARGETSCAN } from '../../../modules/local/targetscan/predict' +include { GAWK as UNIFY_TARGETSCAN } from '../../../modules/nf-core/gawk' +include { MIRNA_TARGETS } from '../../../modules/local/mirna_targets' +include { CAT_CAT as COMBINE_BINDINGSITES } from '../../../modules/nf-core/cat/cat' +include { MAJORITY_VOTE } from '../../../modules/local/majority_vote' + +workflow MIRNA_BINDINGSITES { + take: + transcriptome_fasta + circrna_bed12 + mirna_fasta + + main: + ch_versions = Channel.empty() + ch_predictions = Channel.empty() + + // miRNAs can potentially bind to circRNAs right at the backsplice site + // In this case, the miRNA binding sequence would partially overlap with start and end of the circRNA + // To account for this, the first 25bp of the circRNA are added to the end of the circRNA sequence + ADD_BACKSPLICE( transcriptome_fasta ) + ch_versions = ch_versions.mix(ADD_BACKSPLICE.out.versions) + + ch_transcriptome_batches = ADD_BACKSPLICE.out.output + .splitFasta(by: 100, file: true) + .map{ meta, file -> [[id: "batch_" + file.baseName.split("\\.").last()], file]} + + // + // MIRNA PREDICTION TOOLS: + // + tools_selected = params.mirna_tools.split(',').collect{it.trim().toLowerCase()} + + if (tools_selected.size() == 0) { + error 'No tools selected for miRNA discovery.' 
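To make the 25 bp padding performed by `ADD_BACKSPLICE` concrete: a toy version of the transformation (not the pipeline's actual bioawk command) appends the start of each circular sequence to its own end, so a binding site spanning the backsplice junction still matches a contiguous substring:

```nextflow
// Hypothetical circRNA record; seq.take(25) is the first 25 bp.
Channel.of(['chr1:100-200:+', 'ACGTACGTACGTACGTACGTACGTACGTACGT'])
    .map { id, seq -> [ id, seq + seq.take(25) ] }
    .view()
```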
+ } + + if (tools_selected.contains('targetscan')) { + // + // TARGETSCAN WORKFLOW: + // + TARGETSCAN( ch_transcriptome_batches, formatMiRNAForTargetScan( mirna_fasta ).collect() ) + UNIFY_TARGETSCAN( TARGETSCAN.out.txt, [] ) + + ch_versions = ch_versions.mix(TARGETSCAN.out.versions) + ch_versions = ch_versions.mix(UNIFY_TARGETSCAN.out.versions) + ch_predictions = ch_predictions.mix(UNIFY_TARGETSCAN.out.output) + } + + if (tools_selected.contains('miranda')) { + // + // MIRANDA WORKFLOW: + // + MIRANDA( ch_transcriptome_batches, mirna_fasta.map{meta, mature -> mature}.collect() ) + UNIFY_MIRANDA( MIRANDA.out.txt, [] ) + + ch_versions = ch_versions.mix(MIRANDA.out.versions) + ch_versions = ch_versions.mix(UNIFY_MIRANDA.out.versions) + ch_predictions = ch_predictions.mix(UNIFY_MIRANDA.out.output) + } + + // + // CONSOLIDATE PREDICTIONS WORKFLOW: + // + // TODO: This is an artifact and should be removed if we have a replacement + + // consolidate_targets = TARGETSCAN.out.txt.join(MIRANDA.out.txt).join(circrna_bed12) + consolidate_targets = TARGETSCAN.out.txt.join(MIRANDA.out.txt) + + MIRNA_TARGETS( consolidate_targets ) + + ch_versions = ch_versions.mix(MIRNA_TARGETS.out.versions) + + // + // MAJORITY VOTING: + // + COMBINE_BINDINGSITES ( ch_predictions.map{meta, file -> file}.collect().map{[[id: "mirna"], it]} ) + MAJORITY_VOTE( COMBINE_BINDINGSITES.out.file_out ) + + ch_versions = ch_versions.mix(COMBINE_BINDINGSITES.out.versions) + ch_versions = ch_versions.mix(MAJORITY_VOTE.out.versions) + + emit: + binding_sites = MAJORITY_VOTE.out.targets + + versions = ch_versions +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// Formatting miRNA input for targetscan +// takes mature.fa, iterates over entries (id, seq) and generates a new file +// writing: +// 1. miR ID +// 2. miR (7bp) seed sequence from mature seq +// 3. Species ID (set to 0000, not important for output). 
+// to new file +def formatMiRNAForTargetScan(ch_mature) { + + def ch_targetscan_meta_formatted = ch_mature + .map { meta, mature -> mature } + .splitFasta(record: [id: true, seqString: true]) + .map { record -> + return "${record.id}\t${record.seqString[1..7]}\t0000\n" + } + .collectFile(name: 'mature.txt') + + ch_targetscan_meta_formatted = ch_targetscan_meta_formatted.map { [[id: "mature_targetscan"], it] } + return ch_targetscan_meta_formatted +} diff --git a/subworkflows/local/mirna_prediction.nf b/subworkflows/local/mirna_prediction.nf index 953c794ef..b3fc46cc0 100644 --- a/subworkflows/local/mirna_prediction.nf +++ b/subworkflows/local/mirna_prediction.nf @@ -1,43 +1,84 @@ -include { TARGETSCAN_DATABASE } from '../../modules/local/targetscan/database/main' -include { TARGETSCAN } from '../../modules/local/targetscan/predict/main' -include { MIRANDA } from '../../modules/nf-core/miranda/main' -include { MIRNA_TARGETS } from '../../modules/local/mirna_targets/main' +// MODULES +include { BIOAWK as ADD_BACKSPLICE } from '../../modules/nf-core/bioawk' +include { DESEQ2_NORMALIZATION } from '../../modules/local/deseq2/normalization' +include { MIRNA_FILTERING } from '../../modules/local/mirna_filtering' +include { COMPUTE_CORRELATIONS } from '../../modules/local/compute_correlations' -workflow MIRNA_PREDICTION{ +// SUBWORKFLOWS +include { MIRNA_BINDINGSITES } from './mirna/mirna_bindingsites' + +workflow MIRNA_PREDICTION { take: - circrna_fasta - circrna_bed12 - mature + transcriptome_fasta + circrna_annotation + ch_mature + ch_mirna + transcript_counts + quantification_rds main: ch_versions = Channel.empty() // - // TARGETSCAN WORKFLOW: + // MIRNA NORMALIZATION WORKFLOW: // - TARGETSCAN_DATABASE( mature ) - TARGETSCAN( circrna_fasta, TARGETSCAN_DATABASE.out.mature_txt ) + if (params.mirna_expression) { - ch_versions = ch_versions.mix(TARGETSCAN.out.versions) + ch_mirna_normalized = DESEQ2_NORMALIZATION( ch_mirna ).normalized - // - // MIRANDA WORKFLOW: - // + ch_versions = ch_versions.mix(DESEQ2_NORMALIZATION.out.versions) - MIRANDA( circrna_fasta, mature ) + ch_mirna_filtered = MIRNA_FILTERING(ch_mirna_normalized, + params.mirna_min_sample_percentage, + params.mirna_min_reads + ).filtered - ch_versions = ch_versions.mix(MIRANDA.out.versions) + ch_versions = ch_versions.mix(MIRNA_FILTERING.out.versions) - // - // CONSOLIDATE PREDICTIONS WORKFLOW: - // + // + // MIRNA BINDING SITES: + // + + // Filtering miRNAs from ch_mature if they are not in ch_mirna_filtered. 
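The lines that follow implement this with `splitFasta`, `combine` and `filter`; the `.map{ it -> [it] }` wrap keeps the collected ID list as a single tuple field, since `combine` would otherwise concatenate the bare list element-by-element. In miniature, with toy IDs and a hypothetical `mature.fa`:

```nextflow
ch_keep = Channel.of([['hsa-miR-21-5p', 'hsa-miR-155-5p']])   // collected IDs, wrapped once
Channel.fromPath('mature.fa')                                 // hypothetical miRBase FASTA
    .splitFasta(record: [id: true, seqString: true])
    .combine(ch_keep)                                         // pairs each record with the ID list
    .filter { record, keep -> keep.contains(record.id) }
    .map { record, keep -> ">${record.id}\n${record.seqString}" }
    .collectFile(name: 'mature_filtered.fa', newLine: true)
```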
+ ch_uniq_mirnas = ch_mirna_filtered.map{ meta, path -> path }.splitCsv( sep: '\t' ).map{ it[0] }.unique().collect() + + ch_mature = ch_mature + .map{ meta, path -> + path + } + .splitFasta( record: [id:true, seqString:true] ) + .combine(ch_uniq_mirnas.map{ it -> [it]}) // Not sure why this mapping is necessary but I think it is + .filter{ record, mirnas -> + ch_uniq_mirnas.contains(record.id).value + }.map{ record, mirnas -> + ">${record.id}\n${record.seqString}" + } + .collectFile( name: 'mature_filtered.fa', newLine: true) + .map{ it -> [[id: 'mature_filtered'], it]} + } + + MIRNA_BINDINGSITES( transcriptome_fasta, circrna_annotation, ch_mature ) + ch_versions = ch_versions.mix(MIRNA_BINDINGSITES.out.versions) + + if (params.mirna_expression) { + // + // COMPUTE CORRELATION: + // + ch_binding_site_batches = MIRNA_BINDINGSITES.out.binding_sites + .splitText(by: 100, file: true) + .map{ meta, file -> [[id: "batch_" + file.baseName.split("\\.").last()], file]} + + COMPUTE_CORRELATIONS(ch_binding_site_batches, ch_mirna_filtered, quantification_rds) - consolidate_targets = TARGETSCAN.out.txt.join(MIRANDA.out.txt).join(circrna_bed12) - MIRNA_TARGETS( consolidate_targets ) + ch_correlation_results = COMPUTE_CORRELATIONS.out.correlations + .map{meta, results -> results} + .flatten().collect() + .map{results -> [[id: 'correlation'], results]} - ch_versions = ch_versions.mix(MIRNA_TARGETS.out.versions) + ch_versions = ch_versions.mix(COMPUTE_CORRELATIONS.out.versions) + } emit: versions = ch_versions diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 516b3faed..cb87f6b47 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -1,91 +1,72 @@ - -include { BOWTIE_BUILD } from '../../modules/nf-core/bowtie/build/main' -include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' -include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main' -include { HISAT2_EXTRACTSPLICESITES } from '../../modules/nf-core/hisat2/extractsplicesites/main' -include { HISAT2_BUILD } from '../../modules/nf-core/hisat2/build/main' -include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' -include { SEGEMEHL_INDEX } from '../../modules/nf-core/segemehl/index/main' +include { CUSTOM_GTFFILTER as GTFFILTER } from '../../modules/nf-core/custom/gtffilter' +include { SEQKIT_SPLIT } from '../../modules/local/seqkit/split' +include { BOWTIE_BUILD } from '../../modules/nf-core/bowtie/build' +include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build' +include { BWA_INDEX } from '../../modules/nf-core/bwa/index' +include { HISAT2_EXTRACTSPLICESITES } from '../../modules/nf-core/hisat2/extractsplicesites' +include { HISAT2_BUILD } from '../../modules/nf-core/hisat2/build' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate' +include { GAWK as CLEAN_FASTA } from '../../modules/nf-core/gawk' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx' workflow PREPARE_GENOME { take: - fasta - gtf + ch_fasta + ch_gtf main: ch_versions = Channel.empty() - ch_fasta = Channel.fromPath(fasta) - - // MapSplice & find_circ requires reference genome to be split per chromosome: - if( ( params.tool.contains('mapsplice') || params.tool.contains('find_circ') ) && params.module.contains('circrna_discovery') ){ - file("${params.outdir}/genome/chromosomes").mkdirs() - ch_fasta.splitFasta( record: [id:true] ) - .map{ record -> record.id.toString() } - .set{ ID } - - 
ch_fasta.splitFasta( file: true ) - .merge( ID ).map{ file, id -> file.copyTo("${params.outdir}/genome/chromosomes/${id}.fa") } + // MapSplice cannot deal with extra field in the fasta headers + // this removes all additional fields in the headers of the input fasta file + if( params.tools.split(',').contains('mapsplice') ) { + CLEAN_FASTA(ch_fasta, []) + ch_fasta = CLEAN_FASTA.out.output - stage_chromosomes = Channel.value("${workflow.launchDir}/${params.outdir}/genome/chromosomes") + ch_versions = ch_versions.mix(CLEAN_FASTA.out.versions) } - // some index procs use tuple, some dont -_- - ch_fasta.map{ it -> - meta = [:] - meta.id = it.simpleName - return [ meta, [it] ] - }.set{ fasta_tuple } + GTFFILTER(ch_gtf, ch_fasta) + ch_gtf = GTFFILTER.out.gtf - BOWTIE_BUILD( - fasta - ) + SEQKIT_SPLIT(ch_fasta) - BOWTIE2_BUILD( - fasta_tuple - ) + BOWTIE_BUILD(ch_fasta.map{ meta, fasta -> fasta }) - BWA_INDEX ( - fasta_tuple - ) + BOWTIE2_BUILD(ch_fasta) - HISAT2_EXTRACTSPLICESITES( - gtf - ) + BWA_INDEX (ch_fasta) - HISAT2_BUILD( - fasta, - gtf, - HISAT2_EXTRACTSPLICESITES.out.txt - ) + HISAT2_EXTRACTSPLICESITES(ch_gtf) - STAR_GENOMEGENERATE( - fasta, - gtf - ) + HISAT2_BUILD(ch_fasta, ch_gtf, HISAT2_EXTRACTSPLICESITES.out.txt) - SEGEMEHL_INDEX( - fasta - ) + STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + + SAMTOOLS_FAIDX(ch_fasta, [[], []]) // Collect versions - ch_versions = ch_versions.mix(BOWTIE_BUILD.out.versions) - ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) - ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) - ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) - ch_versions = ch_versions.mix(SEGEMEHL_INDEX.out.versions) - ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + ch_versions = ch_versions.mix(GTFFILTER.out.versions, + SEQKIT_SPLIT.out.versions, + BOWTIE_BUILD.out.versions, + BOWTIE2_BUILD.out.versions, + BWA_INDEX.out.versions, + HISAT2_EXTRACTSPLICESITES.out.versions, + HISAT2_BUILD.out.versions, + STAR_GENOMEGENERATE.out.versions, + SAMTOOLS_FAIDX.out.versions) emit: - bowtie = BOWTIE_BUILD.out.index - bowtie2 = BOWTIE2_BUILD.out.index - bwa = BWA_INDEX.out.index - chromosomes = ( params.tool.contains('mapsplice') || params.tool.contains('find_circ') ) ? stage_chromosomes : 'null' - hisat2 = HISAT2_BUILD.out.index - star = STAR_GENOMEGENERATE.out.index - segemehl = SEGEMEHL_INDEX.out.index - splice_sites = HISAT2_EXTRACTSPLICESITES.out.txt + gtf = ch_gtf + faidx = SAMTOOLS_FAIDX.out.fai + bowtie = params.bowtie ?: BOWTIE_BUILD.out.index + bowtie2 = params.bowtie2 ? Channel.value([[id: "bowtie2"], file(params.bowtie2, checkIfExists: true)]) : BOWTIE2_BUILD.out.index.collect() + bwa = params.bwa ? Channel.value([[id: "bwa"], file(params.bwa, checkIfExists: true)]) : BWA_INDEX.out.index.collect() + hisat2 = params.hisat2 ? Channel.value([[id: "hisat2"], file(params.hisat2, checkIfExists: true)]) : HISAT2_BUILD.out.index.collect() + star = params.star ? 
Channel.value([[id: "star"], file(params.star, checkIfExists: true)]) : STAR_GENOMEGENERATE.out.index.collect() + chromosomes = SEQKIT_SPLIT.out.split + splice_sites = HISAT2_EXTRACTSPLICESITES.out.txt.collect() + versions = ch_versions } diff --git a/subworkflows/local/quantification.nf b/subworkflows/local/quantification.nf new file mode 100644 index 000000000..62abc16ac --- /dev/null +++ b/subworkflows/local/quantification.nf @@ -0,0 +1,136 @@ +include { GNU_SORT as COMBINE_TRANSCRIPTOME_GTFS } from '../../modules/nf-core/gnu/sort' +include { GAWK as EXCLUDE_OVERLONG_TRANSCRIPTS } from '../../modules/nf-core/gawk' +include { TRANSCRIPTOME } from '../../modules/local/quantification/transcriptome' +include { GAWK as MARK_CIRCULAR } from '../../modules/nf-core/gawk' +include { PSIRC_INDEX } from '../../modules/local/psirc/index' +include { PSIRC_QUANT } from '../../modules/local/psirc/quant' +include { CUSTOM_TX2GENE } from '../../modules/nf-core/custom/tx2gene' +include { TXIMETA_TXIMPORT } from '../../modules/nf-core/tximeta/tximport' +include { TXIMETA_TXIMETA } from '../../modules/local/tximeta/tximeta' +include { MERGE_EXPERIMENTS } from '../../modules/local/quantification/merge_experiments' +include { CSVTK_JOIN as JOIN_GENE_COUNTS } from '../../modules/nf-core/csvtk/join' +include { CSVTK_JOIN as JOIN_GENE_TPM } from '../../modules/nf-core/csvtk/join' +include { CSVTK_JOIN as JOIN_TX_COUNTS } from '../../modules/nf-core/csvtk/join' +include { CSVTK_JOIN as JOIN_TX_TPM } from '../../modules/nf-core/csvtk/join' +include { SPLIT_TYPES as SPLIT_TYPES_COUNTS } from '../../modules/local/quantification/split_types' +include { SPLIT_TYPES as SPLIT_TYPES_TPM } from '../../modules/local/quantification/split_types' + +workflow QUANTIFICATION { + take: + ch_gtf + ch_fasta + reads + circ_annotation_bed + circ_annotation_gtf + bootstrap_samples + ch_phenotype + ch_faidx + + main: + ch_versions = Channel.empty() + + COMBINE_TRANSCRIPTOME_GTFS( + ch_gtf.mix(circ_annotation_gtf).map{meta, gtf -> gtf}.collect().map{[[id: "transcriptome"], it]}, + ) + + EXCLUDE_OVERLONG_TRANSCRIPTS( + COMBINE_TRANSCRIPTOME_GTFS.out.sorted, [] + ) + + TRANSCRIPTOME(EXCLUDE_OVERLONG_TRANSCRIPTS.out.output, ch_fasta) + MARK_CIRCULAR(TRANSCRIPTOME.out.transcriptome, []) + + ch_versions = ch_versions.mix( + COMBINE_TRANSCRIPTOME_GTFS.out.versions, + TRANSCRIPTOME.out.versions, + MARK_CIRCULAR.out.versions, + EXCLUDE_OVERLONG_TRANSCRIPTS.out.versions + ) + + PSIRC_INDEX(MARK_CIRCULAR.out.output) + PSIRC_QUANT(reads, PSIRC_INDEX.out.index.collect(), MARK_CIRCULAR.out.output, ch_faidx, bootstrap_samples) + + CUSTOM_TX2GENE( + COMBINE_TRANSCRIPTOME_GTFS.out.sorted, + PSIRC_QUANT.out.directory.map{meta, quant -> quant}.collect().map{[[id: "quant"], it]}, + "kallisto", + "gene_id", + "gene_name" + ) + + TXIMETA_TXIMETA( + PSIRC_QUANT.out.directory, + "kallisto" + ) + + TXIMETA_TXIMPORT( + PSIRC_QUANT.out.directory, + CUSTOM_TX2GENE.out.tx2gene, + "kallisto" + ) + + ch_versions = ch_versions.mix( + PSIRC_INDEX.out.versions, + PSIRC_QUANT.out.versions, + CUSTOM_TX2GENE.out.versions, + TXIMETA_TXIMETA.out.versions, + TXIMETA_TXIMPORT.out.versions + ) + + JOIN_GENE_COUNTS( + TXIMETA_TXIMPORT.out.counts_gene.map{meta, counts -> counts}.collect().map{[[id: "gene_counts"], it]} + ) + + JOIN_GENE_TPM( + TXIMETA_TXIMPORT.out.tpm_gene.map{meta, tpm -> tpm}.collect().map{[[id: "gene_tpm"], it]} + ) + + JOIN_TX_COUNTS( + TXIMETA_TXIMPORT.out.counts_transcript.map{meta, counts -> counts}.collect().map{[[id: "tx_counts"], it]} + ) + + 
JOIN_TX_TPM( + TXIMETA_TXIMPORT.out.tpm_transcript.map{meta, tpm -> tpm}.collect().map{[[id: "tx_tpm"], it]} + ) + + SPLIT_TYPES_COUNTS( + JOIN_TX_COUNTS.out.csv + ) + + SPLIT_TYPES_TPM( + JOIN_TX_TPM.out.csv + ) + + + MERGE_EXPERIMENTS( + TXIMETA_TXIMETA.out.se.map{meta, se -> se}.collect().map{[[id: "experiments"], it]}, + ch_phenotype.ifEmpty([[], []]), + EXCLUDE_OVERLONG_TRANSCRIPTS.out.output, + JOIN_TX_TPM.out.csv + ) + + ch_versions = ch_versions.mix( + JOIN_GENE_COUNTS.out.versions, + JOIN_GENE_TPM.out.versions, + JOIN_TX_COUNTS.out.versions, + JOIN_TX_TPM.out.versions, + SPLIT_TYPES_COUNTS.out.versions, + SPLIT_TYPES_TPM.out.versions, + MERGE_EXPERIMENTS.out.versions + ) + + emit: + se = MERGE_EXPERIMENTS.out.merged + transcriptome = TRANSCRIPTOME.out.transcriptome + rds = MERGE_EXPERIMENTS.out.merged + gene_counts = JOIN_GENE_COUNTS.out.csv + gene_tpm = JOIN_GENE_TPM.out.csv + tx_counts = JOIN_TX_COUNTS.out.csv + tx_tpm = JOIN_TX_TPM.out.csv + linear_tx_counts = SPLIT_TYPES_COUNTS.out.linear + linear_tx_tpm = SPLIT_TYPES_TPM.out.linear + circular_tx_counts = SPLIT_TYPES_COUNTS.out.circular + circular_tx_tpm = SPLIT_TYPES_TPM.out.circular + + versions = ch_versions +} diff --git a/subworkflows/local/statistical_tests.nf b/subworkflows/local/statistical_tests.nf new file mode 100644 index 000000000..332f9ac70 --- /dev/null +++ b/subworkflows/local/statistical_tests.nf @@ -0,0 +1,24 @@ +include { CIRCTEST_PREPARE } from '../../modules/local/circtest/prepare' +include { CIRCTEST_CIRCTEST } from '../../modules/local/circtest/circtest' + +workflow STATISTICAL_TESTS { + take: + ch_quantification + ch_gene_counts + ch_circ_counts + ch_phenotype + + main: + ch_versions = Channel.empty() + + CIRCTEST_PREPARE(ch_circ_counts, ch_gene_counts) + ch_versions = ch_versions.mix(CIRCTEST_PREPARE.out.versions) + + CIRCTEST_CIRCTEST(CIRCTEST_PREPARE.out.circ_counts, + CIRCTEST_PREPARE.out.gene_counts, + ch_phenotype) + ch_versions = ch_versions.mix(CIRCTEST_CIRCTEST.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_circrna_pipeline/main.nf b/subworkflows/local/utils_nfcore_circrna_pipeline/main.nf new file mode 100644 index 000000000..137ca1883 --- /dev/null +++ b/subworkflows/local/utils_nfcore_circrna_pipeline/main.nf @@ -0,0 +1,281 @@ +// +// Subworkflow with functionality specific to the nf-core/circrna pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE 
+======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + // Check that multiple runs of the same sample are of the same strandedness i.e. auto / unstranded / forward / reverse + def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same strandedness: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +}
+ +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
+ + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +def checkParameterExistence(it, list) { + if (!list.contains(it)) { + log.warn "Unknown parameter: ${it}" + return false + } + return true +} + +def checkParameterList(list, realList) { + return list.every{ checkParameterExistence(it, realList) } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 000000000..b716375b0 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 000000000..e01f9ccf6 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 000000000..821a3cf50 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,134 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + 
""" + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 000000000..b7f4da177 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,330 @@ +{ + "test_bam_sort_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d32de3b3716a11039cef2367c3c1a56e" + ] + ], + [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:02:44.34964" + }, + "test_bam_sort_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,cca83e4fc9406fc3875b5e60055d6574" + ] + ], + [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:03:02.583095" + }, + "test_bam_sort_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:03:22.328703" + }, + "test_bam_sort_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,494b5530a1aa29fd5867cf655bebbfe1", + "versions.yml:md5,9fcb0cd845bfb1f89d83201bb20649b4", + "versions.yml:md5,bacc323ec4055d6f69f07a09089772d1", + "versions.yml:md5,ce946e97097c6a9ccf834a3f91f6da30", + "versions.yml:md5,d6c8dae685f1b7d050165fc15c7a20b5" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-07-22T17:03:38.833662" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml new file mode 100644 index 000000000..30b69d6a4 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_sort_stats_samtools: + - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 000000000..44d4c010a --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 000000000..809bf736b --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 000000000..ac31f28f6 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml 
+import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 000000000..e5c3a0a82 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 000000000..68718e4f5 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 000000000..e3f0baf47 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 000000000..ca964ce8e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 000000000..d0a926bf6 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy 
pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 000000000..f84761125 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 000000000..14558c392 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 000000000..d08d24342 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 000000000..1dc317f8f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 000000000..1037232c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 000000000..8940d32d1 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 000000000..859d1030f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 000000000..d0a926bf6 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 000000000..ac8523c9a --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 000000000..2585b65d1 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params) { + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 000000000..3d4a6b04f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against.
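The six `take:` inputs documented above form the subworkflow's full calling contract. As a rough sketch only (not part of this diff; the command string and schema path are illustrative assumptions), a pipeline initialisation step might invoke it like this:

```groovy
// Hypothetical invocation of UTILS_NFVALIDATION_PLUGIN, passing the six
// take: inputs in the order declared above. The workflow_command string and
// the schema location are assumptions for illustration, not taken from this PR.
UTILS_NFVALIDATION_PLUGIN (
    params.help,                          // print_help
    "nextflow run nf-core/circrna -profile docker --input samplesheet.csv --outdir results", // workflow_command
    null,                                 // pre_help_text
    null,                                 // post_help_text
    params.validate_params,               // validate_params
    "${projectDir}/nextflow_schema.json"  // schema_filename
)
```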
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 000000000..5784a33f2 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any 
{ it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 000000000..7626c1c93 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 000000000..60b1cfff4 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml new file mode 100644 index 000000000..787aedfe9 --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/circrna.nf b/workflows/circrna.nf deleted file mode 100644 index 7422dc549..000000000 --- a/workflows/circrna.nf +++ /dev/null @@ -1,256 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowCircrna.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -// Genome params -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false -params.bwa = params.genome && params.tool.contains('ciriquant') ? params.genomes[ params.genome ].bwa ?: false : false -params.star = params.genome && ( params.tool.contains('circexplorer2') || params.tool.contains('dcc') || params.tool.contains('circrna_finder') ) ? params.genomes[ params.genome ].star ?: false : false -params.bowtie = params.genome && params.tool.contains('mapsplice') ? params.genomes[ params.genome ].bowtie ?: false : false -params.bowtie2 = params.genome && params.tool.contains('find_circ') ? params.genomes[ params.genome ].bowtie2 ?: false : false -params.mature = params.genome && params.module.contains('mirna_prediction') ? params.genomes[ params.genome ].mature ?: false : false -params.species = params.genome ? params.genomes[ params.genome ].species_id ?: false : false - -ch_phenotype = params.phenotype && params.module.contains('differential_expression') ? file(params.phenotype, checkIfExists:true) : Channel.empty() -ch_fasta = params.fasta ? file(params.fasta) : 'null' -ch_gtf = params.gtf ? file(params.gtf) : 'null' -ch_mature = params.mature && params.module.contains('mirna_prediction') ? file(params.mature) : Channel.empty() -ch_species = params.genome ? 
Channel.value(params.species) : Channel.value(params.species) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// MODULES: - -// SUBWORKFLOWS: -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { CIRCRNA_DISCOVERY } from '../subworkflows/local/circrna_discovery' -include { MIRNA_PREDICTION } from '../subworkflows/local/mirna_prediction' -include { DIFFERENTIAL_EXPRESSION } from '../subworkflows/local/differential_expression' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// MODULES: -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' - -// SUBWORKFLOWS: -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow CIRCRNA { - - ch_versions = Channel.empty() - - // - // 1. Pre-processing - // - - // SUBWORKFLOW: - // Validate input samplesheet & phenotype file - INPUT_CHECK ( - ch_input, - ch_phenotype - ) - .reads - .map { - meta, fastq -> - meta.id = meta.id.split('_')[0..-2].join('_') - [ meta, fastq ] } - .dump(tag: 'map') - .groupTuple(by: [0]) - .dump(tag: 'group') - .branch { - meta, fastq -> - single : fastq.size() == 1 - return [ meta, fastq.flatten() ] - multiple: fastq.size() > 1 - return [ meta, fastq.flatten() ] - } - .set { ch_fastq } - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - // MODULE: - // Concatenate FastQ files from same sample if required - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) - - // SUBORKFLOW: - // Prepare index files &/or use iGenomes if chosen. - PREPARE_GENOME ( - ch_fasta, - ch_gtf - ) - - // Stage the indices via newly created indices, iGenomes or empty list if tool not selected. - bowtie_index = params.fasta ? params.bowtie ? 
Channel.fromPath(params.bowtie) : PREPARE_GENOME.out.bowtie : [] - bowtie2_index = params.fasta ? params.bowtie2 ? Channel.fromPath(params.bowtie2) : PREPARE_GENOME.out.bowtie2 : [] - bwa_index = params.fasta ? params.bwa ? Channel.fromPath(params.bwa) : PREPARE_GENOME.out.bwa : [] - chromosomes = ( params.tool.contains('mapsplice') || params.tool.contains('find_circ') ) ? PREPARE_GENOME.out.chromosomes : [] - hisat2_index = ( params.tool.contains('ciriquant') || params.module.contains('differential_expression') ) ? PREPARE_GENOME.out.hisat2 : [] - star_index = params.fasta ? params.star ? Channel.fromPath(params.star) : PREPARE_GENOME.out.star : [] - segemehl_index = params.fasta ? params.segemehl ? Channel.fromPath(params.segemehl) : PREPARE_GENOME.out.segemehl : [] - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - // MODULE: Run FastQC, trimgalore! - FASTQC_TRIMGALORE ( - ch_cat_fastq, - params.skip_fastqc, - params.skip_trimming - ) - ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) - reads_for_circrna = FASTQC_TRIMGALORE.out.reads - reads_for_diff_exp = FASTQC_TRIMGALORE.out.reads - - // - // 2. circRNA Discovery - // - - CIRCRNA_DISCOVERY( - reads_for_circrna, - ch_fasta, - ch_gtf, - bowtie_index, - bowtie2_index, - bwa_index, - chromosomes, - hisat2_index, - segemehl_index, - star_index, - params.bsj_reads, - params.tool_filter, - params.duplicates_fun, - params.exon_boundary - ) - - ch_versions = ch_versions.mix(CIRCRNA_DISCOVERY.out.versions) - - // - // 3. miRNA prediction - // - - MIRNA_PREDICTION( - CIRCRNA_DISCOVERY.out.fasta, - CIRCRNA_DISCOVERY.out.circrna_bed12, - ch_mature - ) - - ch_versions = ch_versions.mix(MIRNA_PREDICTION.out.versions) - - // - // 4. Differential expression tests - // - - ch_ensembl_database_map = params.module.contains('differential_expression') ? 
-
-    DIFFERENTIAL_EXPRESSION(
-        reads_for_diff_exp,
-        ch_gtf,
-        ch_fasta,
-        hisat2_index,
-        PREPARE_GENOME.out.splice_sites,
-        ch_phenotype,
-        CIRCRNA_DISCOVERY.out.dea_matrix,
-        CIRCRNA_DISCOVERY.out.clr_matrix,
-        ch_species,
-        ch_ensembl_database_map,
-        params.exon_boundary
-    )
-
-    ch_versions = ch_versions.mix(DIFFERENTIAL_EXPRESSION.out.versions)
-
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml')
-    )
-
-    // MODULE: MultiQC
-    workflow_summary = WorkflowCircrna.paramsSummaryMultiqc(workflow, summary_params)
-    ch_workflow_summary = Channel.value(workflow_summary)
-
-    methods_description = WorkflowCircrna.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
-    ch_methods_description = Channel.value(methods_description)
-
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
-
-    MULTIQC (
-        ch_multiqc_files.collect(),
-        ch_multiqc_config.toList(),
-        ch_multiqc_custom_config.toList(),
-        ch_multiqc_logo.toList()
-    )
-    multiqc_report = MULTIQC.out.report.toList()
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.summary(workflow, params, log)
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    THE END
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
diff --git a/workflows/circrna/main.nf b/workflows/circrna/main.nf
new file mode 100644
index 000000000..048b4da34
--- /dev/null
+++ b/workflows/circrna/main.nf
@@ -0,0 +1,235 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    CONFIG FILES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT LOCAL MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// SUBWORKFLOWS:
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { validateInputSamplesheet } from '../../subworkflows/local/utils_nfcore_circrna_pipeline'
+
+include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
+include { PREPARE_GENOME } from '../../subworkflows/local/prepare_genome'
+include { BSJ_DETECTION } from '../../subworkflows/local/bsj_detection'
+include { ANNOTATION } from '../../subworkflows/local/annotation'
+include { QUANTIFICATION } from '../../subworkflows/local/quantification'
+include { MIRNA_PREDICTION } from '../../subworkflows/local/mirna_prediction'
+include { STATISTICAL_TESTS } from '../../subworkflows/local/statistical_tests'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// MODULES:
+include { MULTIQC } from '../../modules/nf-core/multiqc/main'
+include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main'
+
+// SUBWORKFLOWS:
+include { FASTQC_TRIMGALORE } from '../../subworkflows/nf-core/fastqc_trimgalore'
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow CIRCRNA {
+    take:
+    ch_samplesheet
+    ch_phenotype
+    ch_fasta
+    ch_gtf
+    ch_mature
+    ch_annotation
+    ch_versions
+    ch_mirna
+
+    main:
+
+    ch_multiqc_files = Channel.empty()
+
+    //
+    // 1. Pre-processing
+    //
+
+    // SUBWORKFLOW:
+    ch_samplesheet
+        .map {
+            meta, fastq_1, fastq_2 ->
+                if (!fastq_2) {
+                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
+                } else {
+                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+                }
+        }
+        .groupTuple()
+        .map {
+            validateInputSamplesheet(it)
+        }
+        .map {
+            meta, fastqs ->
+                return [ meta, fastqs.flatten() ]
+        }
+        .branch {
+            meta, fastqs ->
+                single  : fastqs.size() == 1
+                    return [ meta, fastqs ]
+                multiple: fastqs.size() > 1
+                    return [ meta, fastqs ]
+        }
+        .set { ch_fastq }
+
+    // MODULE:
+    // Concatenate FastQ files from same sample if required
+    CAT_FASTQ (ch_fastq.multiple)
+        .reads
+        .mix(ch_fastq.single)
+        .set { ch_cat_fastq }
+    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
+
+    // SUBWORKFLOW:
+    // Prepare index files and/or use iGenomes if chosen.
+    PREPARE_GENOME (
+        ch_fasta,
+        ch_gtf
+    )
+
+    ch_gtf = PREPARE_GENOME.out.gtf
+    bowtie_index = PREPARE_GENOME.out.bowtie
+    bowtie2_index = PREPARE_GENOME.out.bowtie2
+    bwa_index = PREPARE_GENOME.out.bwa
+    chromosomes = PREPARE_GENOME.out.chromosomes
+    hisat2_index = PREPARE_GENOME.out.hisat2
+    star_index = PREPARE_GENOME.out.star
+    ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
+
+    // SUBWORKFLOW: Run FastQC and Trim Galore!
+    FASTQC_TRIMGALORE (
+        ch_cat_fastq,
+        params.skip_fastqc,
+        params.skip_trimming
+    )
+    ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions)
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]))
+
+    //
+    // 2. BSJ Discovery
+    //
+
+    BSJ_DETECTION(
+        FASTQC_TRIMGALORE.out.reads,
+        ch_fasta,
+        ch_gtf,
+        ch_annotation,
+        bowtie_index,
+        bowtie2_index,
+        bwa_index,
+        chromosomes,
+        hisat2_index,
+        star_index,
+        params.bsj_reads,
+        params.exon_boundary
+    )
+
+    ch_multiqc_files = ch_multiqc_files.mix(BSJ_DETECTION.out.multiqc_files)
+    ch_versions = ch_versions.mix(BSJ_DETECTION.out.versions)
+
+    //
+    // 3. circRNA quantification
+    //
+
+    QUANTIFICATION(
+        ch_gtf,
+        ch_fasta,
+        FASTQC_TRIMGALORE.out.reads,
+        BSJ_DETECTION.out.bed12,
+        BSJ_DETECTION.out.gtf,
+        params.bootstrap_samples,
+        ch_phenotype,
+        PREPARE_GENOME.out.faidx
+    )
+
+    ch_versions = ch_versions.mix(QUANTIFICATION.out.versions)
+
+    //
+    // 4. miRNA prediction
+    //
+
+    if (params.mature) {
+        MIRNA_PREDICTION(
+            QUANTIFICATION.out.transcriptome,
+            BSJ_DETECTION.out.bed12,
+            ch_mature,
+            ch_mirna,
+            QUANTIFICATION.out.circular_tx_counts,
+            QUANTIFICATION.out.rds
+        )
+        ch_versions = ch_versions.mix(MIRNA_PREDICTION.out.versions)
+    }
+
+    //
+    // 5. Statistical tests
+    //
+
+    STATISTICAL_TESTS(
+        QUANTIFICATION.out.se,
+        QUANTIFICATION.out.gene_counts,
+        QUANTIFICATION.out.circular_tx_counts,
+        ch_phenotype
+    )
+
+    ch_versions = ch_versions.mix(STATISTICAL_TESTS.out.versions)
+
+    //
+    // Collate and save software versions
+    //
+    softwareVersionsToYAML(ch_versions)
+        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
+        .set { ch_collated_versions }
+
+    // MultiQC
+    ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty()
+    summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+    ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+
+    MULTIQC (
+        ch_multiqc_files.collect(),
+        ch_multiqc_config.toList(),
+        ch_multiqc_custom_config.toList(),
+        ch_multiqc_logo.toList()
+    )
+
+    emit:
+    multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
+    versions       = ch_versions                 // channel: [ path(versions.yml) ]
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
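The pre-processing block in `workflows/circrna/main.nf` above leans on a `groupTuple`/`branch` idiom: rows sharing a sample id are grouped, and only samples with more than one FASTQ set are routed through `CAT_FASTQ`. A minimal, runnable sketch of that routing follows; the sample ids and FASTQ file names are hypothetical placeholders, not pipeline data:

```nextflow
// Sketch of the groupTuple/branch routing from workflow CIRCRNA above.
// Sample ids and FASTQ file names are hypothetical placeholders.
workflow {
    Channel.of(
        [ [id: 'sampleA'], [ 'A_L001_R1.fastq.gz' ] ],
        [ [id: 'sampleB'], [ 'B_L001_R1.fastq.gz' ] ],
        [ [id: 'sampleB'], [ 'B_L002_R1.fastq.gz' ] ]
    )
        .map { meta, fastqs -> [ meta.id, meta, fastqs ] } // key on the sample id
        .groupTuple()                                      // one tuple per sample id
        .map { id, metas, fastqs -> [ metas[0], fastqs.flatten() ] }
        .branch { meta, fastqs ->
            single  : fastqs.size() == 1                   // used as-is downstream
            multiple: fastqs.size() > 1                    // would pass through CAT_FASTQ
        }
        .set { ch_fastq }

    ch_fastq.single.view   { "single:   ${it[0].id} ${it[1]}" }
    ch_fastq.multiple.view { "multiple: ${it[0].id} ${it[1]}" }
}
```

Because `branch` re-emits each item unchanged unless an explicit `return` is given, both output channels keep the `[ meta, fastqs ]` shape that `CAT_FASTQ` and the downstream subworkflows expect.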
Software versions table (MultiQC report residue):

    Process Name                    Software    Version
    CUSTOM_DUMPSOFTWAREVERSIONS     python      3.11.7
    CUSTOM_DUMPSOFTWAREVERSIONS     yaml        5.4.1
    TOOL1                           tool1       0.11.9
    TOOL2                           tool2       1.9
    Workflow                        Nextflow
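Each row in this table originates from a `versions.yml` file emitted by the corresponding process and collated by `softwareVersionsToYAML` in the workflow above. A hedged sketch of the emitting side, following the usual nf-core convention; `TOOL1`/`tool1` and its `--version` flag are placeholders rather than a real module:

```nextflow
// Hedged sketch of a versions.yml-emitting process in the nf-core style.
// TOOL1 / tool1 and its --version flag are placeholders, not a real module.
process TOOL1 {
    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.out"), emit: results
    path "versions.yml"           , emit: versions

    script:
    """
    tool1 ${reads} > ${meta.id}.out

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tool1: \$(tool1 --version)
    END_VERSIONS
    """
}
```

Mixing each module's `versions` output into `ch_versions` is what lets `softwareVersionsToYAML` render one table covering every process that ran.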
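The `collectFile` call that writes `nf_core_pipeline_software_mqc_versions.yml` in the workflow above can likewise be exercised in isolation. A small sketch, with inline YAML strings standing in for real `versions.yml` contents:

```nextflow
// Sketch of collating version fragments with collectFile, mirroring the
// softwareVersionsToYAML(...).collectFile(...) call in workflow CIRCRNA.
// The inline YAML fragments are stand-ins for real versions.yml contents.
workflow {
    Channel.of(
        'TOOL2:\n  tool2: "1.9"',
        'TOOL1:\n  tool1: "0.11.9"'
    )
        .collectFile(name: 'collated_versions.yml', sort: true, newLine: true)
        .view { it.text } // TOOL1 block sorts before TOOL2
}
```

With `sort: true` the fragments are written in a deterministic order, which keeps the collated YAML stable across runs and therefore snapshot-friendly.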