Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Private ips setup #2

Draft
wants to merge 37 commits into
base: stage-cloud-storage-files
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
43d8cfb
Update nextflow.config
cgpu Mar 10, 2021
d207346
Rename s3Location to be cloud agnostic
cgpu Mar 10, 2021
1947089
Adds google.config
cgpu Mar 11, 2021
016346b
Update nextflow.config
cgpu Mar 11, 2021
41d0089
Adds gls info in main.nf
cgpu Mar 11, 2021
c76c322
Update google.config
cgpu Mar 11, 2021
0688754
Inits processAMachineType to false
cgpu Mar 11, 2021
0586b93
Removes machineType
cgpu Mar 11, 2021
b522e7b
Adds google.lifeSciences.usePrivateAddress
cgpu Mar 11, 2021
19589e1
Replaces s3Location with cloudStorageLocation
cgpu Mar 11, 2021
41d2be9
Update nextflow.config
cgpu Mar 11, 2021
f0a00e0
Update nextflow.config
cgpu Mar 11, 2021
a186a42
Updates publishDir; Updates s3 -> agnostic
cgpu Mar 12, 2021
a142cb2
Updates branch
cgpu Mar 12, 2021
015de36
Adds option to gls_sshDaemon
cgpu Mar 14, 2021
4cad33c
Absorbs latest changes from stage-cloud-storage-files
cgpu Apr 12, 2021
bea3be8
Adds private ip params in log.info
cgpu Apr 12, 2021
13e1fd0
Adds empty line
cgpu Apr 12, 2021
edc502b
Removes defaultBranch
cgpu Apr 23, 2021
28f6019
Removes log.info "defaultBranch"
cgpu May 12, 2021
13c1cc4
Sets default gls copyImage to gcr.io
cgpu May 12, 2021
1f7bc34
Adds sshImage and copyImage as params
cgpu May 12, 2021
db87f42
Updates container for google cloud config
cgpu May 14, 2021
4479be5
Adds awsbatch.config
cgpu May 20, 2021
fd4e2c0
Parameterises batch options
cgpu May 20, 2021
6f1ab7c
Adds aws batch params in log.info header
cgpu May 20, 2021
8308a1f
Fix param name typo
cgpu May 20, 2021
b9bafa9
Fix params name
cgpu May 21, 2021
38a3873
Update google.config
cgpu Jul 1, 2021
1561fbb
Adds aws_ignite.config in conf/
cgpu Aug 18, 2021
e18bf80
Experimentally add execution vars in print header
cgpu Aug 18, 2021
909119a
Adds defaults for ignite on AWS
cgpu Aug 18, 2021
94ff60a
Adds defaults for ignite on AWS
cgpu Aug 18, 2021
d72c3a2
Adds command-logs folder in results (.command*)
cgpu Aug 24, 2021
161df30
Adds qa.config
cgpu Sep 28, 2021
e66ab54
Update qa.config
cgpu Sep 28, 2021
6cc20ea
Updates regex * -> ** [traverse nested folders]
cgpu Oct 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions conf/aws_ignite.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Config for Ignite on AWS with cloud autoscaling.
// Parameter values for this config; the cloud scope below reads them.
params {
cloud_autoscale_enabled = true
cloud_autoscale_max_instances = 2
}

// Enable Docker for process execution.
docker.enabled = true

// Autoscale settings are taken from the params declared above.
cloud {
autoscale {
enabled = params.cloud_autoscale_enabled
maxInstances = params.cloud_autoscale_max_instances
}
}
20 changes: 20 additions & 0 deletions conf/awsbatch.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Config for the AWS Batch executor.
// The aws_batch_* params are referenced by the docker, process and aws.batch settings below.
params {
executor = 'awsbatch'
dataLocation = 's3://lifebit-featured-datasets/pipelines/spammer-nf/input_files'
aws_batch_cliPath = '~/miniconda/bin/aws'
aws_batch_fetchInstanceType = true
aws_batch_process_queue = "nf-batch-4"
aws_batch_docker_run_options = "--ulimit nofile=65535:65535 --ulimit nproc=65535 --user 0"
}

// Docker is enabled and started with the run options taken from params.
docker {
enabled = true
runOptions = params.aws_batch_docker_run_options
}

// Every process is submitted to the queue named in params.
process {
queue = params.aws_batch_process_queue
}

// AWS Batch specific settings, both read from params.
aws.batch.cliPath = params.aws_batch_cliPath
aws.batch.fetchInstanceType = params.aws_batch_fetchInstanceType
16 changes: 13 additions & 3 deletions conf/google.config
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
// Parameter values used when running with the google-lifesciences executor.
params {
executor = 'google-lifesciences'
dataLocation = 'gs://lifebit-featured-datasets/pipelines/spammer-nf/input_files'

// Nextflow's default images, adjusted so that all of them are pulled from gcr.io
gls_copyImage = 'gcr.io/google.com/cloudsdktool/cloud-sdk:alpine'
gls_sshImage = 'gcr.io/cloud-genomics-pipelines/tools'
container = 'gcr.io/nextflow-250616/ubuntu:latest'
}

// Google Cloud settings; every value is read from params.
google {
    // Both images must be hosted in gcr.io if using private IPs (gls_usePrivateAddress true).
    lifeSciences.copyImage = params.gls_copyImage
    lifeSciences.sshImage = params.gls_sshImage

    lifeSciences.bootDiskSize = params.gls_bootDiskSize
    lifeSciences.preemptible = params.gls_preemptible
    zone = params.zone
    network = params.network
    subnetwork = params.subnetwork
    lifeSciences.network = params.network
    lifeSciences.subnetwork = params.subnetwork
    lifeSciences.usePrivateAddress = params.gls_usePrivateAddress
    // This line is already inside the google scope, so the key must not repeat the
    // "google." prefix; otherwise it nests as google.google.lifeSciences.sshDaemon
    // and the sshDaemon option is never applied.
    lifeSciences.sshDaemon = params.gls_sshDaemon
}
cgpu marked this conversation as resolved.
Show resolved Hide resolved

8 changes: 8 additions & 0 deletions conf/qa.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// QA config: overrides a handful of the default params declared in nextflow.config.
params {
    // NOTE(review): main.nf calls params.dataLocation.contains(':'), so a real
    // location presumably has to be supplied at launch time — confirm.
    dataLocation = false
    // Spelled "fileSuffix" to match the param that nextflow.config declares and
    // main.nf reads (params.fileSuffix); the misspelled key was silently ignored.
    fileSuffix = "cram"
    repsProcessA = 2
    processA_cpus = 4
    processATimeRange = "30-120"
    errorStrategy = 'terminate'
}
31 changes: 27 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ fileSystem = params.dataLocation.contains(':') ? params.dataLocation.split(':')[
// Header log info
log.info "\nPARAMETERS SUMMARY"
log.info "mainScript : ${params.mainScript}"
log.info "defaultBranch : ${params.defaultBranch}"
log.info "config : ${params.config}"
log.info "fileSystem : ${fileSystem}"
log.info "dataLocation : ${params.dataLocation}"
Expand All @@ -27,16 +26,38 @@ log.info "container : ${params.container}"
log.info "maxForks : ${params.maxForks}"
log.info "queueSize : ${params.queueSize}"
log.info "executor : ${params.executor}"
// Executor-specific header lines: the AWS Batch params are logged only when
// params.executor is 'awsbatch'.
if(params.executor == 'awsbatch') {
log.info "aws_batch_cliPath : ${params.aws_batch_cliPath}"
log.info "aws_batch_fetchInstanceType : ${params.aws_batch_fetchInstanceType}"
log.info "aws_batch_process_queue : ${params.aws_batch_process_queue}"
log.info "aws_batch_docker_run_options : ${params.aws_batch_docker_run_options}"
}
// Autoscale settings are logged only when params.config is 'conf/aws_ignite.config'.
if(params.config == 'conf/aws_ignite.config') {
log.info "cloud_autoscale_enabled : ${params.cloud_autoscale_enabled}"
// NOTE(review): no ${...} interpolation here, so this logs the literal text
// "cloud.autoscale.enabled" rather than the configured value.
log.info "cloud.autoscale.enabled : cloud.autoscale.enabled"
log.info "cloud_autoscale_max_instances : ${params.cloud_autoscale_max_instances}"
// NOTE(review): same as above — logs the literal text, not the configured value.
log.info "cloud.autoscale.maxInstances : cloud.autoscale.maxInstances "
}
if(params.executor == 'google-lifesciences') {
log.info "gls_bootDiskSize : ${params.gls_bootDiskSize}"
log.info "gls_preemptible : ${params.gls_preemptible}"
log.info "gls_usePrivateAddress : ${params.gls_usePrivateAddress}"
log.info "zone : ${params.zone}"
log.info "network : ${params.network}"
log.info "subnetwork : ${params.subnetwork}"
cgpu marked this conversation as resolved.
Show resolved Hide resolved
log.info "lifeSciences.usePrivateAddress : ${params.gls_usePrivateAddress}"
log.info "google.lifeSciences.sshDaemon : ${params.gls_sshDaemon}"
}
log.info ""

numberRepetitionsForProcessA = params.repsProcessA
numberFilesForProcessA = params.filesProcessA
processAWriteToDiskMb = params.processAWriteToDiskMb
processAInput = Channel.from([1] * numberRepetitionsForProcessA)
processAInputFiles = Channel.fromPath("${params.dataLocation}/*${params.fileSuffix}").take( numberRepetitionsForProcessA )
processAInputFiles = Channel.fromPath("${params.dataLocation}/**${params.fileSuffix}").take( numberRepetitionsForProcessA )

process processA {
publishDir "${params.output}/${task.hash}", mode: 'copy'
publishDir "${params.output}/${task.hash}/", mode: 'copy'
tag "cpus: ${task.cpus}, cloud storage: ${cloud_storage_file}"

input:
Expand All @@ -48,6 +69,7 @@ process processA {
val x into processCInput
val x into processDInput
file "*.txt"
file("command-logs") optional true

script:
"""
Expand All @@ -61,6 +83,8 @@ process processA {
done;
sleep \$timeToWait
echo "task cpus: ${task.cpus}"

${params.savescript}
"""
}

Expand Down Expand Up @@ -102,4 +126,3 @@ process processD {
sleep \$timeToWait
"""
}

21 changes: 17 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ docker.enabled = true
// NOTE:
// Initialise the values of the params to the preferred default value or to false
params {
processA_memory = '4.GB'
processA_cpus = 1
processA_disk = '20.GB'
mainScript = 'main.nf'
defaultBranch = 'stage-s3-files'
config = 'conf/standard.config'
dataLocation = 's3://lifebit-featured-datasets/pipelines/spammer-nf/input_files'
fileSuffix = ''
Expand All @@ -13,6 +15,7 @@ params {
processATimeRange = "1-2"
filesProcessA = 1
processATimeBetweenFileCreationInSecs = 0
savescript = 'mkdir command-logs; cp .command.* command-logs'

processBTimeRange = "2-3"
processBWriteToDiskMb = 10
Expand All @@ -23,7 +26,6 @@ params {

echo = false
cpus = 1
processA_cpus = 1
errorStrategy = 'ignore'
container = 'quay.io/lifebitai/ubuntu:18.10'
maxForks = 200
Expand All @@ -32,11 +34,21 @@ params {
executor = 'ignite'

// google-lifesciences
gls_usePrivateAddress = false
gls_sshDaemon = false
gls_bootDiskSize = '50.GB'
gls_preemptible = true
zone = 'us-east1-b'
network = 'default'
subnetwork = 'default'

// values defined in conf/google.config
gls_copyImage = false
gls_sshImage = false

// ignite on aws
cloud_autoscale_enabled = false
cloud_autoscale_max_instances = 2
}

// Do not update the order because the values set in params scope will not be overwritten
Expand All @@ -56,7 +68,9 @@ process {
errorStrategy = params.errorStrategy

withName: processA {
disk = params.processA_disk
cpus = params.processA_cpus
memory = params.processA_memory
}
}

Expand All @@ -69,5 +83,4 @@ manifest {
homePage = 'https://github.com/lifebit-ai/spammer-nf'
description = 'A stampede of processes at your fingertips'
mainScript = params.mainScript
defaultBranch = params.defaultBranch
}
}