Skip to content

Commit

Permalink
Remove dependencies on lock file in egs-parallel
Browse files Browse the repository at this point in the history
Don't quit the egs-parallel submit scripts if no lock file is found, and
add a -f (--force) option to override existing egsjob or lock file.

The lock file for parallel jobs is managed inside EGSnrc, so the script
should not manage it as well: this creates an obscure correlation
between the code and the script. Moreover, the uniform run control
method does not create a lock file. Previously, the submit script would
quit if there was no lock file; now it merely reports its content. The
top-level egs-parallel script now prevents the run if there is an
.egsjob file OR a .lock file, for the same reason. This can be
overridden with the added --force option.
  • Loading branch information
ftessier committed Sep 1, 2020
1 parent bd0346b commit f59c2af
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 14 deletions.
24 changes: 20 additions & 4 deletions HEN_HOUSE/scripts/bin/egs-parallel
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ function help {
-q | --queue scheduler queue ("long" by default)
-n | --nthread number of threads ("8" by default)
-o | --option option(s) to pass to job scheduler, in quotes
-f | --force force run, even if lock or egsjob file present
-v | --verbose echo detailed egs-parallel log messages to terminal
-c | --command command to run, given in quotes
Expand Down Expand Up @@ -91,6 +92,7 @@ opt_nthread="8"
opt_delay="0"
opt_command=""
opt_options=""
opt_force="no"
verbosity="silent"
declare -a opt_options_array

Expand All @@ -103,6 +105,7 @@ while [ "$#" -gt 0 ]; do
# options without arguments
case $opt in
-h|--help) help; exit;;
-f|--force) opt_force="yes"; continue;;
-v|--verbose) verbosity="verbose"; continue;;
esac

Expand Down Expand Up @@ -201,10 +204,23 @@ if ! [ -r $egsinp ]; then
quit $LINENO "cannot access input file: $egsinp.egsinp"
fi

### check that there is not currently a .lock file associated with this input file
lock=$egs_home/$cmd_app/$basename.lock
if [ -e $lock ]; then
quit $LINENO "there is already a lock file for $basename: $lock"
### prevent the run if lock file or egsjob file present
if [ "$opt_force" == "no" ]; then

# check lock file
lock=$egs_home/$cmd_app/$basename.lock
if [ -e $lock ]; then
log "existing lock file: $lock"
quit $LINENO "prevent erasing lock file (override with --force)"
fi

# check egsjob file
egsjob=$egs_home/$cmd_app/$basename.egsjob
if [ -e $egsjob ]; then
log "existing egsjob file: $egsjob"
quit $LINENO "prevent erasing egsjob file (override with --force)"
fi

fi

### report command-line options
Expand Down
2 changes: 1 addition & 1 deletion HEN_HOUSE/scripts/bin/egs-parallel-clean
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ for name in "${opt_names[@]}"; do

### clean .egsparallel and .eo log files if -x (--extra) is invoked, otherwise catenate them
if [ "$opt_extra" = "yes" ]; then
clean_extensions="$clean_extensions .egsparallel-log .egsparallel-out"
clean_extensions="$clean_extensions .egsparallel .egsparallel-log .egsparallel-out"
else
for f in ${name}.egsparallel; do
f=${f%.egsparallel}
Expand Down
10 changes: 10 additions & 0 deletions HEN_HOUSE/scripts/egs-parallel-cpu
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ if [ $nthread -gt $cpu_nthread ]; then
nthread=$cpu_nthread
fi

### remove existing egsjob and lock files
if [ -e $basename.egsjob ]; then
log "remove existing egsjob file: $basename.egsjob"
/bin/rm $basename.egsjob
fi
if [ -e $basename.lock ]; then
log "remove existing lock file: $basename.lock"
/bin/rm $basename.lock
fi

### loop to launch nthread jobs on cpu
for job in $(seq 1 $nthread); do

Expand Down
3 changes: 0 additions & 3 deletions HEN_HOUSE/scripts/egs-parallel-dshtask
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,6 @@ else
if [ -r $basename.lock ]; then
content=$(cat $basename.lock)
log "$jobstr: found $basename.lock: $content"
else
log "$jobstr: QUIT ($basename.lock does not exist or is not readable)"
exit
fi

fi
Expand Down
13 changes: 10 additions & 3 deletions HEN_HOUSE/scripts/egs-parallel-pbs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,16 @@ if [ $trim -gt 0 ]; then
fi
log "job name: $jobname"

### remove existing egsjob and lock files
if [ -e $basename.egsjob ]; then
log "remove existing egsjob file: $basename.egsjob"
/bin/rm $basename.egsjob
fi
if [ -e $basename.lock ]; then
log "remove existing lock file: $basename.lock"
/bin/rm $basename.lock
fi

### loop to launch nthread pbs jobs
for job in $(seq 1 $nthread); do

Expand Down Expand Up @@ -201,9 +211,6 @@ for job in $(seq 1 $nthread); do
if [ -r $basename.lock ]; then
content=$(cat $basename.lock)
log "$jobstr: found $basename.lock: $content"
else
log "$jobstr: QUIT ($basename.lock does not exist or is not readable)"
exit
fi
quit_if_done
fi
Expand Down
18 changes: 15 additions & 3 deletions HEN_HOUSE/scripts/egs-parallel-pbsdsh
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,28 @@ if [ $trim -gt 0 ]; then
fi
log "job name: $jobname"

### create pbsdsh directory to store task files for job numbers
### create pbsdsh directory to store task files for job numbers (remove existing directory)
pbsdsh_dir=$basename.pbsdsh
log "create temporary directory $pbsdsh_dir"
if [ -e $pbsdsh_dir ]; then
log "remove existing directory $pbsdsh_dir"
/bin/rm -r $pbsdsh_dir
fi
log "create temporary directory $pbsdsh_dir"
err=$(mkdir $pbsdsh_dir 2>&1)
if ! [ -z $err ]; then
quit $LINENO "$err"
fi

### remove existing egsjob and lock files
if [ -e $basename.egsjob ]; then
log "remove existing egsjob file: $basename.egsjob"
/bin/rm $basename.egsjob
fi
if [ -e $basename.lock ]; then
log "remove existing lock file: $basename.lock"
/bin/rm $basename.lock
fi

### launch pbsdsh tasks
task_script=$HEN_HOUSE/scripts/egs-parallel-dshtask
jobpid=$(qsub -q $queue $scheduler_options <<EOF
Expand All @@ -138,7 +150,7 @@ jobpid=$(qsub -q $queue $scheduler_options <<EOF
pbsdsh $task_script $pbsdsh_dir $basename $nthread $first $delay '$command'
EOF
)
log "SUBMIT $nthread pbsdsh tasks on $jobid"
log "SUBMIT $nthread pbsdsh tasks on $jobpid"
log "pbsdsh task logs will be collated in ${basename}.eo"

### print pid
Expand Down

0 comments on commit f59c2af

Please sign in to comment.