Skip to content

Commit

Permalink
Initial version of multiple grid engine support added
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Nov 25, 2015
1 parent 63945b1 commit 3f34d3c
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 42 deletions.
249 changes: 209 additions & 40 deletions src/pipelines/canu/Defaults.pm
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ package canu::Defaults;
require Exporter;

@ISA = qw(Exporter);
@EXPORT = qw(getCommandLineOptions addCommandLineOption writeLog caExit caFailure getNumberOfCPUs getPhysicalMemorySize getAllowedResources diskSpace printHelp setParametersFromFile setParametersFromCommandLine checkParameters getGlobal setGlobal showErrorRates setErrorRate setDefaults);
@EXPORT = qw(getCommandLineOptions addCommandLineOption writeLog caExit caFailure getNumberOfCPUs getPhysicalMemorySize getAllowedResources formatAllowedResources diskSpace printHelp setParametersFromFile setParametersFromCommandLine checkParameters getGlobal setGlobal showErrorRates setErrorRate setDefaults);

use strict;
use Carp qw(cluck);
Expand Down Expand Up @@ -339,6 +339,35 @@ sub expandRange ($$) {
return(@r);
}

# Report the hosts discovered on the grid to STDERR and encode them as the
# single string that callers store in the "availableHosts" global.
#
# Argument:
#   %hosts - passed by reference through the (\%) prototype, so callers write
#            formatAllowedResources(%hosts).  Keys are "CPUS-MEMORY" strings
#            (memory is reported in GB); values are the number of nodes that
#            have that configuration.
#
# Returns:
#   A string of "CPUS-MEMORY-NODES" records separated by "\0", in hash
#   iteration order.  An empty %hosts yields the empty string.
#
sub formatAllowedResources(\%) {
    my $hosts_ref = shift @_;
    my @records;

    print STDERR "-- \n";

    foreach my $c (keys %$hosts_ref) {
        my ($cpus, $mem) = split '-', $c;
        my $nodes        = $hosts_ref->{$c};

        # NOTE(review): this message names Sun Grid Engine, but the PBS, LSF
        # and SLURM code paths call this function too.  Text left unchanged.
        printf(STDERR "-- Found %3d host%s with %3d core%s and %4d GB memory under Sun Grid Engine control.\n",
               $nodes, ($nodes == 1) ? " " : "s",
               $cpus,  ($cpus  == 1) ? " " : "s",
               $mem);

        push @records, "$cpus-$mem-$nodes";
    }

    # join() places the "\0" separator only BETWEEN records.  The previous
    # code appended "\0" whenever the accumulator was defined; since it was
    # initialised to "", that was always true and the result began with a
    # stray "\0", giving consumers that split on "\0" an empty first record.
    return join("\0", @records);
}

# Side effect! This will RESET the $global{} parameters to the computed value. This lets
# the rest of canu - in particular, the part that runs the jobs - use the correct value. Without
Expand Down Expand Up @@ -1182,42 +1211,62 @@ sub checkParameters ($) {
$hosts{"$cpus-$mem"}++;
}
close(F);
setGlobal("availableHosts", formatAllowedResources(%hosts));
}

print STDERR "--\n";
if (uc(getGlobal("gridEngine")) eq "PBS") {
setGlobalIfUndef("gridEngineSubmitCommand", "qsub");
setGlobalIfUndef("gridEngineHoldOption", "-W depend=afteranyarray:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineHoldOptionNoArray", "-W depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineSyncOption", "");
setGlobalIfUndef("gridEngineNameOption", "-d `pwd` -N");
setGlobalIfUndef("gridEngineArrayOption", "-t ARRAY_JOBS");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME\[ARRAY_JOBS\]");
setGlobalIfUndef("gridEngineOutputOption", "-j oe -o");
setGlobalIfUndef("gridEngineThreadsOption", "-l nodes=1:ppn=THREAD");
setGlobalIfUndef("gridEngineMemoryOption", "-l mem=MEMORY");
setGlobalIfUndef("gridEnginePropagateCommand", "qalter -W depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineNameToJobIDCommand", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: | grep -F [] |awk '{print \$NF}'");
setGlobalifUndef("gridEngineNameToJobIDCommandNoArray", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: |awk '{print \$NF}'");
setGlobalIfUndef("gridEngineTaskID", "\$PBS_ARRAYID");
setGlobalIfUndef("gridEngineArraySubmitID", "\\\$PBS_ARRAYID");
setGlobalIfUndef("gridEngineJobID", "PBS_JOBID");

foreach my $c (keys %hosts) {
my ($cpus, $mem) = split '-', $c;
my $nodes = $hosts{$c};
# Build a list of the resources available in the grid. This will contain a list with keys
# of "#CPUs-#GBs" and values of the number of nodes with such a config. Later on, we'll use this
# to figure out what specific settings to use for each algorithm.
#
# The list is saved in global{"availableHosts"}
#
# !!! UNTESTED !!!
#
my %hosts;

printf(STDERR "-- Found %3d host%s with %3d core%s and %4d GB memory under Sun Grid Engine control.\n",
$nodes, ($nodes == 1) ? " " : "s",
$cpus, ($cpus == 1) ? " " : "s",
$mem);
open(F, "pbsnodes |");

$hosts .= "\0" if (defined($hosts));
$hosts .= "$cpus-$mem-$nodes";
while (<F>) {
my $cpus = 0;
my $mem = 0;
if ($_ =~ m/status/) {
my @stats = split ',', $_;
for my $stat (@stats) {
if ($stat =~ m/physmem/) {
$mem = ( split '=', $stat )[-1];
} elsif ($stat =~ m/ncpus/) {
$cpus = int(( split '=', $stat )[-1]);
}
}
$mem = $1 * 1024 if ($mem =~ m/(\d+.*\d+)[tT]/);
$mem = $1 * 1 if ($mem =~ m/(\d+.*\d+)[gG]/);
$mem = $1 / 1024 if ($mem =~ m/(\d+.*\d+)[mM]/);
$mem = $1 / 1024 / 1024 if ($mem =~ m/(\d+.*\d+)[kK]/);
$mem = int($mem);
$hosts{"$cpus-$mem"}++;
}
}
close(F);

setGlobal("availableHosts", $hosts);
}

if (uc(getGlobal("gridEngine")) eq "PBS") {
setGlobalIfUndef("gridEngineSubmitCommand", "qsub");
setGlobalIfUndef("gridEngineHoldOption", "-W depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineHoldOptionNoArray", undef);
setGlobalIfUndef("gridEngineSyncOption", "");
setGlobalIfUndef("gridEngineNameOption", "-d `pwd` -N");
setGlobalIfUndef("gridEngineArrayOption", "-t ARRAY_JOBS");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME\[ARRAY_JOBS\]");
setGlobalIfUndef("gridEngineOutputOption", "-j oe -o");
setGlobalIfUndef("gridEnginePropagateCommand", "qalter -W depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineThreadsOption", undef);
setGlobalIfUndef("gridEngineMemoryOption", undef);
setGlobalIfUndef("gridEngineNameToJobIDCommand", undef);
setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", undef);
setGlobalIfUndef("gridEngineTaskID", "\$PBS_TASKNUM");
setGlobalIfUndef("gridEngineArraySubmitID", "\\\$PBS_TASKNUM");
setGlobalIfUndef("gridEngineJobID", "PBS_JOBID");
setGlobal("availableHosts", formatAllowedResources(%hosts));
}

if (uc(getGlobal("gridEngine")) eq "LSF") {
Expand All @@ -1229,14 +1278,120 @@ sub checkParameters ($) {
setGlobalIfUndef("gridEngineArrayOption", "");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME\[ARRAY_JOBS\]");
setGlobalIfUndef("gridEngineOutputOption", "-o");
setGlobalIfUndef("gridEnginePropagateCommand", "bmodify -w \"done\(\"WAIT_TAG\"\)\"");
setGlobalIfUndef("gridEngineThreadsOption", undef);
setGlobalIfUndef("gridEngineMemoryOption", undef);
setGlobalIfUndef("gridEnginePropagateCommand", "bmodify -w \"done\(\"WAIT_TAG\"\)\"");
setGlobalIfUndef("gridEngineNameToJobIDCommand", "bjobs -A -J \"WAIT_TAG\" | grep -v JOBID");
setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "bjobs -J \"WAIT_TAG\" | grep -v JOBID");
setGlobalIfUndef("gridEngineTaskID", "\$LSB_JOBINDEX");
setGlobalIfUndef("gridEngineArraySubmitID", "%I");
setGlobalIfUndef("gridEngineJobID", "LSB_JOBID");

# Build a list of the resources available in the grid. This will contain a list with keys
# of "#CPUs-#GBs" and values of the number of nodes with such a config. Later on, we'll use this
# to figure out what specific settings to use for each algorithm.
#
# The list is saved in global{"availableHosts"}
#
# !!! UNTESTED !!
#
my %hosts;

open(F, "lshosts |");

my $h = <F>; # header

my @h = split '\s+', $h;

my $cpuIdx = 4;
my $memIdx = 5;

for (my $ii=0; ($ii < scalar(@h)); $ii++) {
$cpuIdx = $ii if ($h[$ii] eq "ncpus");
$memIdx = $ii if ($h[$ii] eq "maxmem");
}

while (<F>) {
my @v = split '\s+', $_;

my $cpus = $v[$cpuIdx];
my $mem = $v[$memIdx];

$mem = $1 * 1024 if ($mem =~ m/(\d+.*\d+)[tT]/);
$mem = $1 * 1 if ($mem =~ m/(\d+.*\d+)[gG]/);
$mem = $1 / 1024 if ($mem =~ m/(\d+.*\d+)[mM]/);
$mem = int($mem);

$hosts{"$cpus-$mem"}++;
}
close(F);
setGlobal("availableHosts", formatAllowedResources(%hosts));
}

if (uc(getGlobal("gridEngine")) eq "SLURM") {
setGlobalIfUndef("gridEngineSubmitCommand", "sbatch");
setGlobalIfUndef("gridEngineHoldOption", "--depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineHoldOptionNoArray", "--depend=afterany:\"WAIT_TAG\"");
setGlobalIfUndef("gridEngineSyncOption", ""); ## TODO: SLURM may not support w/out wrapper; See LSF bsub manpage to compare
setGlobalIfUndef("gridEngineNameOption", "-D `pwd` -J");
setGlobalIfUndef("gridEngineArrayOption", "-a ARRAY_JOBS");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME\[ARRAY_JOBS\]");
setGlobalIfUndef("gridEngineOutputOption", "-o"); ## NB: SLURM default joins STDERR & STDOUT if no -e specified
setGlobalIfUndef("gridEngineThreadsOption", "--cpus-per-task=THREADS");
setGlobalIfUndef("gridEngineMemoryOption", "--mem=MEMORY");
setGlobalIfUndef("gridEnginePropagateCommand", "scontrol update job=\"WAIT_TAG\""); ## TODO: manually verify this in all cases
setGlobalIfUndef("gridEngineNameToJobIDCommand", "squeue -h -o\%F -n \"WAIT_TAG\" | uniq"); ## TODO: manually verify this in all cases
setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "squeue -h -o\%i -n \"WAIT_TAG\""); ## TODO: manually verify this in all cases
setGlobalIfUndef("gridEngineTaskID", "\$SLURM_ARRAY_TASK_ID");
setGlobalIfUndef("gridEngineArraySubmitID", "%A_%a");
setGlobalIfUndef("gridEngineJobID", "SLURM_JOB_ID");


# Build a list of the resources available in the grid. This will contain a list with keys
# of "#CPUs-#GBs" and values of the number of nodes with such a config. Later on, we'll use this
# to figure out what specific settings to use for each algorithm.
#
# The list is saved in global{"availableHosts"}
#
my %hosts;

open(F, "sinfo --Node --long |");

my $b = <F>; # date/time
my $h = <F>; # header

my @h = split '\s+', $h;

my $nodeIdx = 1;
my $cpuIdx = 4;
my $memIdx = 6;

for (my $ii=0; ($ii < scalar(@h)); $ii++) {
$nodeIdx = $ii if ($h[$ii] eq "NODES");
$cpuIdx = $ii if ($h[$ii] eq "CPUS");
$memIdx = $ii if ($h[$ii] eq "MEMORY");
}

while (<F>) {
my @v = split '\s+', $_;

my $cpus = $v[$cpuIdx];
my $mem = $v[$memIdx];
my $nodes = $v[$nodeIdx];

if ($mem =~ m/(\d+.*\d+)[tT]/) {
$mem = $1 * 1024;
} elsif ($mem =~ m/(\d+.*\d+)[gG]/) {
$mem = $1 * 1;
} else {
$mem /= 1024;
}
$mem = int($mem);

$hosts{"$cpus-$mem"}+=int($nodes);
}
close(F);
setGlobal("availableHosts", formatAllowedResources(%hosts));
}

#
Expand Down Expand Up @@ -1394,14 +1549,6 @@ sub checkParameters ($) {
}
}

#
# Process all the memory/thread settings to make them compatible with our
# hardware. These wanted to be closer to their use (like right before the various
# scripts are written), but it needs to be done every time canu starts; otherwise,
# the canu invocation that writes scripts gets the fixes, and the canu invocation
# that runs the scripts does not.
#

my $err;
my $all;

Expand Down Expand Up @@ -1673,10 +1820,32 @@ sub setDefaults () {
# submitScript() and submitOrRunParallelJob() will return without submitting, or run locally
# (respectively). This means that we can just trivially change the defaults for useGrid and
# useGridMaster to 'enabled' and it'll do the right thing when SGE isn't present.
#
my $pbs = `which pbsnodes 2> /dev/null`;
chomp $pbs;
my $slurm = `which sbatch 2> /dev/null`;
chomp $slurm;
my $lsf = `which bsub 2>/dev/null`;
chomp $lsf;

if (defined($ENV{'SGE_ROOT'})) {
print STDERR "-- Detected Sun Grid Engine in '$ENV{'SGE_ROOT'}/$ENV{'SGE_CELL'}'.\n";
$global{"gridEngine"} = "SGE";
} elsif (defined($slurm) && $slurm ne "") {
$slurm = `dirname $slurm`;
chomp $slurm;
print STDERR "-- Detected Slurm Engine in $slurm.\n";
$global{"gridEngine"} = "SLURM";
} elsif (defined($pbs) && $pbs ne "") {
$pbs = `dirname $pbs`;
chomp $pbs;
print STDERR "-- Detected PBS/Torque Engine in $pbs.\n";
$global{"gridEngine"} = "PBS";
} elsif (defined($lsf) && $lsf ne "") {
$lsf= `dirname $lsf`;
chomp $lsf;
print STDERR "-- Detected Slurm Engine in $lsf.\n";
$global{"gridEngine"} = "LSF";
} else {
print STDERR "-- No grid engine detected, grid disabled.\n";
}
Expand Down
27 changes: 25 additions & 2 deletions src/pipelines/canu/Execution.pm
Original file line number Diff line number Diff line change
Expand Up @@ -792,10 +792,33 @@ sub submitScript ($$$) {
# docs online (for bsub.1) claim that we can still use jobToWaitOn.

if (defined($jobToWaitOn)) {
(my $hold = getGlobal("gridEngineHoldOption")) =~ s/WAIT_TAG/$jobToWaitOn/;
my $hold = getGlobal("gridEngineHoldOption");

# most grid engines don't understand job names to hold on, only IDs
if (uc(getGlobal("gridEngine")) eq "LSF" || uc(getGlobal("gridEngine")) eq "PBS" || uc(getGlobal("gridEngine")) eq "SLURM"){
my $tcmd = getGlobal("gridEngineNameToJobIDCommand");
$tcmd =~ s/WAIT_TAG/$jobToWaitOn/g;
my $propJobCount = `$tcmd |wc -l`;
chomp $propJobCount;
if ($propJobCount == 0) {
$tcmd = getGlobal("gridEngineNameToJobIDCommandNoArray");
$tcmd =~ s/WAIT_TAG/$jobToWaitOn/g;
$hold = getGlobal("gridEngineHoldOptionNoArray");
$propJobCount = `$tcmd |wc -l`;
}
if ($propJobCount != 1) {
print STDERR "Warning: multiple IDs for job $jobToWaitOn got $propJobCount and should have been 1.\n";
}
my $jobID = `$tcmd |tail -n 1 |awk '{print \$1}'`;
chomp $jobID;
$hold =~ s/WAIT_TAG/$jobID/g;
} else {
$hold =~ s/WAIT_TAG/$jobToWaitOn/;
}
$gridOpts .= " " . $hold;
}


my $submitCommand = getGlobal("gridEngineSubmitCommand");
my $nameOption = getGlobal("gridEngineNameOption");
my $outputOption = getGlobal("gridEngineOutputOption");
Expand Down Expand Up @@ -848,7 +871,7 @@ sub buildMemoryOption ($$) {
my $t = shift @_;
my $r;

if (getGlobal("gridEngine") eq "SGE") {
if (getGlobal("gridEngine") eq "SGE" || getGlobal("gridEngine") eq "LSF") {
$m /= $t;
}

Expand Down

0 comments on commit 3f34d3c

Please sign in to comment.