Skip to content

Commit

Permalink
Tweaks to make Canu recognize manually run jobs are complete.
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Sep 18, 2020
1 parent ac0e512 commit 1987868
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 77 deletions.
100 changes: 54 additions & 46 deletions src/pipelines/canu/CorrectReads.pm
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,12 @@ sub buildCorrectionLayoutsConfigure ($) {
my $base = "correction";
my $path = "correction/2-correction";

goto allDone if (-d "$base/$asm.corStore"); # Jobs all finished
goto allDone if (fileExists("$base/$asm.corStore/seqDB.v001.dat"));
goto allDone if (fileExists("$base/$asm.corStore/seqDB.v001.tig"));
goto allDone if (-d "$base/$asm.corStore"); # Jobs all finished
goto allDone if (fileExists("$base/$asm.corStore/seqDB.v001.dat"));
goto allDone if (fileExists("$base/$asm.corStore/seqDB.v001.tig"));

goto finishStage if ((-e "$base/$asm.corStore.WORKING/seqDB.v001.dat") && # Job ran manually, showNext
(-e "$base/$asm.corStore.WORKING/seqDB.v001.tig"));

# The global filter can be estimated from data saved in ovlStore. This code will compute it exactly.
#
Expand Down Expand Up @@ -191,13 +194,14 @@ sub buildCorrectionLayoutsConfigure ($) {
caExit("failed to generate correction layouts", "$base/$asm.corStore.err");
}

rename "$base/$asm.corStore.WORKING", "$base/$asm.corStore";
unlink "$base/$asm.corStore.err";

finishStage:
rename "$base/$asm.corStore.WORKING", "$base/$asm.corStore";

stashFile("$base/$asm.corStore/seqDB.v001.dat");
stashFile("$base/$asm.corStore/seqDB.v001.tig");

finishStage:
generateReport($asm);
resetIteration("cor-buildCorrectionLayoutsConfigure");

Expand Down Expand Up @@ -236,7 +240,10 @@ sub filterCorrectionLayouts ($) {
my $base = "correction";
my $path = "correction/2-correction";

goto allDone if (fileExists("$path/$asm.readsToCorrect")); # Jobs all finished
goto allDone if (fileExists("$path/$asm.readsToCorrect")); # Jobs all finished

goto finishStage if ((-e "$path/$asm.readsToCorrect.WORKING.stats") && # Job ran manually, showNext
(-e "$path/$asm.readsToCorrect.WORKING.log"));

# Analyze the corStore to decide what reads we want to correct.

Expand All @@ -258,6 +265,7 @@ sub filterCorrectionLayouts ($) {
caExit("failed to generate list of reads to correct", "$path/$asm.readsToCorrect.err");
}

finishStage:
rename "$path/$asm.readsToCorrect.WORKING", "$path/$asm.readsToCorrect";
rename "$path/$asm.readsToCorrect.WORKING.stats", "$path/$asm.readsToCorrect.stats";
rename "$path/$asm.readsToCorrect.WORKING.log", "$path/$asm.readsToCorrect.log";
Expand All @@ -276,7 +284,6 @@ sub filterCorrectionLayouts ($) {

addToReport("corLayout", $report);

finishStage:
generateReport($asm);
resetIteration("cor-filterCorrectionLayouts");

Expand Down Expand Up @@ -334,53 +341,54 @@ sub generateCorrectedReadsConfigure ($) {
# Generate a script to compute the partitioning of correction jobs, given
# the memory allowed per process and memory needed for a correction.

open(F, "> $path/correctReadsPartition.sh") or caExit("can't open '$path/correctReadsPartition.sh' for writing: $!", undef);
if (! -e "$path/correctReadsPartition.batches") {
open(F, "> $path/correctReadsPartition.sh") or caExit("can't open '$path/correctReadsPartition.sh' for writing: $!", undef);

print F "#!" . getGlobal("shell") . "\n";
print F "\n";
print F getBinDirectoryShellCode();
print F "\n";
print F "\$bin/falconsense \\\n";
print F " -partition $mem $cnsmem $par $rds \\\n";
print F " -S ../../$asm.seqStore \\\n";
print F " -C ../$asm.corStore \\\n";
print F " -R ./$asm.readsToCorrect \\\n" if ( fileExists("$path/$asm.readsToCorrect"));
print F " -t " . getGlobal("corThreads") . " \\\n";
print F " -cc " . getGlobal("corMinCoverage") . " \\\n";
print F " -cl " . getGlobal("minReadLength") . " \\\n";
print F " -oi " . getCorIdentity($asm) . " \\\n";
print F " -ol " . getGlobal("minOverlapLength") . " \\\n";
print F " -p ./correctReadsPartition.WORKING \\\n";
print F "&& \\\n";
print F "mv ./correctReadsPartition.WORKING.batches ./correctReadsPartition.batches \\\n";
print F "&& \\\n";
print F "exit 0\n";
print F "\n";
print F "exit 1\n";
print F "#!" . getGlobal("shell") . "\n";
print F "\n";
print F getBinDirectoryShellCode();
print F "\n";
print F "\$bin/falconsense \\\n";
print F " -partition $mem $cnsmem $par $rds \\\n";
print F " -S ../../$asm.seqStore \\\n";
print F " -C ../$asm.corStore \\\n";
print F " -R ./$asm.readsToCorrect \\\n" if ( fileExists("$path/$asm.readsToCorrect"));
print F " -t " . getGlobal("corThreads") . " \\\n";
print F " -cc " . getGlobal("corMinCoverage") . " \\\n";
print F " -cl " . getGlobal("minReadLength") . " \\\n";
print F " -oi " . getCorIdentity($asm) . " \\\n";
print F " -ol " . getGlobal("minOverlapLength") . " \\\n";
print F " -p ./correctReadsPartition.WORKING \\\n";
print F "&& \\\n";
print F "mv ./correctReadsPartition.WORKING.batches ./correctReadsPartition.batches \\\n";
print F "&& \\\n";
print F "exit 0\n";
print F "\n";
print F "exit 1\n";

close(F);
close(F);

makeExecutable("$path/correctReadsPartition.sh");
stashFile("$path/correctReadsPartition.sh");
makeExecutable("$path/correctReadsPartition.sh");
stashFile("$path/correctReadsPartition.sh");

print STDERR "--\n";
print STDERR "-- Configuring correction jobs:\n";
print STDERR "-- Jobs limited to $mem GB per job (via option corMemory).\n";
print STDERR "-- Reads estimated to need at most $cnsmem GB for computation.\n";
print STDERR "-- Leaving $remain GB memory for read data.\n";

if ($remain < 1.0) {
#print STDERR "--\n";
#print STDERR "-- ERROR: Not enough memory for correction. Increase corMemory.\n";
caExit("not enough memory for correction; increase corMemory", undef);
}
print STDERR "--\n";
print STDERR "-- Configuring correction jobs:\n";
print STDERR "-- Jobs limited to $mem GB per job (via option corMemory).\n";
print STDERR "-- Reads estimated to need at most $cnsmem GB for computation.\n";
print STDERR "-- Leaving $remain GB memory for read data.\n";

if ($remain < 1.0) {
caExit("not enough memory for correction; increase corMemory", undef);
}

if (runCommand($path, "./correctReadsPartition.sh > ./correctReadsPartition.err 2>&1")) {
caExit("failed to partition reads for correction", "$path/correctReadsPartition.err");
}

if (runCommand($path, "./correctReadsPartition.sh > ./correctReadsPartition.err 2>&1")) {
caExit("failed to partition reads for correction", "$path/correctReadsPartition.err");
unlink("$path/correctReadsPartition.err");
}

stashFile("$path/correctReadsPartition.batches");
unlink("$path/correctReadsPartition.err");

# Generate a script for computing corrected reads, using the batches file
# as a template.
Expand Down
92 changes: 61 additions & 31 deletions src/pipelines/canu/OverlapStore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,66 @@ sub overlapStoreSorterCheck ($$$$$) {



sub overlapStoreIndexerCheck ($$$$$) {
my $base = shift @_;
my $asm = shift @_;
my $tag = shift @_;
my $numBuckets = shift @_;
my $numSlices = shift @_;

my $bin = getBinDirectory();
my $cmd;

# If running with showNext, ovStoreIndexer will be run (manually
# possibly) leaving an index file behind, but not renaming the store.
# In that case, jump straight to finishStage and rename it.

goto allDone if ((-d "$base/$asm.ovlStore") || (fileExists("$base/$asm.ovlStore.tar.gz")));

goto finishStage if (-e "$base/$asm.ovlStore.BUILDING/index");

# Fetch the stats and index data. If not using an object store, the
# fetch does nothing, and since there is no file, the gzip/tar are
# skipped.

for (my $ss=1; $ss<=$numSlices; $ss++) {
my $slice = substr("0000" . $ss, -4);

fetchFile("$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz");

if (-e "$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz") {
runCommandSilently("$base/$asm.ovlStore.BUILDING", "gzip -dc $slice.statistics.tar.gz | tar -xf -", 1);
unlink("$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz");
}
}

$cmd = "$bin/ovStoreIndexer \\\n";
$cmd .= " -O ./$asm.ovlStore.BUILDING \\\n";
$cmd .= " -S ../$asm.seqStore \\\n";
$cmd .= " -C ./$asm.ovlStore.config \\\n";
$cmd .= " -delete \\\n";
$cmd .= "> ./$asm.ovlStore.BUILDING.index.err 2>&1";

if (runCommand("$base", $cmd)) {
caExit("failed to build index for overlap store", "$base/$asm.ovlStore.BUILDING.index.err");
}

unlink "$base/$asm.ovlStore.BUILDING.index.err";

finishStage:
print STDERR "-- Overlap store indexer finished.\n";

rename "$base/$asm.ovlStore.BUILDING", "$base/$asm.ovlStore";

renameStashedFile("$base/$asm.ovlStore.BUILDING", "$base/$asm.ovlStore");
stashOvlStore($asm, $base);

#resetIteration("$tag-overlapStoreIndexerCheck");

allDone:
}



sub checkOverlapStore ($$) {
my $base = shift @_;
Expand Down Expand Up @@ -889,37 +949,7 @@ sub createOverlapStore ($$) {
createOverlapStoreParallel ($base, $asm, $tag, $numBuckets, $numSlices, $sortMemory);
overlapStoreBucketizerCheck($base, $asm, $tag, $numBuckets, $numSlices) foreach (1..getGlobal("canuIterationMax") + 1);
overlapStoreSorterCheck ($base, $asm, $tag, $numBuckets, $numSlices) foreach (1..getGlobal("canuIterationMax") + 1);

# Fetch the stats and index data. If not using an object store, the fetch does nothing,
# and since there is no file, the gzip/tar are skipped.

for (my $ss=1; $ss<=$numSlices; $ss++) {
my $slice = substr("0000" . $ss, -4);

fetchFile("$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz");

if (-e "$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz") {
runCommandSilently("$base/$asm.ovlStore.BUILDING", "gzip -dc $slice.statistics.tar.gz | tar -xf -", 1);
unlink("$base/$asm.ovlStore.BUILDING/$slice.statistics.tar.gz");
}
}

$cmd = "$bin/ovStoreIndexer \\\n";
$cmd .= " -O ./$asm.ovlStore.BUILDING \\\n";
$cmd .= " -S ../$asm.seqStore \\\n";
$cmd .= " -C ./$asm.ovlStore.config \\\n";
$cmd .= " -delete \\\n";
$cmd .= "> ./$asm.ovlStore.BUILDING.index.err 2>&1";

if (runCommand("$base", $cmd)) {
caExit("failed to build index for overlap store", "$base/$asm.ovlStore.BUILDING.index.err");
}

unlink "$base/$asm.ovlStore.BUILDING.index.err";
rename "$base/$asm.ovlStore.BUILDING", "$base/$asm.ovlStore";

renameStashedFile("$base/$asm.ovlStore.BUILDING", "$base/$asm.ovlStore");
stashOvlStore($asm, $base);
overlapStoreIndexerCheck ($base, $asm, $tag, $numBuckets, $numSlices);
}

finishStage:
Expand Down

0 comments on commit 1987868

Please sign in to comment.