Skip to content

Commit

Permalink
Add support for minimap as an overlapper option
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Feb 24, 2016
1 parent 7a58403 commit 26569bb
Show file tree
Hide file tree
Showing 10 changed files with 835 additions and 7 deletions.
4 changes: 4 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ all: UPDATE_VERSION MAKE_DIRS \
${TARGET_DIR}/lib/canu/OverlapErrorAdjustment.pm \
${TARGET_DIR}/lib/canu/OverlapInCore.pm \
${TARGET_DIR}/lib/canu/OverlapMhap.pm \
${TARGET_DIR}/lib/canu/OverlapMMap.pm \
${TARGET_DIR}/lib/canu/OverlapStore.pm \
${TARGET_DIR}/lib/canu/Unitig.pm
@echo ""
Expand Down Expand Up @@ -641,6 +642,9 @@ ${TARGET_DIR}/lib/canu/OverlapInCore.pm: pipelines/canu/OverlapInCore.pm
${TARGET_DIR}/lib/canu/OverlapMhap.pm: pipelines/canu/OverlapMhap.pm
cp -pf pipelines/canu/OverlapMhap.pm ${TARGET_DIR}/lib/canu/

${TARGET_DIR}/lib/canu/OverlapMMap.pm: pipelines/canu/OverlapMMap.pm
cp -pf pipelines/canu/OverlapMMap.pm ${TARGET_DIR}/lib/canu/

${TARGET_DIR}/lib/canu/OverlapStore.pm: pipelines/canu/OverlapStore.pm
cp -pf pipelines/canu/OverlapStore.pm ${TARGET_DIR}/lib/canu/

Expand Down
2 changes: 2 additions & 0 deletions src/main.mk
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ SUBMAKEFILES := stores/gatekeeperCreate.mk \
mhap/mhap.mk \
mhap/mhapConvert.mk \
\
minimap/mmapConvert.mk \
\
correction/filterCorrectionOverlaps.mk \
correction/generateCorrectionLayouts.mk \
correction/readConsensus.mk \
Expand Down
128 changes: 128 additions & 0 deletions src/minimap/mmapConvert.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@

/******************************************************************************
*
* This file is part of canu, a software program that assembles whole-genome
* sequencing reads into contigs.
*
* This software is based on:
* 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
* the 'kmer package' (http://kmer.sourceforge.net)
* both originally distributed by Applera Corporation under the GNU General
* Public License, version 2.
*
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
* Modifications by:
*
* Brian P. Walenz from 2015-MAR-27 to 2015-JUN-25
* are Copyright 2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/

#include "AS_global.H"
#include "ovStore.H"
#include "splitToWords.H"

#include <vector>

using namespace std;


int
main(int argc, char **argv) {
bool asCoords = true;

char *outName = NULL;

vector<char *> files;


int32 arg = 1;
int32 err = 0;
while (arg < argc) {
if (strcmp(argv[arg], "-o") == 0) {
outName = argv[++arg];

} else if (AS_UTL_fileExists(argv[arg])) {
files.push_back(argv[arg]);

} else {
fprintf(stderr, "ERROR: invalid arg '%s'\n", argv[arg]);
err++;
}

arg++;
}

if ((err) || (files.size() == 0)) {
fprintf(stderr, "usage: %s [options] file.mhap[.gz]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, " Converts mhap native output to ovb\n");
fprintf(stderr, "\n");
fprintf(stderr, " -o out.ovb output file\n");
fprintf(stderr, "\n");

if (files.size() == 0)
fprintf(stderr, "ERROR: no overlap files supplied\n");

exit(1);
}

char *ovStr = new char [1024];

ovOverlap ov(NULL);
ovFile *of = new ovFile(outName, ovFileFullWrite);

for (uint32 ff=0; ff<files.size(); ff++) {
compressedFileReader *in = new compressedFileReader(files[ff]);

// $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 $11 $12 $13
// 0 1 2 3 4 5 6 7 8 9 10 11 12
// 0f1bd7b6-a7f2-4bcb-8575-d617f1394b8a_Basecall_2D_2d 8189 1310 8014 + b74d9367-f45a-4684-8bfc-ff533629b030_Basecall_2D_2d 14205 7340 14051 277 6711 255 cm:i:32
// 0f1bd7b6-a7f2-4bcb-8575-d617f1394b8a_Basecall_2D_2d 8189 1152 7272 - a3026aca-57a7-4639-96bf-b76624cf2d34_Basecall_2D_2d 7731 1642 7547 157 6120 255 cm:i:24
// aiid alen bgn end bori biid blen bgn end #match minimizers alnlen ? cm:i:errori
//

while (fgets(ovStr, 1024, in->file()) != NULL) {
splitToWords W(ovStr);

ov.a_iid = W(0);
ov.b_iid = W(5);

if (ov.a_iid == ov.b_iid)
continue;

ov.dat.ovl.forUTG = true;
ov.dat.ovl.forOBT = true;
ov.dat.ovl.forDUP = true;

ov.dat.ovl.ahg5 = W(2);
ov.dat.ovl.ahg3 = W(1) - W(3);

if (W[4][0] == '+') {
ov.dat.ovl.bhg5 = W(7);
ov.dat.ovl.bhg3 = W(6) - W(8);
ov.flipped(false);
} else {
ov.dat.ovl.bhg3 = W(7);
ov.dat.ovl.bhg5 = W(6) - W(8);
ov.flipped(true);
}

ov.erate(1-((double)W(9)/W(10)));

of->writeOverlap(&ov);
}

arg++;
}

delete of;
delete [] ovStr;

exit(0);
}
19 changes: 19 additions & 0 deletions src/minimap/mmapConvert.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# If 'make' isn't run from the root directory, we need to set these to
# point to the upper level build directory.
ifeq "$(strip ${BUILD_DIR})" ""
BUILD_DIR := ../$(OSTYPE)-$(MACHINETYPE)/obj
endif
ifeq "$(strip ${TARGET_DIR})" ""
TARGET_DIR := ../$(OSTYPE)-$(MACHINETYPE)/bin
endif

TARGET := mmapConvert
SOURCES := mmapConvert.C

SRC_INCDIRS := .. ../AS_UTL ../stores liboverlap

TGT_LDFLAGS := -L${TARGET_DIR}
TGT_LDLIBS := -lcanu
TGT_PREREQS := libcanu.a

SUBMAKEFILES :=
8 changes: 8 additions & 0 deletions src/pipelines/canu.pl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
use canu::Meryl;
use canu::OverlapInCore;
use canu::OverlapMhap;
use canu::OverlapMMap;
use canu::OverlapStore;

use canu::CorrectReads;
Expand Down Expand Up @@ -368,6 +369,13 @@ ($$$)

mhapCheck($wrk, $asm, $tag, $ovlType) foreach (1..getGlobal("canuIterationMax") + 1);

} elsif (getGlobal("${tag}overlapper") eq "minimap") {
mmapConfigure($wrk, $asm, $tag, $ovlType);

mmapPrecomputeCheck($wrk, $asm, $tag, $ovlType) foreach (1..getGlobal("canuIterationMax") + 1);

mmapCheck($wrk, $asm, $tag, $ovlType) foreach (1..getGlobal("canuIterationMax") + 1);

} else {
overlapConfigure($wrk, $asm, $tag, $ovlType);

Expand Down
25 changes: 25 additions & 0 deletions src/pipelines/canu/Configure.pm
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ sub getAllowedResources ($$$$) {
elsif ($alg eq "ovs") { $nam = "overlap store parallel sorting"; }
elsif ($alg eq "red") { $nam = "read error detection (overlap error adjustment)"; }
elsif ($alg eq "mhap") { $nam = "mhap (overlapper)"; }
elsif ($alg eq "mmap") { $nam = "minimap (overlapper)"; }
elsif ($alg eq "ovl") { $nam = "overlapper"; }
else {
caFailure("unknown task '$alg' in getAllowedResources().", undef);
Expand Down Expand Up @@ -459,6 +460,11 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "4-6"); setGlobalIfUndef("obtMhapThreads", "1-16");
setGlobalIfUndef("utgMhapMemory", "4-6"); setGlobalIfUndef("utgMhapThreads", "1-16");

setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");


} elsif (getGlobal("genomeSize") < adjustGenomeSize("500m")) {
setGlobalIfUndef("corOvlMemory", "2-6"); setGlobalIfUndef("corOvlThreads", "1");
setGlobalIfUndef("obtOvlMemory", "4-8"); setGlobalIfUndef("obtOvlThreads", "1-8");
Expand All @@ -468,6 +474,10 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "8-13"); setGlobalIfUndef("obtMhapThreads", "1-16");
setGlobalIfUndef("utgMhapMemory", "8-13"); setGlobalIfUndef("utgMhapThreads", "1-16");

setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");

} elsif (getGlobal("genomeSize") < adjustGenomeSize("2g")) {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
setGlobalIfUndef("obtOvlMemory", "4-12"); setGlobalIfUndef("obtOvlThreads", "1-8");
Expand All @@ -477,6 +487,10 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "16-32"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "16-32"); setGlobalIfUndef("utgMhapThreads", "4-16");

setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");

} elsif (getGlobal("genomeSize") < adjustGenomeSize("5g")) {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
setGlobalIfUndef("obtOvlMemory", "4-16"); setGlobalIfUndef("obtOvlThreads", "1-8");
Expand All @@ -486,6 +500,10 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "16-48"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "16-48"); setGlobalIfUndef("utgMhapThreads", "4-16");

setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");

} else {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
setGlobalIfUndef("obtOvlMemory", "4-16"); setGlobalIfUndef("obtOvlThreads", "1-8");
Expand All @@ -494,6 +512,10 @@ sub configureAssembler () {
setGlobalIfUndef("corMhapMemory", "32-64"); setGlobalIfUndef("corMhapThreads", "4-16");
setGlobalIfUndef("obtMhapMemory", "32-64"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "32-64"); setGlobalIfUndef("utgMhapThreads", "4-16");

setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");
}

# Overlapper block sizes probably don't need to be modified based on genome size.
Expand Down Expand Up @@ -623,6 +645,9 @@ sub configureAssembler () {
($err, $all) = getAllowedResources("utg", "ovl", $err, $all);
($err, $all) = getAllowedResources("", "meryl", $err, $all);
($err, $all) = getAllowedResources("", "cor", $err, $all);
($err, $all) = getAllowedResources("cor", "mmap", $err, $all);
($err, $all) = getAllowedResources("obt", "mmap", $err, $all);
($err, $all) = getAllowedResources("utg", "mmap", $err, $all);

print STDERR "--\n" if (defined($err));
print STDERR $err if (defined($err));
Expand Down
25 changes: 19 additions & 6 deletions src/pipelines/canu/Defaults.pm
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ sub setGlobal ($$) {

# Translate from generic to specialized var

foreach my $alg ("ovl", "mhap") {
foreach my $alg ("ovl", "mhap", "mmap") {
foreach my $opt ("gridoptions") {
$set += setGlobalSpecialization($val, ("${opt}cor${alg}", "${opt}obt${alg}", "${opt}utg${alg}")) if ($var eq "${opt}${alg}");
}
Expand Down Expand Up @@ -638,11 +638,19 @@ sub setOverlapDefaults ($$$) {
$global{"${tag}MhapMerSize"} = ($tag eq "cor") ? 16 : 22;
$synops{"${tag}MhapMerSize"} = "K-mer size for seeds in mhap";

$global{"${tag}MhapReAlign"} = undef;
$synops{"${tag}MhapReAlign"} = "Compute actual alignments from mhap overlaps; 'raw' from mhap output, 'final' from overlap store; uses either obtErrorRate or ovlErrorRate, depending on which overlaps are computed";

$global{"${tag}MhapSensitivity"} = "normal";
$synops{"${tag}MhapSensitivity"} = "Coarse sensitivity level: 'normal' or 'high'; default 'normal'";

$global{"${tag}MMapBlockSize"} = 6000;
$synops{"${tag}MMapBlockSize"} = "Number of reads per 1GB; memory * blockSize = the size of block loaded into memory per job";

# minimap parameters.
$global{"${tag}MMapMerSize"} = ($tag eq "cor") ? 15 : 22;
$synops{"${tag}MMapMerSize"} = "K-mer size for seeds in minmap";

# shared parameters for alignment-free overlappers
$global{"${tag}ReAlign"} = undef;
$synops{"${tag}ReAlign"} = "Compute actual alignments from overlaps; 'raw' from output, 'final' from overlap store; uses either obtErrorRate or ovlErrorRate, depending on which overlaps are computed";
}


Expand Down Expand Up @@ -794,6 +802,10 @@ sub setDefaults () {
setExecDefaults("obtmhap", "mhap overlaps for trimming");
setExecDefaults("utgmhap", "mhap overlaps for unitig construction");

setExecDefaults("cormmap", "mmap overlaps for correction");
setExecDefaults("obtmmap", "mmap overlaps for trimming");
setExecDefaults("utgmmap", "mmap overlaps for unitig construction");

setExecDefaults("ovb", "overlap store bucketizing");
setExecDefaults("ovs", "overlap store sorting");

Expand Down Expand Up @@ -1012,8 +1024,9 @@ sub checkParameters () {

foreach my $tag ("cor", "obt", "utg") {
if ((getGlobal("${tag}Overlapper") ne "mhap") &&
(getGlobal("${tag}Overlapper") ne "ovl")) {
addCommandLineError("ERROR: Invalid '${tag}Overlapper' specified (" . getGlobal("${tag}Overlapper") . "); must be 'mhap' or 'ovl'\n");
(getGlobal("${tag}Overlapper") ne "ovl") &&
(getGlobal("${tag}Overlapper") ne "minimap")) {
addCommandLineError("ERROR: Invalid '${tag}Overlapper' specified (" . getGlobal("${tag}Overlapper") . "); must be 'mhap', 'ovl', or 'minimap'\n");
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/pipelines/canu/Meryl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ sub merylParameters ($$$) {
$ffile = "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore"; # The mhap-specific file we should be creating (ends in IGNORE).
$ofile = "$wrk/0-mercounts/$asm.ms$merSize"; # The meryl database 'intermediate file'.

} elsif (getGlobal("${tag}Overlapper") eq "minimap") {
# do nothing
$ffile = "$wrk/0-mercounts/$asm.ms$merSize.skip";
make_path("$wrk/0-mercounts") if (! -d "$wrk/0-mercounts");
touch($ffile);
} else {
caFailure("unknown ${tag}Overlapper '" . getGlobal("${tag}Overlapper") . "'", undef);
}
Expand Down
Loading

0 comments on commit 26569bb

Please sign in to comment.