Skip to content

Commit

Permalink
Disable symmetrizeOverlaps() for layoutReads.
Browse files (browse the repository at this point in the history)
  • Loading branch information
brianwalenz committed Jul 29, 2021
1 parent 20e5f8f commit 1591829
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 24 deletions.
24 changes: 4 additions & 20 deletions src/bogart/AS_BAT_OverlapCache.C
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ OverlapCache::OverlapCache(const char *ovlStorePath,
double maxErate,
uint32 minOverlap,
uint64 memlimit,
uint64 genomeSize) {
uint64 genomeSize,
bool symmetrize) {

_prefix = prefix;

Expand Down Expand Up @@ -159,7 +160,8 @@ OverlapCache::OverlapCache(const char *ovlStorePath,
delete [] _ovsTmp; _ovsTmp = NULL; // it loaded updated erates into memory), so release
delete ovlStore; ovlStore = NULL; // these before symmetrizing overlaps.

symmetrizeOverlaps();
if (symmetrize == true)
symmetrizeOverlaps();

delete [] _minSco; _minSco = NULL;
}
Expand Down Expand Up @@ -288,21 +290,6 @@ OverlapCache::computeOverlapLimit(ovStore *ovlStore, uint64 genomeSize) {
_maxPer += increase;
}

// We used to (pre 6 Jul 2017) do the symmetry check only if we didn't load all overlaps.
// However, symmetry can also break if we use an error rate cutoff because - for reasons not
// explored - the error rate on symmetric overlaps differs. So, just enable this always.
//
// On a moderate coverage human nanopore assembly, it does:
//
// OverlapCache()-- Symmetrizing overlaps -- finding missing twins.
// OverlapCache()-- -- found 8609 missing twins in 51413413 overlaps, 8002 are strong.
// OverlapCache()-- Symmetrizing overlaps -- dropping weak non-twin overlaps.
// OverlapCache()-- -- dropped 454 overlaps.
// OverlapCache()-- Symmetrizing overlaps -- adding 8155 missing twin overlaps.

_checkSymmetry = (numAbove > 0) ? true : false;
_checkSymmetry = true;

if (_maxPer < _minPer)
writeStatus("OverlapCache()-- Not enough memory to load the minimum number of overlaps; increase -M.\n"), exit(1);

Expand Down Expand Up @@ -650,9 +637,6 @@ OverlapCache::symmetrizeOverlaps(void) {
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 1000 * numThreads) ? numThreads : fiLimit / 999;

if (_checkSymmetry == false)
return;

uint32 *nNonSymPerRead = new uint32 [fiLimit];
uint32 *nFiltPerRead = new uint32 [fiLimit];
uint32 *nMissPerRead = new uint32 [fiLimit];
Expand Down
5 changes: 2 additions & 3 deletions src/bogart/AS_BAT_OverlapCache.H
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ public:
double maxErate,
uint32 minOverlap,
uint64 maxMemory,
uint64 genomeSize);
uint64 genomeSize,
bool symmetrize=true);
~OverlapCache();

bool compareOverlaps(const BAToverlap &a, const BAToverlap &b) const; // This could almost be templated, but the fields are functions in one type and plain members in the other.
Expand Down Expand Up @@ -293,8 +294,6 @@ private:

uint64 *_minSco; // The minimum score accepted for each read

bool _checkSymmetry;

uint32 _ovsMax; // For loading overlaps
ovOverlap *_ovs; //
uint64 *_ovsSco; // For scoring overlaps during the load
Expand Down
10 changes: 9 additions & 1 deletion src/bogart/layoutReads.C
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ main (int argc, char **argv) {
uint64 genomeSize = 0;
bool doContainPlacement = true;

bool doSymmetrize = false;

argc = AS_configure(argc, argv);

uint32 seed = time(NULL);
Expand Down Expand Up @@ -315,6 +317,10 @@ main (int argc, char **argv) {
doContainPlacement = false;
}

else if (strcmp(argv[arg], "-symmetrize") == 0) {
doSymmetrize = true;
}

else {
char *s = new char [1024];
snprintf(s, 1024, "Unknown option '%s'.\n", argv[arg]);
Expand Down Expand Up @@ -344,6 +350,8 @@ main (int argc, char **argv) {
fprintf(stderr, " -eM erate Max error rate of overlaps to load.\n");
fprintf(stderr, " -eg erate Max error rate of overlaps to use for placing contained reads.\n");
fprintf(stderr, " -nocontains Do not place contained reads.\n");
fprintf(stderr, "\n");
fprintf(stderr, " -symmetrize Check and fix symmetry of overlaps (slow).\n");

for (uint32 ii=0; ii<err.size(); ii++)
if (err[ii])
Expand All @@ -361,7 +369,7 @@ main (int argc, char **argv) {
setLogFile(prefix, "loadInformation");

RI = new ReadInfo(seqStorePath, prefix, minReadLen, maxReadLen);
OC = new OverlapCache(ovlStorePath, prefix, std::max(erateMax, erateGraph), minOverlapLen, ovlCacheMemory, genomeSize);
OC = new OverlapCache(ovlStorePath, prefix, std::max(erateMax, erateGraph), minOverlapLen, ovlCacheMemory, genomeSize, doSymmetrize);

//

Expand Down

0 comments on commit 1591829

Please sign in to comment.