Skip to content

Commit

Permalink
Add functions to get non-contained first/last read and standardize th…
Browse files Browse the repository at this point in the history
…eir usage
  • Loading branch information
skoren committed Nov 13, 2020
1 parent a58af29 commit ab08d0c
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 37 deletions.
4 changes: 2 additions & 2 deletions src/bogart/AS_BAT_FindCircular.C
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ findCircularContigs(TigVector &tigs,
// Grab the first and last reads in the tig, then find the edge that
// points out of the tig.

ufNode *fRead = tig->firstRead();
ufNode *lRead = tig->lastRead();
ufNode *fRead = tig->firstBackboneRead();
ufNode *lRead = tig->lastBackboneRead();

uint32 circularLength = 0;
uint32 ovlLen = 0;
Expand Down
40 changes: 6 additions & 34 deletions src/bogart/AS_BAT_MergeOrphans.C
Original file line number Diff line number Diff line change
Expand Up @@ -144,35 +144,6 @@ bool isCycle(TigVector &tigs,
return false;
}

//  Pick the read that anchors the start of 'tig'.
//
//  The search starts with tig->firstRead() and walks ufpath from index 1
//  onward.  It stops as soon as the current candidate is flagged as backbone
//  by OG and begins at coordinate 0.  Until then, every read that begins at
//  coordinate 0 replaces the candidate.  The returned read is asserted to
//  begin at coordinate 0.
ufNode* findFirstRead(Unitig *tig) {
  ufNode *candidate = tig->firstRead();
  uint32  idx       = 1;

  while (idx < tig->ufpath.size()) {
    //  Done once the candidate is a backbone read sitting at the tig start.
    if (OG->isBackbone(candidate->ident) && (candidate->position.min() == 0))
      break;

    ufNode &next = tig->ufpath[idx];

    if (next.position.min() == 0)
      candidate = &next;

    idx++;
  }

  assert(candidate->position.min() == 0);

  return candidate;
}

//  Pick the read that anchors the end of 'tig'.
//
//  The search starts with tig->lastRead() and walks ufpath backwards, from
//  the second-to-last entry down to index 0.  It stops as soon as the current
//  candidate is flagged as backbone by OG and ends exactly at
//  tig->getLength().  Until then, every read that ends at tig->getLength()
//  replaces the candidate.  The returned read is asserted to end at
//  tig->getLength().
ufNode* findLastRead(Unitig *tig) {
  ufNode *candidate = tig->lastRead();
  uint32  idx       = tig->ufpath.size() - 1;

  while (idx > 0) {
    idx--;   //  Step to the next earlier entry before examining it.

    //  Done once the candidate is a backbone read touching the tig end.
    if (OG->isBackbone(candidate->ident) && (candidate->position.max() == tig->getLength()))
      break;

    ufNode &prev = tig->ufpath[idx];

    if (prev.position.max() == tig->getLength())
      candidate = &prev;
  }

  assert(candidate->position.max() == tig->getLength());

  return candidate;
}


// Decide which tigs can be orphans. Any unitig where (nearly) every dovetail
// read has an overlap to some other unitig is a candidate for orphan popping.
//
Expand Down Expand Up @@ -204,8 +175,8 @@ findPotentialOrphans(TigVector &tigs,

// If the first or last read has no best edge, that's it, we're done.

ufNode *fRead = findFirstRead(tig);
ufNode *lRead = findLastRead(tig);
ufNode *fRead = tig->firstBackboneRead();
ufNode *lRead = tig->lastBackboneRead();

// Count the number of reads that have an overlap to some other tig. tigOlapsTo[otherTig] = count.

Expand Down Expand Up @@ -941,7 +912,7 @@ mergeOrphans(TigVector &tigs,

// Scan the orphan, decide if there are _ANY_ read placements. Log appropriately.

if (placeAnchor(orphan, placed, findFirstRead(orphan), findLastRead(orphan)) == false) {
if (placeAnchor(orphan, placed, orphan->firstBackboneRead(), orphan->lastBackboneRead()) == false) {
writeLog("\n");
writeLog("ANCHOR READS FAILED TO PLACE.\n");
continue;
Expand All @@ -953,8 +924,8 @@ mergeOrphans(TigVector &tigs,
// read -------
// orphan -------------------------

ufNode *fRead = findFirstRead(orphan);
ufNode *lRead = findLastRead(orphan);
ufNode *fRead = orphan->firstBackboneRead();
ufNode *lRead = orphan->lastBackboneRead();

map<uint32, intervalList<int32> *> targetIntervals;

Expand Down Expand Up @@ -1105,6 +1076,7 @@ mergeOrphans(TigVector &tigs,
for (uint32 fi=0; fi<orphan->ufpath.size(); fi++) { // Flag them as being an orphan, and reset backbone
OG->setOrphan(orphan->ufpath[fi].ident); // status - they're not part of the backbone
OG->setBackbone(orphan->ufpath[fi].ident, false); // in the tig they've been placed into.
orphan->ufpath[fi].contained=true;
}

tigs[orphan->id()] = NULL; // Delete the original tig.
Expand Down
2 changes: 1 addition & 1 deletion src/bogart/AS_BAT_PlaceContains.C
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ placeUnplacedUsingAllOverlaps(TigVector &tigs,

if (tig) {
frg.ident = fid;
frg.contained = 0;
frg.contained = OG->isContained(fid);
frg.parent = 0;
frg.ahang = 0;
frg.bhang = 0;
Expand Down
31 changes: 31 additions & 0 deletions src/bogart/AS_BAT_Unitig.H
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

using namespace std;

// Slack allowed when firstBackboneRead() / lastBackboneRead() look for a read
// at the tig boundary: a read whose start (or end) lies within this distance
// of the tig start (or end) is still accepted as a candidate.  Presumably
// measured in bases, going by the name.
#define MAX_BASES_UNCONTAINED 150

class BestEdgeOverlap;
class optPos;
Expand Down Expand Up @@ -354,6 +355,36 @@ public:
return(rd3);
};

// Return the read used as the start of this tig: the first read that is not
// contained or merged and lies closest to the tig start.  Some wiggle (up to
// MAX_BASES_UNCONTAINED) is allowed because position optimization may leave a
// contained read as the only one touching the start.
//
// The scan begins with firstRead() and walks ufpath from index 1.  It stops
// once the current candidate is not contained and begins at coordinate 0;
// until then, any read beginning within MAX_BASES_UNCONTAINED of the start
// replaces the candidate.
ufNode* firstBackboneRead() {
  ufNode *candidate = firstRead();
  uint32  idx       = 1;

  while (idx < ufpath.size()) {
    // A non-contained read exactly at the tig start ends the search.
    if (!candidate->contained && (candidate->position.min() == 0))
      break;

    ufNode &next = ufpath[idx];

    if (next.position.min() <= MAX_BASES_UNCONTAINED)
      candidate = &next;

    idx++;
  }

  assert(candidate->position.min() <= MAX_BASES_UNCONTAINED);

  return candidate;
};

// Return the read used as the end of this tig; the mirror image of
// firstBackboneRead().
//
// The scan begins with lastRead() and walks ufpath backwards, from the
// second-to-last entry down to index 0.  It stops once the current candidate
// is not contained and ends exactly at getLength(); until then, any read
// ending within MAX_BASES_UNCONTAINED of the tig end replaces the candidate.
ufNode* lastBackboneRead() {
  ufNode *candidate = lastRead();
  uint32  idx       = ufpath.size() - 1;

  while (idx > 0) {
    idx--;   // Step to the next earlier entry before examining it.

    // A non-contained read exactly at the tig end ends the search.
    if (!candidate->contained && (candidate->position.max() == getLength()))
      break;

    ufNode &prev = ufpath[idx];

    if (prev.position.max() + MAX_BASES_UNCONTAINED >= getLength())
      candidate = &prev;
  }

  assert(candidate->position.max() + MAX_BASES_UNCONTAINED >= getLength());

  return candidate;
};

// Public Member Variables
public:
vector<ufNode> ufpath;
Expand Down

0 comments on commit ab08d0c

Please sign in to comment.