Skip to content

Commit

Permalink
Add option -RL to filter short reads. WILL NOT WORK WITH CGW.
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Jun 18, 2013
1 parent 0c4b688 commit c4cf486
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/AS_BAT/AS_BAT_Datatypes.H
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ public:

class FragmentInfo {
public:
FragmentInfo(gkStore *gkpStore, const char *prefix);
FragmentInfo(gkStore *gkpStore, const char *prefix, uint32 minReadLen);
~FragmentInfo();

uint64 memoryUsage(void) {
Expand Down
21 changes: 17 additions & 4 deletions src/AS_BAT/AS_BAT_FragmentInfo.C
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,19 @@ const uint64 fiMagicNumber = 0x6f666e4967617266llu; // 'fragInfo' until it g
const uint64 fiVersionNumber = 1;


FragmentInfo::FragmentInfo(gkStore *gkpStore, const char *prefix) {
FragmentInfo::FragmentInfo(gkStore *gkpStore,
const char *prefix,
uint32 minReadLen) {

if (load(prefix))
return;

writeLog("FragmentInfo()-- Loading fragment information\n");

if (minReadLen > 0)
writeLog("FragmentInfo()-- Reads shorter than "F_U32" bases are forced to be singleton.\n",
minReadLen);

gkStream *fs = new gkStream(gkpStore, 0, 0, GKFRAGMENT_INF);
gkFragment fr;

Expand Down Expand Up @@ -71,11 +77,16 @@ FragmentInfo::FragmentInfo(gkStore *gkpStore, const char *prefix) {
}

uint32 numDeleted = 0;
uint32 numSkipped = 0;
uint32 numLoaded = 0;

while(fs->next(&fr)) {
if (fr.gkFragment_getIsDeleted()) {
numDeleted++;

} else if (fr.gkFragment_getClearRegionLength() < minReadLen) {
numSkipped++;

} else {
uint32 iid = fr.gkFragment_getReadIID();
uint32 lib = fr.gkFragment_getLibraryIID();
Expand All @@ -92,8 +103,9 @@ FragmentInfo::FragmentInfo(gkStore *gkpStore, const char *prefix) {
numLoaded++;
}

if (((numDeleted + numLoaded) % 10000000) == 0)
writeLog("FragmentInfo()-- Loading fragment information deleted:%9d active:%9d\n", numDeleted, numLoaded);
if (((numDeleted + numSkipped + numLoaded) % 10000000) == 0)
writeLog("FragmentInfo()-- Loading fragment information deleted:%9d skipped:%9d active:%9d\n",
numDeleted, numSkipped, numLoaded);
}

for (uint32 i=0; i<_numLibraries + 1; i++)
Expand Down Expand Up @@ -121,7 +133,8 @@ FragmentInfo::FragmentInfo(gkStore *gkpStore, const char *prefix) {
if (numBroken > 0)
writeLog("FragmentInfo()-- WARNING! Removed "F_U32" mate relationships.\n", numBroken);

writeLog("FragmentInfo()-- Loaded %d alive fragments, skipped %d dead fragments.\n", numLoaded, numDeleted);
writeLog("FragmentInfo()-- Loaded %d alive fragments, skipped %d short and %d dead fragments.\n",
numLoaded, numSkipped, numDeleted);

delete fs;

Expand Down
10 changes: 9 additions & 1 deletion src/AS_BAT/bogart.C
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ main (int argc, char * argv []) {
bool enableReconstructRepeats = false;
bool enablePromoteToSingleton = true;

uint32 minReadLen = 0;

argc = AS_configure(argc, argv);

int err = 0;
Expand Down Expand Up @@ -129,6 +131,9 @@ main (int argc, char * argv []) {
} else if (strcmp(argv[arg], "-DP") == 0) {
enablePromoteToSingleton = false;

} else if (strcmp(argv[arg], "-RL") == 0) {
minReadLen = atoi(argv[++arg]);

} else if (strcmp(argv[arg], "-threads") == 0) {
numThreads = atoi(argv[++arg]);

Expand Down Expand Up @@ -250,13 +255,16 @@ main (int argc, char * argv []) {
fprintf(stderr, " -E Shatter repeats, extend unique unitigs\n");
fprintf(stderr, " -DP When -R or -E, don't promote shattered leftovers to unitigs.\n");
fprintf(stderr, " This WILL cause CGW to fail; diagnostic only.\n");
fprintf(stderr, " -RL len Force reads below 'len' bases to be singletons.\n");
fprintf(stderr, " This WILL cause CGW to fail; diagnostic only.\n");
fprintf(stderr, "\n");
fprintf(stderr, " -threads N Use N compute threads during repeat detection. EXPERIMENTAL!\n");
fprintf(stderr, " 0 - use OpenMP default\n");
fprintf(stderr, " 1 - use one thread (default)\n");
fprintf(stderr, "\n");
fprintf(stderr, "Overlap Selection - an overlap will be considered for use in a unitig if either of\n");
fprintf(stderr, " the following conditions hold:\n");
fprintf(stderr, "\n");
fprintf(stderr, " When constructing the Best Overlap Graph and Promiscuous Unitigs ('g'raph):\n");
fprintf(stderr, " -eg 0.020 no more than 0.020 fraction (2.0%%) error\n");
fprintf(stderr, " -Eg 2 no more than 2 errors (useful with short reads)\n");
Expand Down Expand Up @@ -342,7 +350,7 @@ main (int argc, char * argv []) {

setLogFile(output_prefix, NULL);

FI = new FragmentInfo(gkpStore, output_prefix);
FI = new FragmentInfo(gkpStore, output_prefix, minReadLen);

// Initialize where we've been to nowhere
Unitig::resetFragUnitigMap(FI->numFragments());
Expand Down
2 changes: 1 addition & 1 deletion src/AS_BAT/petey.C
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ main (int argc, char * argv []) {
OverlapStore *ovlStoreUniq = AS_OVS_openOverlapStore(ovlStoreUniqPath);
OverlapStore *ovlStoreRept = ovlStoreReptPath ? AS_OVS_openOverlapStore(ovlStoreReptPath) : NULL;

FI = new FragmentInfo(gkpStore, output_prefix);
FI = new FragmentInfo(gkpStore, output_prefix, 0);
OC = new OverlapCache(ovlStoreUniq, ovlStoreRept, output_prefix, 6.0, 1.5, 0, 0, false, false);
OG = new BestOverlapGraph(erate, elimit, output_prefix, false, false);
CG = new ChunkGraph(output_prefix);
Expand Down
2 changes: 1 addition & 1 deletion src/AS_BAT/rewriteCache.C
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ main(int argc, char **argv) {


gkStore *gkpStore = new gkStore("salmon.gkpStore", FALSE, FALSE);
FragmentInfo *FI = new FragmentInfo(gkpStore, "salmon");
FragmentInfo *FI = new FragmentInfo(gkpStore, "salmon", 0);


sprintf(name, "salmon.ovlCache");
Expand Down

0 comments on commit c4cf486

Please sign in to comment.