Skip to content

Commit

Permalink
Extend histogran and lookup to 64-bit values.
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Feb 14, 2019
1 parent bfbf88e commit 9f31726
Show file tree
Hide file tree
Showing 20 changed files with 636 additions and 365 deletions.
10 changes: 5 additions & 5 deletions src/meryl/meryl-import.C
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ main(int argc, char **argv) {

// Allocate a bunch of counting arrays. The naming here follows merylOp-count.C.

merylCountArray *data = new merylCountArray [nPrefix];
merylCountArray<uint64> *data = new merylCountArray<uint64> [nPrefix];

for (uint32 pp=0; pp<nPrefix; pp++) {
data[pp].initialize(pp, wData);
Expand All @@ -182,7 +182,7 @@ main(int argc, char **argv) {

// Read each kmer and value, stuff into a merylCountArray, writing when the array is full.

uint32 persistentValue = 1;
uint64 persistentValue = 1;

while (AS_UTL_readLine(L, Llen, Lmax, K) == true) {
W.split(L);
Expand All @@ -193,15 +193,15 @@ main(int argc, char **argv) {
// Decode the line, make a kmer.

char *kstr = W[0];
uint32 vv = persistentValue;
uint64 vv = persistentValue;

if (kstr[0] == '#') {
persistentValue = strtouint32(kstr + 1);
persistentValue = strtouint64(kstr + 1);
continue;
}

if (W.numWords() > 1)
vv = W.touint32(1);
vv = W.touint64(1);

for (uint32 ii=0; kstr[ii]; ii++)
kmerF.addL(kstr[ii]);
Expand Down
80 changes: 49 additions & 31 deletions src/meryl/meryl-lookup.C
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,43 @@


#define OP_NONE 0
#define OP_EXISTENCE 1
#define OP_DUMP 1
#define OP_EXISTENCE 2



void
dumpExistence(dnaSeqFile *sf,
kmerCountExactLookup *kl) {
uint64 nameMax = 0;
char *name = NULL;
uint64 seqLen = 0;
uint64 seqMax = 0;
char *seq = NULL;
uint8 *qlt = NULL;

char fString[64];
char rString[64];

while (sf->loadSequence(name, nameMax, seq, qlt, seqMax, seqLen)) {
kmerIterator kiter(seq, seqLen);

while (kiter.nextMer()) {
uint64 fValue = kl->value(kiter.fmer());
uint64 rValue = kl->value(kiter.rmer());

fprintf(stdout, "%s\t%s\t%lu\t%s\t%lu\n",
name,
kiter.fmer().toString(fString), fValue,
kiter.rmer().toString(rString), rValue);
}
}

delete [] name;
delete [] seq;
delete [] qlt;
}



void
Expand Down Expand Up @@ -70,8 +106,8 @@ int
main(int argc, char **argv) {
char *inputSeqName = NULL;
char *inputDBname = NULL;
uint32 minV = 0;
uint32 maxV = UINT32_MAX;
uint64 minV = 0;
uint64 maxV = UINT64_MAX;
uint32 threads = 1;
uint32 reportType = OP_NONE;

Expand All @@ -87,14 +123,17 @@ main(int argc, char **argv) {
inputDBname = argv[++arg];

} else if (strcmp(argv[arg], "-min") == 0) {
minV = strtouint32(argv[++arg]);
minV = strtouint64(argv[++arg]);

} else if (strcmp(argv[arg], "-max") == 0) {
maxV = strtouint32(argv[++arg]);
maxV = strtouint64(argv[++arg]);

} else if (strcmp(argv[arg], "-threads") == 0) {
threads = strtouint32(argv[++arg]);

} else if (strcmp(argv[arg], "-dump") == 0) {
reportType = OP_DUMP;

} else if (strcmp(argv[arg], "-existence") == 0) {
reportType = OP_EXISTENCE;

Expand Down Expand Up @@ -163,42 +202,21 @@ main(int argc, char **argv) {

dnaSeqFile *seqFile = new dnaSeqFile(inputSeqName);


// Do something.

if (reportType == OP_DUMP) {
dumpExistence(seqFile, kmerLookup);
}

if (reportType == OP_EXISTENCE) {
reportExistence(seqFile, kmerLookup);
}

#if 0
rr = new kmerCountFileReader(queryDBname);

uint64 tested = 0;
uint64 kmerFound = 0;
uint64 valuFound = 0;

while (rr->nextMer()) {
kmer k = rr->theFMer();
uint32 c = rr->theCount();
uint32 v = ll->value(k);

tested++;

if (v > 0)
kmerFound++;

if (c == v)
valuFound++;
}
#endif
// Done!

delete seqFile;
delete kmerLookup;

//fprintf(stderr, "Tested %12lu kmers.\n", tested);
//fprintf(stderr, "Found %12lu kmers. %12lu missed.\n", kmerFound, tested-kmerFound);
//fprintf(stderr, "Found %12lu kmers with correct value. %12lu missed.\n", valuFound, tested-valuFound);

exit(0);
}

Expand Down
Loading

0 comments on commit 9f31726

Please sign in to comment.