Skip to content

Commit

Permalink
Update consensus to use max error rate and regular error rate and spe…
Browse files Browse the repository at this point in the history
…cify which reads are allowed to use higher error rate

Update canu pipeline to use matching error rates
  • Loading branch information
skoren committed Apr 23, 2022
1 parent f2974cd commit 15f3418
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/pipelines/canu/Consensus.pm
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ sub utgcns ($$) {
print F " -P \$jobid \\\n";
print F " -O ./ctgcns/\$jobid.cns.WORKING \\\n";
print F " -maxcoverage " . getGlobal('cnsMaxCoverage') . " \\\n";
print F " -e " . getGlobal("cnsErrorRate") . " \\\n";
print F " -e " . getGlobal("cnsErrorRate") . " \\\n";
print F " -em " . getGlobal("cnsErrorRate") . " \\\n";
print F " -quick \\\n" if (getGlobal("cnsConsensus") eq "quick");
print F " -pbdagcon \\\n" if (getGlobal("cnsConsensus") eq "pbdagcon");
print F " -edlib \\\n" if (getGlobal("canuIteration") >= 0);
Expand Down
11 changes: 9 additions & 2 deletions src/utgcns/unitigConsensus.C
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ abSequence::abSequence(uint32 readID,
unitigConsensus::unitigConsensus(sqStore *seqStore_,
double errorRate_,
double errorRateMax_,
uint32 errorRateMaxID_,
uint32 minOverlap_,
uint32 minCoverage_) {

Expand All @@ -96,6 +97,7 @@ unitigConsensus::unitigConsensus(sqStore *seqStore_,
_minOverlap = minOverlap_;
_errorRate = errorRate_;
_errorRateMax = errorRateMax_;
_errorRateMaxID = errorRateMaxID_;
_minCoverage = minCoverage_;
}

Expand Down Expand Up @@ -407,6 +409,11 @@ unitigConsensus::generateTemplateStitch(void) {

assert(nr != 0);

if (_utgpos[rid].ident() <= _errorRateMaxID || _utgpos[nr].ident() <= _errorRateMaxID) {
if (showAlgorithm()) fprintf(stderr, "generateTemplateStitch()-- Increasing threshold because either %d (%d) or %d (%d) is below requested ID %d\n", rid, _utgpos[rid].ident(), nr, _utgpos[nr].ident(), _errorRateMaxID);
_errorRate = _errorRateMax;
}

rid = nr; // We'll place read 'nr' in the template.

seq = getSequence(rid);
Expand Down Expand Up @@ -911,7 +918,7 @@ unitigConsensus::generatePBDAG(tgTig *tig_,
_utgpos[ii],
seq->getBases(), seq->length(),
tigseq, tiglen,
_errorRate,
_errorRateMax,
showAlgorithm());

if (aligned == false) {
Expand Down Expand Up @@ -1169,7 +1176,7 @@ unitigConsensus::findCoordinates(void) {

int32 ext5 = readLen * 0.01; // Try an initial alignment nice and tight. This is
int32 ext3 = readLen * 0.01; // important for the first/last reads so we get them
double era = _errorRate; // aligned at the end of the tig.
double era = _errorRateMax; // aligned at the end of the tig.

if (showPlacement()) {
fprintf(stderr, "\n");
Expand Down
2 changes: 2 additions & 0 deletions src/utgcns/unitigConsensus.H
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public:
unitigConsensus(sqStore *seqStore_,
double errorRate_,
double errorRateMax_,
uint32 errorRateMaxID_,
uint32 minOverlap_,
uint32 minCoverage_);
~unitigConsensus();
Expand Down Expand Up @@ -152,6 +153,7 @@ private:
uint32 _minCoverage;
double _errorRate;
double _errorRateMax;
uint32 _errorRateMaxID;
};


Expand Down
14 changes: 10 additions & 4 deletions src/utgcns/utgcns.C
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ public:
double partitionScaling = 1.00; // Estimated tig length is 100% of actual tig length.
double partitionReads = 0.05; // 5% of all reads can end up in a single partition.

double errorRate = 0.12;
double errorRateMax = 0.40;
double errorRate = 0.12;
double errorRateMax = 0.40;
uint32 errorRateMaxID = 0;

uint32 minOverlap = 500;
uint32 minCoverage = 0;

Expand Down Expand Up @@ -188,7 +190,7 @@ processImportedTigs(cnsParameters &params) {

tig->_utgcns_verboseLevel = params.verbosity;

unitigConsensus *utgcns = new unitigConsensus(params.seqStore, params.errorRate, params.errorRateMax, params.minOverlap, params.minCoverage);
unitigConsensus *utgcns = new unitigConsensus(params.seqStore, params.errorRate, params.errorRateMax, params.errorRateMaxID, params.minOverlap, params.minCoverage);
bool success = utgcns->generate(tig, params.algorithm, params.aligner, &reads);

// Show the result, if requested.
Expand Down Expand Up @@ -401,7 +403,7 @@ processTigs(cnsParameters &params) {

tig->_utgcns_verboseLevel = params.verbosity;

unitigConsensus *utgcns = new unitigConsensus(params.seqStore, params.errorRate, params.errorRateMax, params.minOverlap, params.minCoverage);
unitigConsensus *utgcns = new unitigConsensus(params.seqStore, params.errorRate, params.errorRateMax, params.errorRateMaxID, params.minOverlap, params.minCoverage);
bool success = utgcns->generate(tig, params.algorithm, params.aligner, params.seqReads);

// Show the result, if requested.
Expand Down Expand Up @@ -559,6 +561,10 @@ main(int argc, char **argv) {
params.errorRateMax = strtodouble(argv[++arg]);
}

else if (strcmp(argv[arg], "-EM") == 0) {
params.errorRateMaxID = strtouint32(argv[++arg]);
}

else if (strcmp(argv[arg], "-l") == 0) {
params.minOverlap = strtouint32(argv[++arg]);
}
Expand Down

0 comments on commit 15f3418

Please sign in to comment.