Skip to content

Commit

Permalink
Make mean,stddev,median,mad local variables instead of class members.
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Oct 20, 2020
1 parent 2caad53 commit 504b7b4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 34 deletions.
37 changes: 15 additions & 22 deletions src/bogart/AS_BAT_BestOverlapGraph.C
Original file line number Diff line number Diff line change
Expand Up @@ -201,19 +201,20 @@ BestOverlapGraph::findErrorRateThreshold(FILE *report) {

sort(erates.begin(), erates.end());

// Find mean and stddev with an online calculation.
// Find mean/stddev (with an online calculation) and the median/mad.

stdDev<double> edgeStats;

for (uint32 ii=0; ii<erates.size(); ii++)
edgeStats.insert(erates[ii]);

_mean = edgeStats.mean();
_stddev = edgeStats.stddev();
double mean = edgeStats.mean();
double stddev = edgeStats.stddev();

// Find the median and absolute deviations.
double median = 0.0;
double mad = 0.0;

computeMedianAbsoluteDeviation(erates, _median, _mad, true);
computeMedianAbsoluteDeviation(erates, median, mad, true);

// Pick an error threshold.
//
Expand All @@ -230,15 +231,15 @@ BestOverlapGraph::findErrorRateThreshold(FILE *report) {

double Tgraph = _erateGraph; // was Tinput = _errorLimit
double Tmax = _erateMax;
double Tmean = _mean + _deviationGraph * _stddev;
double Tmad = _median + _deviationGraph * 1.4826 * _mad;
double Tmean = mean + _deviationGraph * stddev;
double Tmad = median + _deviationGraph * 1.4826 * mad;
uint32 pos = (uint32)((erates.size()+1) * _percentileError);
double Tperct = erates[pos] + 1e-5;

assert((_erateGraph == _errorLimit) || // Either erateGraph or erateMax, as
(_erateMax == _errorLimit)); // set in findInitialEdges().

double TpickedTight = min(_errorLimit, ((_median > 1e-10) ? Tmad : Tperct));
double TpickedTight = min(_errorLimit, ((median > 1e-10) ? Tmad : Tperct));
double TpickedLoose = min(_errorLimit, Tmean);
double Tpicked = 0.0;

Expand Down Expand Up @@ -272,11 +273,11 @@ BestOverlapGraph::findErrorRateThreshold(FILE *report) {
fprintf(report, "%-12u" " fraction error fraction percent\n", edgeStats.size());
fprintf(report, "samples (1e-5) error error\n");
fprintf(report, " -------------------------- -------- --------\n");
fprintf(report, "command line (-eg) -> %8.2f %8.4f%%%s\n", 1e5 * Tgraph, 100.0 * Tgraph, (_errorLimit == Tgraph) ? " (enabled)" : "");
fprintf(report, "command line (-eM) -> %8.2f %8.4f%%%s\n", 1e5 * Tmax, 100.0 * Tmax, (_errorLimit == Tmax) ? " (enabled)" : "");
fprintf(report, "mean + std.dev %8.2f +- %3.0f * %8.2f" " -> %8.2f %8.4f%%%s\n", 1e5 * _mean, _deviationGraph, 1e5 * _stddev, 1e5 * Tmean, 100.0 * Tmean, (_errorLimit == Tmean) ? " (enabled)" : "");
fprintf(report, "median + mad %8.2f +- %3.0f * %8.2f" " -> %8.2f %8.4f%%%s\n", 1e5 * _median, _deviationGraph, 1e5 * _mad, 1e5 * Tmad, 100.0 * Tmad, (_errorLimit == Tmad) ? " (enabled)" : "");
fprintf(report, "90th percentile -> %8.2f %8.4f%%%s\n", 1e5 * Tperct, 100.0 * Tperct, (_errorLimit == Tperct) ? " (enabled)" : "");
fprintf(report, "command line (-eg) -> %8.2f %8.4f%%%s\n", 1e5 * Tgraph, 100.0 * Tgraph, (_errorLimit == Tgraph) ? " (enabled)" : "");
fprintf(report, "command line (-eM) -> %8.2f %8.4f%%%s\n", 1e5 * Tmax, 100.0 * Tmax, (_errorLimit == Tmax) ? " (enabled)" : "");
fprintf(report, "mean + std.dev %8.2f +- %3.0f * %8.2f" " -> %8.2f %8.4f%%%s\n", 1e5 * mean, _deviationGraph, 1e5 * stddev, 1e5 * Tmean, 100.0 * Tmean, (_errorLimit == Tmean) ? " (enabled)" : "");
fprintf(report, "median + mad %8.2f +- %3.0f * %8.2f" " -> %8.2f %8.4f%%%s\n", 1e5 * median, _deviationGraph, 1e5 * mad, 1e5 * Tmad, 100.0 * Tmad, (_errorLimit == Tmad) ? " (enabled)" : "");
fprintf(report, "90th percentile -> %8.2f %8.4f%%%s\n", 1e5 * Tperct, 100.0 * Tperct, (_errorLimit == Tperct) ? " (enabled)" : "");
fprintf(report, "\n");
fprintf(report, "BEST EDGE FILTERING\n");
fprintf(report, "-------------------\n");
Expand Down Expand Up @@ -1600,19 +1601,11 @@ BestOverlapGraph::BestOverlapGraph(double erateGraph,
((2 * sizeof(BestEdgeOverlap) * (RI->numReads() + 1)) >> 20));

_reads = new BestEdgeRead [RI->numReads() + 1];
_errorLimit = erateGraph;

_best5score = new uint64 [RI->numReads() + 1]; // Cleared in findEdges().
_best3score = new uint64 [RI->numReads() + 1];

_mean = erateGraph;
_stddev = 0.0;

_median = erateGraph;
_mad = 0.0;

_errorLimit = erateGraph;


// If there is a BOG supplied, copy the reads from there to here.
//
// This sets the status of orphans and bubbles. Then we'll further flag
Expand Down
22 changes: 10 additions & 12 deletions src/bogart/AS_BAT_BestOverlapGraph.H
Original file line number Diff line number Diff line change
Expand Up @@ -281,16 +281,9 @@ private:

private:
BestEdgeRead *_reads; // Nodes in the graph.
double _errorLimit; // Final error rate limit.

uint64 *_best5score; // Temporary data for computing
uint64 *_best3score; // best edges.

double _mean;
double _stddev;

double _median;
double _mad;

// Input parameters.
private:
double const _erateMax;
double const _erateGraph;
Expand All @@ -299,12 +292,17 @@ private:
double const _minOlapPercent;
double const _minReadsBest;

// Temporary data for computing best edges.
// Set to nullptr once the graph is built.
private:
double _errorLimit;
uint64 *_best5score;
uint64 *_best3score;

// These are used for communicating stats between stages.
// Not for use once the graph is built.
private:
uint32 _nReadsEP[2]; // These are used for communicating stats between stages,
uint32 _nReadsEF[2]; // not for use once the graph is built.
uint32 _nReadsEP[2];
uint32 _nReadsEF[2];
};


Expand Down

0 comments on commit 504b7b4

Please sign in to comment.