forked from OpenGene/fastp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats.h
100 lines (89 loc) · 2.57 KB
/
stats.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#ifndef STATS_H
#define STATS_H
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <map>
#include "read.h"
#include "options.h"
using namespace std;
// Statistics accumulator for sequencing reads. Only the interface and the data
// members are declared here; every member function is defined outside this header.
// NOTE(review): the class declares a destructor and holds several raw pointer
// members, but declares no copy constructor or copy assignment operator. If the
// destructor releases those buffers (not visible here), copying a Stats object
// would be unsafe -- confirm against the implementation.
class Stats{
public:
// Constructor.
//   opt           - options object, passed as a raw pointer; presumably kept in
//                   mOptions and not owned by this class -- confirm.
//   isRead2       - defaults to false; presumably marks statistics for the second
//                   read of a pair -- confirm.
//   guessedCycles - defaults to 0; this value should be calculated using the
//                   first several records.
//   bufferMargin  - defaults to 1024; presumably spare capacity for the per-cycle
//                   buffers -- confirm.
Stats(Options* opt, bool isRead2 = false, int guessedCycles = 0, int bufferMargin = 1024);
~Stats();
// Scalar accessors. Presumably each returns the matching total held in the
// private members below (cycles, reads, bases, Q20/Q30 bases, GC count) --
// confirm in the implementation, including whether summarize() must run first.
int getCycles();
long getReads();
long getBases();
long getQ20();
long getQ30();
long getGCNumber();
// Takes one read. By default the qualified quality score is Q20 ('5').
// Presumably adds the read to the running statistics -- confirm.
void statRead(Read* r);
// Builds one Stats from a list of Stats pointers.
// NOTE(review): ownership of the returned pointer and of the list elements is
// not visible from this header -- confirm before freeing either.
static Stats* merge(vector<Stats*>& list);
void print();
// forced defaults to false; presumably true recomputes the summary even when
// the `summarized` flag is already set -- confirm.
void summarize(bool forced = false);
// JSON report output, written to ofs; padding is presumably an indentation prefix -- confirm.
// NOTE(review): ofstream is declared in <fstream>, which this header does not
// include directly; it presumably arrives through read.h or options.h.
void reportJson(ofstream& ofs, string padding);
// HTML report output, written to ofs. The four reportHtml* functions below share
// the same parameter list; presumably each emits one section of the report -- confirm.
void reportHtml(ofstream& ofs, string filteringType, string readName);
void reportHtmlQuality(ofstream& ofs, string filteringType, string readName);
void reportHtmlContents(ofstream& ofs, string filteringType, string readName);
void reportHtmlKMER(ofstream& ofs, string filteringType, string readName);
void reportHtmlORA(ofstream& ofs, string filteringType, string readName);
bool isLongRead();
// Presumably sets up the over-represented sequence containers (mOverRepSeq,
// mOverRepSeqDist) -- confirm.
void initOverRepSeq();
int getMeanLength();
public:
// Static helpers that turn an array of `size` elements into a string.
// NOTE(review): the separator and number formatting are defined in the
// implementation; the role of `coords` in the second overload is not visible here.
static string list2string(double* list, int size);
static string list2string(double* list, int size, long* coords);
static string list2string(long* list, int size);
// Maps a base character to an integer value; the mapping itself is defined in
// the implementation.
static int base2val(char base);
private:
// Presumably grows the per-cycle buffers to newBufLen entries -- confirm.
void extendBuffer(int newBufLen);
// String-building helpers; judging by the names they serve the k-mer section of
// the HTML report -- confirm.
string makeKmerTD(int i, int j);
string kmer3(int val);
string kmer2(int val);
// Presumably releases the arrays referenced by mOverRepSeqDist -- confirm.
void deleteOverRepSeqDist();
bool overRepPassed(string& seq, long count);
private:
Options* mOptions;
bool mIsRead2;
long mReads;
int mEvaluatedSeqLen;
/*
Why are the per-base arrays below sized 8?
Each base A/T/C/G/N is mapped to a slot in 0~7 by its ASCII code modulo 8:
'A' % 8 = 1
'T' % 8 = 4
'C' % 8 = 3
'G' % 8 = 7
'N' % 8 = 6
so slots 0, 2 and 5 are not used by these five bases.
*/
// One pointer per base slot; presumably each points to a buffer indexed by
// cycle and sized by mBufLen -- confirm.
long *mCycleQ30Bases[8];
long *mCycleQ20Bases[8];
long *mCycleBaseContents[8];
long *mCycleBaseQual[8];
// Presumably per-cycle totals over all bases -- confirm.
long *mCycleTotalBase;
long *mCycleTotalQual;
// Presumably k-mer counts, with mKmerBufLen entries -- confirm.
long *mKmer;
// Named curves stored as raw double arrays.
// NOTE(review): array lengths and ownership are not visible in this header.
map<string, double*> mQualityCurves;
map<string, double*> mContentCurves;
// Over-represented sequences: a long value and a long array per sequence string.
map<string, long> mOverRepSeq;
map<string, long*> mOverRepSeqDist;
int mCycles;
int mBufLen;
long mBases;
// Whole-run totals per base slot (same ASCII % 8 indexing as above).
long mQ20Bases[8];
long mQ30Bases[8];
long mBaseContents[8];
long mQ20Total;
long mQ30Total;
// Presumably set once summarize() has run -- confirm.
bool summarized;
long mKmerMax;
long mKmerMin;
int mKmerBufLen;
// Presumably the sum of read lengths, used by getMeanLength() -- confirm.
long mLengthSum;
};
#endif