forked from marbl/canu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunitigConsensus.H
205 lines (162 loc) · 6.41 KB
/
unitigConsensus.H
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/******************************************************************************
*
* This file is part of canu, a software program that assembles whole-genome
* sequencing reads into contigs.
*
* This software is based on:
* 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
* the 'kmer package' (http://kmer.sourceforge.net)
* both originally distributed by Applera Corporation under the GNU General
* Public License, version 2.
*
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
* This file is derived from:
*
* src/AS_CNS/MultiAlignment_CNS.h
* src/AS_CNS/MultiAlignment_CNS_private.H
* src/AS_CNS/MultiAlignment_CNS_private.h
* src/utgcns/libcns/MultiAlignment_CNS_private.H
* src/utgcns/libcns/unitigConsensus.H
*
* Modifications by:
*
* Gennady Denisov from 2005-MAY-23 to 2007-OCT-25
* are Copyright 2005-2007 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Brian P. Walenz from 2005-JUL-08 to 2013-AUG-01
* are Copyright 2005-2009,2011,2013 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Eli Venter from 2006-FEB-13 to 2008-FEB-13
* are Copyright 2006,2008 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Sergey Koren from 2008-JAN-28 to 2009-SEP-25
* are Copyright 2008-2009 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Jason Miller on 2011-SEP-21
* are Copyright 2011 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Brian P. Walenz from 2014-NOV-17 to 2015-AUG-05
* are Copyright 2014-2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
* Brian P. Walenz beginning on 2015-OCT-09
* are a 'United States Government Work', and
* are released in the public domain
*
* Sergey Koren beginning on 2015-DEC-28
* are a 'United States Government Work', and
* are released in the public domain
*
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
#ifndef UNITIGCONSENSUS_H
#define UNITIGCONSENSUS_H
#include "AS_global.H"
#include "tgStore.H"
//  Forward declarations.  Neither type is referenced by any declaration
//  in this header; presumably they are kept for the benefit of files that
//  include it -- confirm before removing.
class ALNoverlap;
class NDalign;
//  Lower and upper bounds named for quality values (QV).  They are not used
//  in this header; presumably consensus quality values are limited to this
//  range by the implementation -- confirm at the use sites.
#define CNS_MIN_QV 0
#define CNS_MAX_QV 60
class abSequence {
public:
abSequence() {
_iid = 0;
_complement = false;
_length = 0;
_bases = NULL;
_quals = NULL;
};
abSequence(uint32 readID,
uint32 length,
char *seq,
uint8 *qlt,
uint32 complemented);
~abSequence() {
delete [] _bases;
delete [] _quals;
};
uint32 seqIdent(void) { return(_iid); };
bool isForward(void) { return(_complement == false); };
uint32 length(void) { return(_length); };
char getBase(uint32 o) { return(_bases[o]); };
uint8 getQual(uint32 o) { return(_quals[o]); };
char *getBases(void) { return(_bases); };
private:
uint32 _iid; // external, aka seqStore, ID;
bool _complement;
uint32 _length;
char *_bases;
uint8 *_quals;
};
//  Interface for generating consensus for a tig; generate() is the public
//  entry point.  Everything except the small inline accessors is defined
//  outside this header.
//
//  The class declares its own destructor and holds raw pointers
//  (_sequences, _utgpos, _cnspos), so implicit member-wise copying is
//  disabled: if the destructor releases any of them -- presumably it does;
//  confirm in the implementation -- a copy would lead to a double free.
//
//  'map' is written unqualified throughout; it relies on the included
//  project headers to bring std::map into scope.
//
class unitigConsensus {
public:
  unitigConsensus(sqStore  *seqStore_,
                  double    errorRate_,
                  double    errorRateMax_,
                  uint32    minOverlap_);
  ~unitigConsensus();

private:
  //  Load one read.  The two maps are optional sources of already-loaded
  //  reads, keyed by a uint32 -- presumably the read ID; confirm.
  void   addRead(uint32   readID,
                 uint32   askip, uint32 bskip,
                 bool     complemented,
                 map<uint32, sqRead *>     *inPackageRead,
                 map<uint32, sqReadData *> *inPackageReadData);

  bool   initialize(map<uint32, sqRead *>     *reads,
                    map<uint32, sqReadData *> *datas);

public:
  //  Generate consensus for tig_.  algorithm_ and aligner_ are
  //  single-character selectors whose legal values are defined by the
  //  implementation.  reads_ and datas_ are optional and default to NULL.
  //  The meaning of the bool result is not visible here -- presumably
  //  true on success; confirm.
  bool   generate(tgTig                     *tig_,
                  char                       algorithm_,
                  char                       aligner_,
                  map<uint32, sqRead *>     *reads_ = NULL,
                  map<uint32, sqReadData *> *datas_ = NULL);

private:
  char  *generateTemplateStitch(void);

  bool   initializeGenerate(tgTig                     *tig,
                            map<uint32, sqRead *>     *reads = NULL,
                            map<uint32, sqReadData *> *datas = NULL);

  //  The individual generation strategies; presumably selected by
  //  generate() from its algorithm_ argument -- confirm.
  bool   generatePBDAG(tgTig                     *tig,
                       char                       aligner,
                       map<uint32, sqRead *>     *reads = NULL,
                       map<uint32, sqReadData *> *datas = NULL);

  bool   generateQuick(tgTig                     *tig,
                       map<uint32, sqRead *>     *reads = NULL,
                       map<uint32, sqReadData *> *datas = NULL);

  bool   generateSingleton(tgTig                     *tig,
                           map<uint32, sqRead *>     *reads = NULL,
                           map<uint32, sqReadData *> *datas = NULL);

  void   findCoordinates(void);
  void   findRawAlignments(void);

public:
  //  Verbosity predicates, driven by the tig's _utgcns_verboseLevel.
  //  NOTE(review): these dereference _tig unconditionally; confirm that
  //  _tig is always set before any of them is called.
  bool   showProgress(void) const    { return(_tig->_utgcns_verboseLevel >= 1); }  //  -V          displays which reads are processing
  bool   showAlgorithm(void) const   { return(_tig->_utgcns_verboseLevel >= 2); }  //  -V -V       displays some details on the algorithm
  bool   showPlacement(void) const   { return(_tig->_utgcns_verboseLevel >= 3); }  //  -V -V -V    displays aligns for placement
  bool   showAlignments(void) const  { return(_tig->_utgcns_verboseLevel >= 4); }  //  -V -V -V -V displays aligns and multialigns

  //  Return the id'th loaded sequence; asserts that id is below the
  //  number of sequences currently stored.
  abSequence *getSequence(uint32 id) const {
    assert(id < _sequencesLen);
    return(_sequences[id]);
  }

private:
  //  Not copyable.  Declared but intentionally never defined, so an
  //  accidental copy is rejected at build time.
  unitigConsensus(const unitigConsensus &);
  unitigConsensus &operator=(const unitigConsensus &);

  sqStore        *_seqStore;

  tgTig          *_tig;
  uint32          _numReads;      //  == tig->numberOfChildren()

  uint32          _sequencesMax;  //  presumably the allocated size of _sequences -- confirm
  uint32          _sequencesLen;  //  number of valid entries; bound checked by getSequence()
  abSequence    **_sequences;

  //  The two positions below are storing the low/high coords for the read.
  //  They do not encode the orientation in the coordinates.
  //
  tgPosition     *_utgpos;        //  Original unitigger location.
  tgPosition     *_cnspos;        //  Actual location in frankenstein.

  uint32          _minOverlap;
  double          _errorRate;
  double          _errorRateMax;
};
#endif