forked from marbl/canu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
AS_BAT_AssemblyGraph.H
133 lines (102 loc) · 3.75 KB
/
AS_BAT_AssemblyGraph.H
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/******************************************************************************
*
* This file is part of canu, a software program that assembles whole-genome
* sequencing reads into contigs.
*
* This software is based on:
* 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
* the 'kmer package' (http://kmer.sourceforge.net)
* both originally distributed by Applera Corporation under the GNU General
* Public License, version 2.
*
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
* Modifications by:
*
* Brian P. Walenz beginning on 2016-JUL-21
* are a 'United States Government Work', and
* are released in the public domain
*
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
#ifndef INCLUDE_AS_BAT_ASSEMBLYGRAPH
#define INCLUDE_AS_BAT_ASSEMBLYGRAPH
#include "AS_global.H"
#include "AS_BAT_OverlapCache.H"
#include "AS_BAT_BestOverlapGraph.H" // For ReadEnd
#include "AS_BAT_Unitig.H" // For SeqInterval
#include "AS_BAT_TigVector.H"

#include <cstddef> // For NULL
class BestPlacement {
public:
BestPlacement() {
tigID = UINT32_MAX;
placedBgn = INT32_MIN;
placedEnd = INT32_MAX;
olapBgn = INT32_MIN;
olapEnd = INT32_MAX;
isContig = false;
isUnitig = false;
isBubble = false;
isRepeat = false;
};
~BestPlacement() {
};
uint32 tigID; // Which tig this is placed in.
int32 placedBgn; // Position in the tig. Can extend negative.
int32 placedEnd; //
int32 olapBgn; // Position in the tig covered by overlaps.
int32 olapEnd; //
bool isContig; // This placement is in a contig
bool isUnitig; // This placement is in a unitig
bool isBubble; // This placement is to an unambiguous region in a contig
bool isRepeat; // This placement is to an ambiguous region in a contig that was split
BAToverlap bestC;
BAToverlap best5;
BAToverlap best3;
};
class BestReverse {
public:
BestReverse() {
readID = 0;
placeID = 0;
};
BestReverse(uint32 id, uint32 pp) {
readID = id;
placeID = pp;
};
~BestReverse() {
};
uint32 readID; // readID we have an overlap from; Index into _pForward
uint32 placeID; // index into the vector for _pForward[readID]
};
class AssemblyGraph {
public:
AssemblyGraph(const char *prefix,
double deviationRepeat,
TigVector &tigs,
bool tigEndsOnly = false) {
buildGraph(prefix, deviationRepeat, tigs, tigEndsOnly);
}
~AssemblyGraph() {
delete [] _pForward;
delete [] _pReverse;
};
public:
vector<BestPlacement> &getForward(uint32 fi) { return(_pForward[fi]); };
vector<BestReverse> &getReverse(uint32 fi) { return(_pReverse[fi]); };
public:
void buildReverseEdges(void);
void buildGraph(const char *prefix,
double deviationRepeat,
TigVector &tigs,
bool tigEndsOnly);
void rebuildGraph(TigVector &tigs);
void filterEdges(TigVector &tigs);
void reportReadGraph(TigVector &tigs, const char *prefix, const char *label);
private:
vector<BestPlacement> *_pForward; // Where each read is placed in other tigs
vector<BestReverse> *_pReverse; // What reads overlap to me
};
#endif // INCLUDE_AS_BAT_ASSEMBLYGRAPH