// bogusUtil.H -- copied from a fork of marbl/canu.
/******************************************************************************
*
* This file is part of canu, a software program that assembles whole-genome
* sequencing reads into contigs.
*
* This software is based on:
* 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
* the 'kmer package' (http://kmer.sourceforge.net)
* both originally distributed by Applera Corporation under the GNU General
* Public License, version 2.
*
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
* This file is derived from:
*
* src/AS_BAT/AS_BAT_bogusUtil.H
* src/bogart/AS_BAT_bogusUtil.H
*
* Modifications by:
*
* Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
* are Copyright 2010-2011,2013 J. Craig Venter Institute, and
* are subject to the GNU General Public License version 2
*
* Brian P. Walenz from 2014-OCT-09 to 2014-DEC-23
* are Copyright 2014 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
#ifndef INCLUDE_BOGUSUTIL
#define INCLUDE_BOGUSUTIL
#include "AS_global.H"
#include "splitToWords.H"
#include "intervalList.H"
#include <cstring>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;
class genomeAlignment {
public:
genomeAlignment() {
frgIID = frgBgn = frgEnd = 0;
genIID = 0;
genBgn = genEnd = 0;
identity = 0.0;
isDeleted = isReverse = isSpanned = isRepeat = false;
};
int32 frgIID;
int32 frgBgn;
int32 frgEnd;
int32 genIID; // Position in the actual sequence
int32 genBgn;
int32 genEnd;
int32 chnBgn; // Position in the chained sequences
int32 chnEnd;
double identity; // Percent identity of the alignment
bool isDeleted; // Used by bogusness
bool isReverse;
bool isSpanned;
bool isRepeat;
};
// Description of one reference sequence: an interval (rschnBgn, rschnEnd),
// a length (rsrefLen) and a name held in a fixed-size buffer.
// NOTE(review): the 'chn' in the member names suggests the interval is in
// the same chained coordinates as genomeAlignment::chnBgn/chnEnd -- confirm.
class referenceSequence {
public:
  // cb, ce - stored as rschnBgn, rschnEnd.
  // rl     - stored as rsrefLen.
  // cn     - NUL-terminated name, copied into rsrefName.  It must be shorter
  //          than the buffer; this is asserted in debug builds.
  referenceSequence(int32 cb, int32 ce, int32 rl, const char *cn) {
    rschnBgn = cb;
    rschnEnd = ce;
    rsrefLen = rl;

    assert(strlen(cn) < sizeof(rsrefName));

    // The assert disappears when NDEBUG is defined, so the copy itself must
    // be bounded: an over-long name is truncated instead of writing past the
    // end of rsrefName.  strncpy() does not terminate on truncation, so the
    // last byte is set explicitly.
    strncpy(rsrefName, cn, sizeof(rsrefName) - 1);
    rsrefName[sizeof(rsrefName) - 1] = 0;
  };

  int32  rschnBgn;
  int32  rschnEnd;
  int32  rsrefLen;
  char   rsrefName[256];   // Always NUL-terminated after construction
};
// Two-argument predicates over genomeAlignment records; only the
// declarations live in this header.  The signature (two const references
// in, bool out) fits use as a comparison function for sorting.
// NOTE(review): judging by the names, these presumably order records by
// fragment ID and by genome position respectively -- confirm the exact keys
// and tie-breaking against the definitions.
bool byFragmentID(const genomeAlignment &A, const genomeAlignment &B);
bool byGenomePosition(const genomeAlignment &A, const genomeAlignment &B);
// Declaration only; the definition is elsewhere.
// Takes the alignment list by non-const reference, plus one value for each
// of these genomeAlignment members: frgIID, frgBgn, frgEnd, isReverse,
// chnBgn, chnEnd, identity, genIID, genBgn, genEnd.
// NOTE(review): presumably builds a genomeAlignment from these values and
// appends it to 'genome' -- confirm against the implementation, including
// whether any coordinate normalization is applied.
void addAlignment(vector<genomeAlignment> &genome,
int32 frgIID,
int32 frgBgn, int32 frgEnd, bool isReverse,
int32 chnBgn, int32 chnEnd,
double identity,
int32 genIID,
int32 genBgn, int32 genEnd);
// Declaration only; the definition is elsewhere.
// NOTE(review): by its name this presumably reads nucmer output from the
// file 'nucmerName' and fills the containers below -- confirm.
//
//   nucmerName  - C string; presumably the input path.
//   genome      - alignment list, passed by non-const reference.
//   IIDmap      - map from string to int32 (presumably name -> IID).
//   IIDname     - vector of strings (presumably IID -> name).
//   refList     - list of referenceSequence records.
//   refMap      - map from string to uint32 (presumably reference name ->
//                 index into refList).
//   minIdentity - presumably a lower bound on alignment identity; the units
//                 (percent vs. fraction) are not visible here.
void loadNucmer(char *nucmerName,
vector<genomeAlignment> &genome,
map<string, int32> &IIDmap,
vector<string> &IIDname,
vector<referenceSequence> &refList,
map<string,uint32> &refMap,
double minIdentity);
// Declaration only; the definition is elsewhere.  The parameter list is the
// same as loadNucmer()'s apart from the name of the first argument.
// NOTE(review): by its name this presumably reads snapper output from the
// file 'snapperName' and fills the containers below -- confirm.
//
//   snapperName - C string; presumably the input path.
//   genome      - alignment list, passed by non-const reference.
//   IIDmap      - map from string to int32 (presumably name -> IID).
//   IIDname     - vector of strings (presumably IID -> name).
//   refList     - list of referenceSequence records.
//   refMap      - map from string to uint32 (presumably reference name ->
//                 index into refList).
//   minIdentity - presumably a lower bound on alignment identity; the units
//                 (percent vs. fraction) are not visible here.
void loadSnapper(char *snapperName,
vector<genomeAlignment> &genome,
map<string, int32> &IIDmap,
vector<string> &IIDname,
vector<referenceSequence> &refList,
map<string,uint32> &refMap,
double minIdentity);
// Declaration only; the definition is elsewhere.
// NOTE(review): by its name this presumably reads reference sequences from
// the file 'refName', recording each in 'refList' and indexing it by name in
// 'refMap' (string -> uint32) -- confirm against the implementation.
// Both containers are passed by non-const reference.
void
loadReferenceSequence(char *refName,
vector<referenceSequence> &refList,
map<string,uint32> &refMap);
#endif // INCLUDE_BOGUSUTIL