-
Notifications
You must be signed in to change notification settings - Fork 1
/
readreps.cpp
126 lines (108 loc) · 2.86 KB
/
readreps.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "piler2.h"
// Destructive: pokes Rec.Attrs
static char *GetRepeat(GFFRecord &Rec)
{
char *Attrs = Rec.Attrs;
char *Start = strstr(Attrs, "Repeat");
if (0 == Start)
Quit("GFF line %d, repeat record does not have Repeat attribute", GFFLineNr);
char *SemiColon = strchr(Start, ';');
if (0 != SemiColon)
*SemiColon = 0;
char *Space = strchr(Start, ' ');
if (0 == Space)
Quit("GFF repeat record line %d, missing space in attrs", GFFLineNr);
return Space + 1;
}
// "Repeat HETRP_DM Satellite 1518 1669 0"
// 0 1 2 3 4
void ParseRepeat(char *Str, RepData &Rep)
{
char *Fields[5];
int FieldCount = GetFields(Str, Fields, 5, ' ');
if (FieldCount != 5)
Quit("GFF line %d, Repeat attribute does not have 5 fields");
const char *RepeatName = Fields[0];
const char *RepeatClass = Fields[1];
const char *RepeatFrom = Fields[2];
const char *RepeatTo = Fields[3];
const char *RepeatLeft = Fields[4];
Rep.RepeatName = strsave(RepeatName);
Rep.RepeatClass = strsave(RepeatClass);
if (0 == strcmp(RepeatFrom, "."))
{
Rep.RepeatFrom = -1;
Rep.RepeatTo = -1;
Rep.RepeatLeft = -1;
}
else
{
Rep.RepeatFrom = atoi(RepeatFrom) - 1;
Rep.RepeatTo = atoi(RepeatTo) - 1;
Rep.RepeatLeft = atoi(RepeatLeft);
}
}
static int ReadRepsPass1(FILE *f)
{
GFFLineNr = 0;
int RepCount = 0;
GFFRecord Rec;
while (GetNextGFFRecord(f, Rec))
{
if (0 != strcmp(Rec.Feature, "repeat"))
{
static bool WarningIssued = false;
if (!WarningIssued)
{
Warning("GFF record feature '%s' is not a repeat", Rec.Feature);
WarningIssued = true;
}
continue;
}
if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
Warning("GFF line %d: invalid start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
++RepCount;
}
return RepCount;
}
static int ReadRepsPass2(FILE *f, RepData *Reps)
{
rewind(f);
GFFLineNr = 0;
int RepCount = 0;
GFFRecord Rec;
while (GetNextGFFRecord(f, Rec))
{
if (0 != strcmp(Rec.Feature, "repeat"))
continue;
static char *Repeat = GetRepeat(Rec);
RepData &Rep = Reps[RepCount];
ParseRepeat(Repeat, Rep);
if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
Warning("GFF line %d: invalid start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
const int Length = Rec.End - Rec.Start + 1;
Rep.ContigLabel = strsave(Rec.SeqName);
Rep.ContigFrom = Rec.Start - 1;
Rep.ContigTo = Rep.ContigFrom + Length - 1;
if (Rec.Strand == '+')
Rep.Rev = false;
else if (Rec.Strand == '-')
Rep.Rev = true;
else
Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);
++RepCount;
}
return RepCount;
}
RepData *ReadReps(const char *FileName, int *ptrRepCount)
{
FILE *f = OpenStdioFile(FileName);
int RepCount = ReadRepsPass1(f);
RepData *Reps = all(RepData, RepCount);
ReadRepsPass2(f, Reps);
fclose(f);
*ptrRepCount = RepCount;
return Reps;
}