-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvcf.h
132 lines (88 loc) · 4.22 KB
/
vcf.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
//
// written by Quang M Trinh <[email protected]>
//
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef VCF_H
#define VCF_H
#include "input_data.h"
#include "output_data.h"
#include "fasta.h"
/*
 * Processing-mode identifiers (MODE_VCF_*).
 * NOTE(review): presumably each value selects the matching vcf_MODE_VCF_*
 * routine somewhere in the caller; the dispatch code is not part of this
 * header - confirm. The grouping below follows the names and numeric ranges.
 */

/* 1000-range: filtering and sample re-ordering. */
#define MODE_VCF_FILTER_PASS_FILTERS_ONLY                 1000
#define MODE_VCF_FILTER_SNV_OR_INDEL                      1005
#define MODE_VCF_SWITCH_SAMPLE_ORDER                      1010

/* 1500-2000: parsing of individual columns / annotations. */
#define MODE_VCF_PARSE_INFO_COLUMN_FOR_KEY                1500
#define MODE_VCF_PARSE_FORMAT_COLUMN_FOR_KEY              1600
#define MODE_VCF_PARSE_SNPEFF_ANN                         1800
#define MODE_VCF_PARSE_ANNOTATED_DATABASE                 2000

/* 2500-range: annotation of records. */
#define MODE_VCF_ANNOTATE_SEQUENCE_CONTEXT                2500
#define MODE_VCF_ANNOTATE_VARIANT_CLASS                   2501
#define MODE_VCF_ANNOTATE_SUBSTITUTION_SUB_TYPES          2505
#define MODE_VCF_ANNOTATE_GENOTYPE                        2510

/* 3000-range: summary generation and reference checks. */
#define MODE_VCF_GENERATE_SUBSTITUTION_MATRIX             3000
#define MODE_VCF_CHECK_REFERENCE_BASE_AGAINST_FASTA_FILE  3100

/* Character separating multiple entries in the ALT column. */
#define VCF_COLUMN_ALT_SEPARATOR                          ','
/*
 * Column positions within a VCF record, in the order the fixed columns
 * appear on a line.
 *
 * VCF_COLUMN occupies value 0, so the named columns are numbered from 1
 * (CHR = 1 ... FORMAT = 9) and the first sample column is 10.
 * NOTE(review): whether these values are used directly as array indices or
 * adjusted by an offset is not visible in this header - confirm at the
 * call sites.
 */
enum VCF_COLUMNS {
    VCF_COLUMN              = 0,
    VCF_COLUMN_CHR          = 1,
    VCF_COLUMN_POS          = 2,
    VCF_COLUMN_ID           = 3,
    VCF_COLUMN_REF          = 4,
    VCF_COLUMN_ALT          = 5,
    VCF_COLUMN_QUAL         = 6,
    VCF_COLUMN_FILTER       = 7,
    VCF_COLUMN_INFO         = 8,
    VCF_COLUMN_FORMAT       = 9,
    VCF_COLUMN_FIRST_SAMPLE = 10
};
/*
 * State kept while processing one VCF input: sample names, the parsed
 * FORMAT keys/values, substitution counters and the text header lines.
 * All array members are plain pointers; this header does not show who
 * allocates or frees them.
 */
struct vcf {
    int columnOffSet;
    int numberOfSamples;

    /* Sample names; the first sample is at index 0. */
    char **sampleNames;

    /* FORMAT column: the first attribute and value are at index 0. */
    char **formatKeys;          /* array of strings */
    char ***formatValues;
    int formatN;                /* presumably the number of FORMAT keys - confirm */

    /*
     * 3D int array to store SNV substitution changes:
     *   first dimension:  sample
     *   second dimension: reference
     *   third dimension:  alt (i.e., changed to)
     */
    int ***snvSubstitutionMatrix;

    /*
     * 5D int array to store SNV substitution changes.
     * TODO: the meaning of the individual dimensions is not documented.
     */
    int *****snvSubstitutionMatrixNNN;

    int *totalNumberOfCallsPerSample;

    /*
     * Transition (Ts) and transversion (Tv) counters; see
     * https://www.mun.ca/biology/scarr/Transitions_vs_Transversions.html
     * NOTE(review): presumably one counter per sample - confirm.
     */
    int *Ts;
    int *Tv;

    /* Text headers. */
    char **textHeaders;
    int textHeadersN;           /* presumably the number of text header lines - confirm */
};
/*
 * Create the VCF state object.
 *
 * Takes no arguments. The parameter list is spelled (void) so that this is a
 * real prototype: with empty parentheses a pre-C23 compiler accepts calls
 * that pass arguments without any diagnostic.
 *
 * Returns a pointer to a struct vcf.
 * NOTE(review): allocation, initial field values and ownership (who frees
 * the result) are decided by the definition, which is not in this header -
 * confirm there.
 */
struct vcf *vcf_init(void);
/*
 * Input set-up and record comparison.
 * NOTE(review): the behaviour lives in the implementation file; the
 * summaries below are inferred from the identifiers - confirm.
 */

/* Presumably flags the input described by `id` as being in VCF format. */
void vcf_setInputFileToVCFformat(struct input_data *id);

/*
 * Compare two entries passed as arrays of strings, `a` and `b`.
 * Presumably `an` and `bn` are the element counts of `a` and `b`.
 * vcf_compare_result_t is not declared in this header; it has to be
 * visible before this prototype is parsed.
 */
vcf_compare_result_t vcf_compareEntry(struct input_data *id,
                                      char **a, int an,
                                      char **b, int bn);
/*
 * Filtering and parsing entry points.
 * NOTE(review): the definitions are not part of this header; every summary
 * below is inferred from the identifiers and parameter names - confirm
 * before relying on it.
 */

/* INFO column, looked up by `key`; the role of `value` is not visible here. */
void vcf_MODE_VCF_PARSE_INFO_COLUMN_FOR_KEY(struct input_data *id,
                                            struct output_data *od,
                                            char *key,
                                            double value);

/* Presumably keeps only the records whose FILTER column passes. */
void vcf_MODE_VCF_FILTER_PASS_FILTERS_ONLY(struct input_data *id,
                                           struct output_data *od);

/* Header / FORMAT handling; these take the struct vcf state to work on. */
void vcf_parseHEADER(struct input_data *id, struct vcf *vcf);
void vcf_parseFORMAT(struct input_data *id, struct vcf *vcf);
void vcf_printFORMAT(struct input_data *id, struct vcf *vcf);

/*
 * FORMAT column, looked up by `key`; `sampleIndex2Process` presumably
 * selects the sample column to inspect. The role of `value` is not
 * visible here.
 */
void vcf_MODE_VCF_PARSE_FORMAT_COLUMN_FOR_KEY(struct input_data *id,
                                              struct output_data *od,
                                              char *key,
                                              double value,
                                              int sampleIndex2Process);

/* Annotated-database parsing, driven by record and column separators. */
void vcf_MODE_VCF_PARSE_ANNOTATED_DATABASE(struct input_data *id,
                                           struct output_data *od,
                                           char *key,
                                           char *recordSeparator,
                                           char *columnSeparator);

/*
 * Lower-level annotated-database parser working on plain strings.
 * `inDatabase` and `matches` are int pointers, presumably outputs - confirm.
 */
void vcf_parseAnnotatedDatabase(int verbose,
                                char *line,
                                char *infoColumn,
                                char *databasePrefix,
                                int *inDatabase,
                                int *matches,
                                char *recordSeparator,
                                char *columnSeparator);
/*
 * Annotation and substitution-matrix entry points.
 * NOTE(review): the definitions are not part of this header; every summary
 * below is inferred from the identifiers and parameter names - confirm
 * before relying on it.
 */

/* Sequence-context annotation using the FASTA data in `fa`; `flanking` is
 * presumably the number of bases taken on each side - confirm. */
void vcf_MODE_VCF_ANNOTATE_SEQUENCE_CONTEXT(struct input_data *id,
                                            struct output_data *od,
                                            struct fasta *fa,
                                            int flanking);

/* Substitution-matrix generation; `bySample` is presumably a per-sample
 * on/off flag - confirm. */
void vcf_MODE_VCF_GENERATE_SUBSTITUTION_MATRIX(struct input_data *id,
                                               struct output_data *od,
                                               struct vcf *myVCF,
                                               int bySample);
void vcf_MODE_VCF_GENERATE_SUBSTITUTION_MATRIX_WITH_SEQUENCE_CONTEXT(struct input_data *id,
                                                                     struct output_data *od,
                                                                     struct vcf *myVCF,
                                                                     int bySample,
                                                                     struct fasta *fa);

/* Reporting helpers for the counters held in `myVCF`. */
void vcf_printTsTv(struct input_data *id,
                   struct output_data *od,
                   struct vcf *myVCF,
                   int bySample);
void vcf_printSubstitutionMatrix(struct input_data *id,
                                 struct output_data *od,
                                 struct vcf *myVCF,
                                 int bySample);
void vcf_printSubstitutionMatrixHorizontal(struct input_data *id,
                                           struct output_data *od,
                                           struct vcf *myVCF,
                                           int bySample);

/* Presumably appends `data2add` to the INFO field of the record held in
 * `columns` (`n` presumably being the number of columns) - confirm. */
void vcf_addAnnotation2VCFINFO(struct input_data *id,
                               char **columns,
                               int n,
                               char *data2add);

/* Substitution data builders; the 12 / 96 suffixes presumably refer to the
 * number of substitution classes produced - confirm. */
void vcf_constructSubstitutionData12(struct input_data *id,
                                     struct output_data *od,
                                     struct vcf *myVCF,
                                     int bySample);
void vcf_constructSubstitutionData96(struct input_data *id,
                                     struct output_data *od,
                                     struct vcf *myVCF,
                                     int bySample);
/*
 * Classification of a single base change `ref` -> `alt`.
 * NOTE(review): presumably each returns non-zero when the change belongs to
 * the named class (transition vs. transversion) and zero otherwise; the
 * return convention and the handling of lower-case or non-ACGT characters
 * are set by the definitions, which are not in this header - confirm.
 */
int vcf_isATransitionChange(char ref, char alt);
int vcf_isATransversionChange(char ref, char alt);
/*
 * Reference-base check against the FASTA data in `fa`.
 * NOTE(review): presumably compares the REF column of each record with the
 * corresponding base(s) in `fa` and reports through `od`; how mismatches
 * are handled is decided by the definition - confirm.
 */
void vcf_MODE_VCF_CHECK_REFERENCE_BASE_AGAINST_FASTA_FILE(struct input_data *id,
                                                          struct output_data *od,
                                                          struct fasta *fa);
#endif