forked from RAHenriksen/NGSNGS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMapDMGlen.cpp
140 lines (123 loc) · 4.17 KB
/
MapDMGlen.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cassert>
#include <cstdint>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm> //std::reverse
#include <iostream>
#include <htslib/faidx.h>
#include <htslib/sam.h>
#include <htslib/vcf.h>
#include <htslib/bgzf.h>
#include <htslib/kstring.h>
#include <zlib.h>
#include <errno.h>
#include <stdint.h>
#include <random>
#include <pthread.h>
typedef struct{
const char *MapDMG_len_in;
const char *NGSNGS_len_out;
}argStruct;
int HelpPage(FILE *fp){
fprintf(fp,"Fragment Length converter - converting MapDamage 2.0 lgdistribution files into NGSNGS length distribution format\n\n");
fprintf(fp,"Usage\n./LenConvert -i <MapDamage Length Distribution> -o <NGSNGS length distribution file>\n");
fprintf(fp,"\nExample\n./LenConvert -i lgdistribution.txt -o Ancient_hg19_CDF.txt\n");
fprintf(fp,"./LenConvert --input lgdistribution.txt --output Ancient_hg19_CDF.txt\n");
fprintf(fp,"\nOptions: \n");
fprintf(fp,"-h | --help: \t Print help page.\n");
fprintf(fp,"-v | --version: Print version.\n\n");
fprintf(fp,"-i | --input: \t MapDamage fragment length intput in .txt or .txt.gz format.\n");
fprintf(fp,"-o | --output: \t NGSNGS fragment length output in .txt format.\n");
exit(1);
return 0;
}
argStruct *getpars(int argc,char ** argv){
argStruct *mypars = new argStruct;
mypars->MapDMG_len_in = NULL;
mypars->NGSNGS_len_out = NULL;
++argv;
while(*argv){
//fprintf(stderr,"ARGV %s\n",*argv);
if(strcasecmp("-i",*argv)==0 || strcasecmp("--input",*argv)==0){
mypars->MapDMG_len_in = strdup(*(++argv));
}
else if(strcasecmp("-o",*argv)==0 || strcasecmp("--output",*argv)==0){
mypars->NGSNGS_len_out = strdup(*(++argv));
}
else{
fprintf(stderr,"unrecognized input option %s, see NGSNGS help page\n\n",*(argv));
exit(0);
}
++argv;
}
return mypars;
}
int main_Length(int argc,char **argv){
argStruct *mypars = NULL;
if(argc==1||(argc==2&&(strcasecmp(argv[1],"--version")==0||strcasecmp(argv[1],"-v")==0||
strcasecmp(argv[1],"--help")==0||strcasecmp(argv[1],"-h")==0))){
HelpPage(stderr);
return 0;
}
else{
mypars = getpars(argc,argv);
const char* MapDMG_input = mypars->MapDMG_len_in;
const char* NGSNGS_output = mypars->NGSNGS_len_out;
int length = 6000;
char buf[length];
gzFile gz = Z_NULL;
int row_no = 0;
int* Len_Val_tmp = new int[256];
int* Len_Obs_tmp = new int[256];
int Len_sum = 0;
int Len_Val[256];
int Len_Obs[256];
gz = gzopen(MapDMG_input,"r");
assert(gz!=Z_NULL);
while(gzgets(gz,buf,length)){
if (buf[0] == '+' || buf[0] == '-'){
// buf : + 50 10074
char* tok_len; char* tok_count;
strtok(buf,"\t"); // +
tok_len = strtok(NULL,"\t"); // 50
Len_Val_tmp[atoi(tok_len)] = atoi(tok_len);
tok_count = strtok(NULL,"\t"); // 10074
Len_sum += atoi(tok_count);
Len_Obs_tmp[atoi(tok_len)] += atoi(tok_count);
}
else{
continue;
}
}
gzclose(gz);
double CumCount = 0;
//Identify the lowest fragment length
int low_len = 0;
for (int elem=0; elem<256; ++elem){if (Len_Val_tmp[elem] != 0){low_len = Len_Val_tmp[elem];break;}};
low_len--; // substract with one
FILE *ReadSizeFile = fopen(NGSNGS_output, "w");
fprintf(ReadSizeFile,"%d \t %f\n",low_len,CumCount);
for (int elem=0; elem<256; ++elem){
if (Len_Val_tmp[elem] != 0){
CumCount += (double)Len_Obs_tmp[elem]/(double)Len_sum;
fprintf(ReadSizeFile,"%d \t %f\n",Len_Val_tmp[elem],CumCount);
}
}
fclose(ReadSizeFile);
delete[] Len_Val_tmp;
delete[] Len_Obs_tmp;
}
return 0;
}
#ifdef __WITH_MAIN__
int main(int argc,char **argv){
main_Length(argc,argv);
return 0;
}
#endif
//g++ MapDMGlen.cpp -std=c++11 -lm -lz -D __WITH_MAIN__ -o LenConvert