Skip to content

Commit 02bf257

Browse files
authored
Misc cleanups and improvements (e.g. new adapter mode)
1 parent ed1292b commit 02bf257

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

src/shi7_trimmer.c

+18-18
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <stdlib.h>
55
#include <zlib.h>
66
#define SHOW_USAGE() {\
7-
printf( "\nSHort-read Iterative Trimmer (SHI7) v0.92d, by Gabe. Usage:\n");\
7+
printf( "\nSHort-read Iterative Trimmer (SHI7) v0.92f, by Gabe. Usage:\n");\
88
printf( "shi7_trimmer in_seqs.fastq out_prefix MINLEN QUAL <QUAL_RIGHT> ...\n");\
99
printf( "Choose one of the following optional trimming methods:\n");\
1010
printf( " [ROLLING X [PREROLL/POSTROLL]] For rolling average over X bases\n" );\
@@ -105,28 +105,29 @@ int main(int argc, char *argv[]) {
105105
setvbuf(out2,0,_IOFBF,1<<20);
106106
if (!in || !out || (pair && (!in2 || !out2))) {
107107
puts("Can't open input or output file(s)!"); exit(1);}
108-
char *Head = malloc(UINT16_MAX), *Seq = malloc(UINT16_MAX),
109-
*Shi7 = malloc(UINT16_MAX), *Qual = malloc(UINT16_MAX),
108+
long maxLen = 16e6; // 16MB should be enough for anyone.
109+
char *Head = malloc(maxLen), *Seq = malloc(maxLen),
110+
*Shi7 = malloc(maxLen), *Qual = malloc(maxLen),
110111
*Head2 = 0, *Seq2 = 0, *Shi72 = 0, *Qual2 = 0;
111-
if (pair) Head2 = malloc(UINT16_MAX), Seq2 = malloc(UINT16_MAX),
112-
Shi72 = malloc(UINT16_MAX), Qual2 = malloc(UINT16_MAX);
112+
if (pair) Head2 = malloc(maxLen), Seq2 = malloc(maxLen),
113+
Shi72 = malloc(maxLen), Qual2 = malloc(maxLen);
113114
if (palincut && !pair) puts("WARNING: Can't do palincut on SE reads!");
114115
long crap = 0, decency = 0, totI = 0, totJ = 0, totLen = 0, length=0, length2=LONG_MAX;
115116
int *Scores = calloc(doRoll,sizeof(*Scores));
116-
while (Head = gzgets(in,Head,UINT16_MAX)) {
117-
Seq = gzgets(in,Seq, UINT16_MAX);
118-
Shi7 = gzgets(in,Shi7, UINT16_MAX);
119-
Qual = gzgets(in,Qual, UINT16_MAX);
117+
while (Head = gzgets(in,Head,maxLen)) {
118+
Seq = gzgets(in,Seq, maxLen);
119+
Shi7 = gzgets(in,Shi7, maxLen);
120+
Qual = gzgets(in,Qual, maxLen);
120121
length = strlen(Qual);
121122
//printf("Seq: %s qual: %s len %d\n",Seq,Qual,length);
122123
if (Qual[length-1]=='\n') --length;
123124
if (Qual[length-1]=='\r') --length;
124125
//printf("--> Length is now %d\n",length);
125126
if (pair) {
126-
Head2 = gzgets(in2,Head2, UINT16_MAX);
127-
Seq2 = gzgets(in2,Seq2, UINT16_MAX);
128-
Shi72 = gzgets(in2,Shi72, UINT16_MAX);
129-
Qual2 = gzgets(in2,Qual2, UINT16_MAX);
127+
Head2 = gzgets(in2,Head2, maxLen);
128+
Seq2 = gzgets(in2,Seq2, maxLen);
129+
Shi72 = gzgets(in2,Shi72, maxLen);
130+
Qual2 = gzgets(in2,Qual2, maxLen);
130131
length2 = strlen(Qual2);
131132
if (Qual2[length2-1]=='\n') --length2;
132133
if (Qual2[length2-1]=='\r') --length2;
@@ -144,7 +145,7 @@ int main(int argc, char *argv[]) {
144145
if (pair && doAdap2) {
145146
adMin = adMin < endpt ? adMin : endpt; // truncate both reads where first adap found
146147
// Truncate at partial matches up to 2 bases near the end
147-
if (!r && !adap) // Only run on R1; second read is redundant due to equality
148+
if (!r && !adap) // Only run on R1; second read is redundant due to assumed equality of length (!)
148149
for (int w = *TLen - (AdapLens[z] < *TLen ? AdapLens[z] : 0) + 1; w < *TLen-1; ++w) {
149150
if (!memcmp(Adaps[z],TSeq+w,*TLen - w) && !memcmp(Seq+w,Seq2+w,*TLen-w))
150151
{adMin = adMin < w ? adMin : w; break;}
@@ -249,22 +250,21 @@ int main(int argc, char *argv[]) {
249250
// Regurgitate the sequence(s). They're (prolly) OK enough if they made it this far down.
250251
for (int z = L; z < R; z++) Seq[z] = Qual[z] - 33 > nify ? Seq[z] : 'N';
251252
Seq[R]=0; Qual[R]='\n', Qual[R+1] = 0;
252-
//printf(" ------> seq [%d] R1 '%s', L = %d, R = %d, len %d\n",crap+decency,Seq,L,R, R-L);
253-
if (fasta_out) Head[L]='>', Shi7[0]=0, Qual[L]=0;
253+
if (fasta_out) Head[0]='>', Shi7[0]=0, Qual[L]=0;
254254
if (strip) fprintf(out,"%c%d\n%s\n%s%s",fasta_out? '>':'@',crap+decency,
255255
Seq + L,fasta_out? "":"+\n",Qual + L);
256256
else fprintf(out,"%s%s\n%s%s",Head,Seq + L,Shi7,Qual + L);
257257
if (pair) {
258258
for (int z = L2; z < R2; z++) Seq2[z] = Qual2[z] - 33 > nify ? Seq2[z] : 'N';
259259
Seq2[R2]=0; Qual2[R2]='\n'; Qual2[R2+1]=0;
260-
if (fasta_out) Head2[L2]='>', Shi72[0]=0, Qual2[L2]=0;
260+
if (fasta_out) Head2[0]='>', Shi72[0]=0, Qual2[L2]=0;
261261
if (strip) fprintf(out2,"%c%d\n%s\n%s%s",fasta_out? '>':'@',crap+decency,
262262
Seq2 + L2,fasta_out? "":"+\n",Qual2 + L2);
263263
else fprintf(out2,"%s%s\n%s%s",Head2,Seq2 + L2,Shi72,Qual2 + L2);
264264
}
265265
++decency;
266266
totLen += (R - L) + (R2 - L2);
267-
totI += L+L2, totJ += (length - R) + (length2 - R2);
267+
totI += L+L2, totJ += (length - R) + (pair ? (length2 - R2) : 0 );
268268

269269
HELL:NULL; // HEREIN LIE DEVILS
270270
if (decency >= head) break;

0 commit comments

Comments
 (0)