-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrmPairedNs.py
executable file
·39 lines (31 loc) · 1.4 KB
/
rmPairedNs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
import os, sys, argparse
import itertools
def ParseArg():
    """Parse the command-line arguments of the script.

    Three positional arguments are expected: the forward FASTQ file, the
    reverse FASTQ file and the length of the run of Ns to look for.

    When the script is invoked without any argument, the help text is
    written to stderr and the process exits with status 0.

    Returns:
        argparse.Namespace: with attributes ``input1`` (str), ``input2``
        (str) and ``ntimes`` (int).

    Raises:
        SystemExit: when no argument is given (status 0), or from argparse
            itself when the arguments are invalid.
    """
    p = argparse.ArgumentParser(
        description='Remove paired fastq reads where at least one sequence contains a series of Ns',
        epilog='Library dependency: itertools')
    p.add_argument('input1', type=str, metavar='reads1', help='forward input fastq file')
    p.add_argument('input2', type=str, metavar='reads2', help='reverse input fastq file')
    p.add_argument('ntimes', type=int, metavar='Nx', help='Number of Ns to look for')
    if len(sys.argv) == 1:
        # print_help() returns None and writes to the stream it is given, so
        # hand it stderr directly instead of printing its return value.
        p.print_help(sys.stderr)
        sys.exit(0)
    return p.parse_args()
def filter_read_pairs(reads1, reads2, out1, out2, ntimes):
    """Copy read pairs from two FASTQ streams, skipping pairs with a run of Ns.

    Both inputs are consumed four lines at a time.  The second line of each
    four-line record (the sequence line) is searched for ``ntimes``
    consecutive ``N`` characters.  If either mate matches, the pair is
    dropped from both outputs; otherwise both records are written unchanged.
    Processing stops as soon as either input has no further record.

    Args:
        reads1: iterable of lines for the forward reads (e.g. an open file).
        reads2: iterable of lines for the reverse reads.
        out1: writable text stream receiving the kept forward records.
        out2: writable text stream receiving the kept reverse records.
        ntimes: length of the run of Ns that disqualifies a pair.  A value
            of zero or less yields an empty pattern, which is contained in
            every sequence, so every pair is dropped.

    Raises:
        ValueError: if an input ends in the middle of a four-line record.
    """
    n_run = "N" * ntimes
    while True:
        # islice pulls lines through the iterator protocol, so it works the
        # same way on Python 2 file objects and Python 3 text streams.
        rec1 = list(itertools.islice(reads1, 4))
        rec2 = list(itertools.islice(reads2, 4))
        if not rec1 or not rec2:
            break
        if len(rec1) < 4 or len(rec2) < 4:
            raise ValueError("truncated FASTQ record: expected 4 lines per read")
        if n_run in rec1[1] or n_run in rec2[1]:
            continue
        out1.writelines(rec1)
        out2.writelines(rec2)


if __name__ == '__main__':
    args = ParseArg()
    suffix = "_rmN" + str(args.ntimes)
    # Inputs are opened first so that a missing input file does not leave
    # empty output files behind; the with-statement closes all four files.
    with open(args.input1, 'rt') as read1, \
            open(args.input2, 'rt') as read2, \
            open(args.input1 + suffix, "w") as outfile1, \
            open(args.input2 + suffix, "w") as outfile2:
        filter_read_pairs(read1, read2, outfile1, outfile2, args.ntimes)