-
Notifications
You must be signed in to change notification settings - Fork 4
/
psl2genephed.py
93 lines (82 loc) · 2.54 KB
/
psl2genephed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/python
import sys
import os
# Command-line interface:
#   psl2genephed.py psl_file skip_Nine Lgap output_filename
#
#   psl_file        path of the PSL file to read
#   skip_Nine       number of leading lines of psl_file to skip (integer)
#   Lgap            minimum distance between two consecutive alignment
#                   blocks for the gap to be reported as a junction (integer)
#   output_filename path of the output file for multi-block records
#
# Four positional arguments are read below, so sys.argv must hold at least
# five entries (script name + 4). The previous ">= 3" check let short
# invocations through and they died with IndexError instead of the usage text.
if len(sys.argv) >= 5:
    psl_filename = sys.argv[1]
    skip_Nine = int(sys.argv[2])
    Lgap = int(sys.argv[3])
    output_filename = sys.argv[4]
else:
    print("usage:psl2genephed.py psl_file skip_Nine Lgap output_filename")
    print("or ")
    sys.exit(1)
################################################################################
def GetPathAndName(pathfilename):
    """Split a '/'-separated path into a directory prefix and a file name.

    Args:
        pathfilename: path string that uses '/' as its separator.

    Returns:
        A ``(path, filename)`` tuple. ``filename`` is everything after the
        last '/'. ``path`` is everything up to and including the last '/',
        so ``path + filename`` rebuilds the input; when the input contains
        no '/' at all, ``path`` is "./".
    """
    head, sep, filename = pathfilename.rpartition('/')
    if not sep:
        # Bare file name: point explicitly at the current directory.
        return "./", filename
    # Re-attach the separator. For a root-level file such as "/x" this
    # yields "/" (the filesystem root) rather than being confused with the
    # "no directory given" case and rewritten to "./".
    return head + sep, filename
# Convert each PSL line into one tab-separated record. Records with at least
# one junction go to output_filename; records without any go to a sibling
# file prefixed "singleexon_", and their original PSL lines are copied to
# "<singleexon file>.psl".
path, filename = GetPathAndName(output_filename)
singleexon_output_filename = path + "singleexon_" + filename

# The with-statement closes all four files even if a malformed line raises
# part-way through, so already-written records are flushed and no handle leaks.
with open(psl_filename, 'r') as psl, \
        open(output_filename, 'w') as output, \
        open(singleexon_output_filename, 'w') as singleexon_output, \
        open(singleexon_output_filename + ".psl", 'w') as singleexon_psl_output:
    for line_index, line in enumerate(psl):
        # Skip the first skip_Nine lines of the file.
        if line_index < skip_Nine:
            continue
        # A blank line (e.g. trailing newline at end of file) has no columns
        # to index; ignore it rather than fail with IndexError.
        if not line.strip():
            continue

        ls = line.strip().split('\t')
        # Column meanings below assume the standard 21-column PSL layout
        # (NOTE(review): confirm the input carries no extra leading column).
        strand = ls[8]
        readname = ls[9]
        chr_name = ls[13]
        leftstart = ls[15]
        rightend = ls[16]
        # Both columns are comma-separated lists with a trailing comma.
        size_ls = ls[18].strip(',').split(',')
        start_ls = ls[20].strip(',').split(',')

        # Walk consecutive block pairs: the gap runs from the end of block j
        # (start + size) to the start of block j + 1. Only gaps of at least
        # Lgap are kept as junctions; smaller gaps are merged away.
        jun_start_ls = []
        jun_end_ls = []
        for j, start in enumerate(start_ls[:-1]):
            jun_start = int(start) + int(size_ls[j])
            jun_end = int(start_ls[j + 1])
            if jun_end - jun_start >= Lgap:
                jun_start_ls.append(str(jun_start))
                jun_end_ls.append(str(jun_end))

        # Each segment starts where the previous junction ended and ends
        # where the next junction starts, bracketed by the alignment's
        # overall start and end.
        segment_starts = [leftstart] + jun_end_ls
        segment_ends = jun_start_ls + [rightend]

        result_ls = [
            readname,
            '.',
            chr_name,
            strand,
            leftstart,
            rightend,
            leftstart,  # start/end pair is written twice in the record
            rightend,
            str(len(segment_starts)),
            ','.join(segment_starts) + ',',
            ','.join(segment_ends) + ',',
            ls[0],
            ls[10],
        ]
        record = '\t'.join(result_ls) + '\n'

        if len(segment_ends) > 1:
            output.write(record)
        else:
            # No junction survived the Lgap filter: keep the record and the
            # untouched PSL line in the single-segment outputs.
            singleexon_output.write(record)
            singleexon_psl_output.write(line)
################################################################################