forked from mjhubisz/argweaver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharg-extract-treelen
executable file
·70 lines (51 loc) · 1.76 KB
/
arg-extract-treelen
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
import optparse
import os
import sys
import argweaver
from rasmus import intervals
from rasmus import stats
from rasmus import util
o = optparse.OptionParser()
o.add_option("-s", "--start", default=0, type="int",
help="Starting sample iteration to use")
o.add_option("-e", "--end", default=5000, type="int",
help="Last sample iteration to use")
o.add_option("-d", "--step", default=1, type="int",
help="Step size through samples")
conf, args = o.parse_args()
#=============================================================================
def treelen(tree):
return sum(x.dist for x in tree)
def iter_trees(filename):
chrom = "chr"
for item in argweaver.iter_smc_file(filename, parse_trees=True,
apply_spr=True):
if item["tag"] == "REGION":
chrom = item["chrom"]
if item["tag"] == "TREE":
yield item["start"]-1, item["end"], chrom, item["tree"]
#=============================================================================
if len(args) == 0:
o.print_help()
sys.exit(1)
# get filenames
filename = args[0]
if "%d" in filename:
filenames = []
for i in range(conf.start, conf.end, conf.step):
fn = filename % i
if os.path.exists(fn):
filenames.append(fn)
else:
filenames = args
treelens = []
for filename in filenames:
treelens.extend([s, e, c, treelen(t)]
for s, e, c, t in iter_trees(filename))
treelens.sort()
for start, end, group in intervals.iter_intersections(treelens):
chrom = group[0][2]
vals = util.cget(group, 3)
util.print_row(chrom, start, end, stats.mean(vals),
stats.percentile(vals, .025), stats.percentile(vals, .975))