forked from mjhubisz/argweaver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharg-extract-popsize
executable file
·89 lines (67 loc) · 2.3 KB
/
arg-extract-popsize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
import optparse
import os
import sys
import argweaver
from rasmus import intervals
from rasmus import stats
from rasmus import util
o = optparse.OptionParser(
usage="%prog SMC_FILE ...",
description="""
Extract the local effective population size.
SMC_FILE can be a pattern where '%d' denotes the sample iteration.
""")
o.add_option("-s", "--start", default=0, type="int",
help="Starting sample iteration to use")
o.add_option("-e", "--end", default=5000, type="int",
help="Last sample iteration to use")
o.add_option("-d", "--step", default=1, type="int",
help="Step size through samples")
conf, args = o.parse_args()
#=============================================================================
def mle_popsize_coal_times(k, times, mintime=0):
s = 0
i = k
last = 0
for t in times:
s += i*(i-1) * max(t - last, mintime)
i -= 1
last = t
return s / float(4 * k - 4)
def mle_popsize_tree(tree, mintime=0):
times = sorted([x.data["age"] for x in tree if not x.is_leaf()])
return mle_popsize_coal_times(len(tree.leaves()), times, mintime)
def iter_popsize(filename, mintime=0):
chrom = "chr"
for item in argweaver.iter_smc_file(filename, parse_trees=True,
apply_spr=True):
if item["tag"] == "REGION":
chrom = item["chrom"]
if item["tag"] == "TREE":
tree = item["tree"]
popsize = mle_popsize_tree(tree, mintime)
yield item["start"], item["end"], chrom, popsize
#=============================================================================
if len(args) == 0:
o.print_help()
sys.exit(1)
filename = args[0]
if "%d" in filename:
filenames = []
for i in range(conf.start, conf.end, conf.step):
fn = filename % i
if os.path.exists(fn):
filenames.append(fn)
else:
filenames = args
mintime = 0
popsizes = []
for filename in filenames:
popsizes.extend(iter_popsize(filename, mintime))
popsizes.sort()
for start, end, group in intervals.iter_intersections(popsizes):
chrom = group[0][2]
vals = util.cget(group, 3)
util.print_row(chrom, start, end, stats.mean(vals),
stats.percentile(vals, .025), stats.percentile(vals, .975))