-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
98 lines (70 loc) · 2.82 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import tempfile

import pandas as pd
import pysam
def process_vcf(vcf_file) -> pd.DataFrame:
    """Load the records of a VCF file into a pandas DataFrame.

    Args:
        vcf_file: Path (or any other source accepted by
            ``pysam.VariantFile``) of the VCF to read.

    Returns:
        A DataFrame with one row per record and the columns CHROM, POS, ID,
        REF, ALT, QUAL, FILTER, DP and AF.

        * ALT is the comma-joined list of alternate alleles, or ``"."`` when
          the record has none.
        * FILTER is the semicolon-joined list of filter names, or ``"PASS"``
          when the record carries no filter.
        * DP is the INFO/DP value, or ``'NA'`` when absent.
        * AF is the first INFO/AF value, or ``'NA'`` when absent.
    """
    columns = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'DP', 'AF']
    vcf_data = []
    with pysam.VariantFile(vcf_file) as vcf_in:
        for record in vcf_in:
            # 'NA' when the record has no DP.
            dp = record.info.get('DP', 'NA')

            # 'NA' when the record has no AF. Depending on the header
            # declaration AF arrives as a tuple (one value per ALT allele) or
            # as a bare scalar, so only index into it when it is a sequence.
            af = record.info.get('AF', 'NA')
            if isinstance(af, (tuple, list)):
                af = af[0] if af else 'NA'

            # `alts` is None when the ALT column is "."; joining None would
            # raise TypeError, so fall back to the VCF missing-value marker.
            alt = ','.join(str(allele) for allele in record.alts) if record.alts else '.'

            # Always store FILTER as a string so the column does not mix
            # lists (from `keys()`) with the plain "PASS" fallback.
            filters = ';'.join(record.filter.keys()) if record.filter else "PASS"

            vcf_data.append([
                record.chrom,
                record.pos,
                record.id,
                record.ref,
                alt,
                record.qual,
                filters,
                dp,
                af,
            ])
    return pd.DataFrame(vcf_data, columns=columns)
def get_frequencies(snp_data, studies):
    """Collect per-SNP allele frequencies for the requested studies.

    Args:
        snp_data: Iterable of response dicts. Each dict may hold a
            ``'result'`` mapping whose dict values describe one SNP and carry
            a ``'global_mafs'`` list of ``{'study': ..., 'freq': ...}``
            entries. Anything that does not match this shape is skipped.
        studies: Container of study names to keep (tested with ``in``).

    Returns:
        A list with one ``{study: freq}`` dict per SNP that has at least one
        non-empty frequency for a requested study. SNPs without any match
        contribute nothing.
    """
    frequencies = []
    for snp_info in snp_data:
        # Skip malformed responses (e.g. None from a failed request).
        if not isinstance(snp_info, dict):
            continue
        result = snp_info.get('result')
        if not isinstance(result, dict):
            continue

        for snp in result.values():
            # `result` can also hold non-SNP values; only dicts are SNPs.
            if not isinstance(snp, dict):
                continue

            snp_frequencies = {}
            # `or []` also covers a 'global_mafs' key explicitly set to None.
            for entry in snp.get('global_mafs') or []:
                if not isinstance(entry, dict):
                    continue
                study = entry.get('study')
                freq = entry.get('freq')
                if study in studies and freq:
                    snp_frequencies[study] = freq

            if snp_frequencies:
                frequencies.append(snp_frequencies)
    return frequencies
def extract_ids(file_content: str) -> list:
    """Return the IDs of all records in VCF text that have one.

    The text is written to a temporary ``.vcf`` file because
    ``pysam.VariantFile`` is opened here by file name; the temporary file is
    removed again before returning.

    Args:
        file_content: Full text of a VCF file.

    Returns:
        The non-empty record IDs in file order. If anything fails, the error
        is printed and the IDs collected so far (possibly none) are returned.
    """
    variant_ids = []
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, suffix=".vcf", encoding="utf-8"
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(file_content)
        # The temp file is closed (and therefore flushed) at this point, so
        # pysam reads the complete content and can reopen it on any platform.
        with pysam.VariantFile(temp_path) as vcf:
            for record in vcf:
                if record.id:
                    variant_ids.append(record.id)
    except Exception as e:
        # Deliberately best-effort: report the problem, keep partial results.
        print(f"Erro ao processar o arquivo VCF: {e}")
    finally:
        # delete=False means nobody else removes the file for us.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Cleanup must not turn a successful parse into a failure.
                pass
    return variant_ids
def extract_ref_alt(file_content: str) -> list:
    """Return chromosome, REF and ALT for every record whose ID starts with "rs".

    The text is written to a temporary ``.vcf`` file because
    ``pysam.VariantFile`` is opened here by file name; the temporary file is
    removed again before returning.

    Args:
        file_content: Full text of a VCF file.

    Returns:
        A list of ``{"chrom": ..., "ref": ..., "alt": ...}`` dicts in file
        order, where ``alt`` is the comma-joined list of alternate alleles
        (``"."`` when the record has none). If anything fails, the error is
        printed and the entries collected so far (possibly none) are returned.
    """
    ref_alt_list = []
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, suffix=".vcf", encoding="utf-8"
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(file_content)
        # The temp file is closed (and therefore flushed) at this point, so
        # pysam reads the complete content and can reopen it on any platform.
        with pysam.VariantFile(temp_path) as vcf:
            for record in vcf:
                record_id = record.id
                if not (record_id and record_id.startswith("rs")):
                    continue
                # `alts` is None when the ALT column is "."; without this
                # fallback one such record would abort the whole loop.
                alt = ','.join(str(a) for a in record.alts) if record.alts else '.'
                ref_alt_list.append(
                    {"chrom": record.chrom, "ref": record.ref, "alt": alt}
                )
    except Exception as e:
        # Deliberately best-effort: report the problem, keep partial results.
        print(f"Erro ao processar o arquivo VCF: {e}")
    finally:
        # delete=False means nobody else removes the file for us.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Cleanup must not turn a successful parse into a failure.
                pass
    return ref_alt_list