Skip to content

Commit

Permalink
Merge pull request #7 from tomdcsmith/rm_getters_setters
Browse files Browse the repository at this point in the history
Getters and setters replaced by properties
  • Loading branch information
cyenyxe committed Apr 18, 2016
2 parents ffc114f + 6e435dd commit d2d3ced
Show file tree
Hide file tree
Showing 12 changed files with 763 additions and 322 deletions.
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
/resources/ignore_file.txt
/resources/rs_ids_2015_05.txt
/resources/bad_snp2gene
/.python-version
Expand Down Expand Up @@ -28,8 +27,8 @@
/resources/rs_ids_2015_05.txt
/eva_cttv_pipeline/resources/schema_local
/resources/bad_snp2gene
.cache/
/.eggs
.cache
.eggs
*.coverage

# General
Expand Down
43 changes: 28 additions & 15 deletions eva_cttv_pipeline/clinvar_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def __init__(self, a_dictionary=None):
else:
dict.__init__(self, a_dictionary)

def get_gene_id(self):
@property
def gene_id(self):
j = 0
measure = self['referenceClinVarAssertion']['measureSet']['measure']
found = False
Expand All @@ -69,7 +70,8 @@ def get_gene_id(self):
else:
return self['referenceClinVarAssertion']['measureSet']['measure'][0]['name'][0]['elementValue']['value']

def get_ensembl_id(self):
@property
def ensembl_id(self):
global ensembl_json
j = 0
measure = self['referenceClinVarAssertion']['measureSet']['measure']
Expand Down Expand Up @@ -128,16 +130,20 @@ def get_ensembl_id(self):

return None

def get_date(self):
@property
def date(self):
return datetime.fromtimestamp(self['referenceClinVarAssertion']['dateLastUpdated'] / 1000).isoformat()

def get_score(self):
@property
def score(self):
return self.score_map[self['referenceClinVarAssertion']['clinicalSignificance']['reviewStatus']]

def get_acc(self):
@property
def accession(self):
return self['referenceClinVarAssertion']['clinVarAccession']['acc']

def get_traits(self):
@property
def traits(self):
trait_list = []
for trait_record in self['referenceClinVarAssertion']['traitSet']['trait']:
trait_list.append([])
Expand All @@ -150,7 +156,8 @@ def get_traits(self):

return trait_list

def get_trait_pubmed_refs(self):
@property
def trait_pubmed_refs(self):
pubmed_refs_list = []
for trait_record in self['referenceClinVarAssertion']['traitSet']['trait']:
pubmed_refs_list.append([])
Expand All @@ -162,7 +169,8 @@ def get_trait_pubmed_refs(self):

return pubmed_refs_list

def get_observed_pubmed_refs(self):
@property
def observed_pubmed_refs(self):
pubmedrefsList = []
if 'observedIn' in self['referenceClinVarAssertion']:
for observedInRecord in self['referenceClinVarAssertion']['observedIn']:
Expand All @@ -174,7 +182,8 @@ def get_observed_pubmed_refs(self):
pubmedrefsList.append(int(citationRecord['id']['value']))
return pubmedrefsList

def get_measure_set_pubmed_refs(self):
@property
def measure_set_pubmed_refs(self):
pubmed_refs_list = []
for measure_record in self['referenceClinVarAssertion']['measureSet']['measure']:
if 'citation' in measure_record:
Expand All @@ -184,7 +193,8 @@ def get_measure_set_pubmed_refs(self):
pubmed_refs_list.append(int(ciration_record['id']['value']))
return pubmed_refs_list

def get_hgvs(self):
@property
def hgvs(self):
hgvs_list = []
for measure_record in self['referenceClinVarAssertion']['measureSet']['measure']:
for attribute_set_record in measure_record['attributeSet']:
Expand All @@ -193,18 +203,19 @@ def get_hgvs(self):

return hgvs_list

def get_clinical_significance(self):
@property
def clinical_significance(self):
return self['referenceClinVarAssertion']['clinicalSignificance']['description']

def get_rs(self, rcv_to_rs):
try:
return rcv_to_rs[self.get_acc()]
return rcv_to_rs[self.accession]
except KeyError:
return None

def get_nsv(self, rcv_to_nsv):
try:
return rcv_to_nsv[self.get_acc()]
return rcv_to_nsv[self.accession]
except KeyError:
return None

Expand All @@ -216,10 +227,12 @@ def get_main_consequence_types(self, consequence_type_dict, rcv_to_rs):
else:
return None

def get_variant_type(self):
@property
def variant_type(self):
return self['referenceClinVarAssertion']['measureSet']['measure'][0]['type']

def get_allele_origins(self):
@property
def allele_origins(self):
allele_origins = set()
for clinvar_assetion_document in self['clinVarAssertion']:
for observed_in_document in clinvar_assetion_document['observedIn']:
Expand Down
70 changes: 32 additions & 38 deletions eva_cttv_pipeline/clinvar_to_evidence_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def clinvar_to_evidence_strings(dir_out, allowed_clinical_significance=None, ign
for record in curr_result_list:
n_ev_strings_per_record = 0
clinvarRecord = clinvar_record.ClinvarRecord(record['clinvarSet'])
clin_sig = clinvarRecord.get_clinical_significance().lower()
clin_sig = clinvarRecord.clinical_significance.lower()
n_nsvs += (clinvarRecord.get_nsv(rcv_to_nsv) is not None)
if clin_sig in allowed_clinical_significance:
if record['reference'] != record['alternate']:
Expand All @@ -114,18 +114,18 @@ def clinvar_to_evidence_strings(dir_out, allowed_clinical_significance=None, ign
# The rs->gene mapping was found in Mick's file, so ensembl_gene_id will never be None
if consequenceType is not None:

for ensembl_gene_id in consequenceType.get_ensembl_gene_ids():
for ensembl_gene_id in consequenceType.ensembl_gene_ids:

rcv_to_gene_evidence_codes = ['http://identifiers.org/eco/cttv_mapping_pipeline'] # Evidence codes provided by Mick
ensembl_gene_id_uri = 'http://identifiers.org/ensembl/' + ensembl_gene_id
trait_refs_list = [['http://europepmc.org/abstract/MED/' + str(ref) for ref in refList] for refList in clinvarRecord.get_trait_pubmed_refs()]
observed_regs_list = ['http://europepmc.org/abstract/MED/' + str(ref) for ref in clinvarRecord.get_observed_pubmed_refs()]
measure_set_refs_list = ['http://europepmc.org/abstract/MED/' + str(ref) for ref in clinvarRecord.get_measure_set_pubmed_refs()]
for trait_counter, trait_list in enumerate(clinvarRecord.get_traits()):
trait_refs_list = [['http://europepmc.org/abstract/MED/' + str(ref) for ref in refList] for refList in clinvarRecord.trait_pubmed_refs]
observed_regs_list = ['http://europepmc.org/abstract/MED/' + str(ref) for ref in clinvarRecord.observed_pubmed_refs]
measure_set_refs_list = ['http://europepmc.org/abstract/MED/' + str(ref) for ref in clinvarRecord.measure_set_pubmed_refs]
for trait_counter, trait_list in enumerate(clinvarRecord.traits):
clinvar_trait_list, efo_list = map_efo(trait_2_efo, trait_list)
# Only ClinVar records associated with a trait that has a mapped EFO term will generate evidence_strings
if len(efo_list) > 0:
clinvar_record_allele_origins = clinvarRecord.get_allele_origins()
clinvar_record_allele_origins = clinvarRecord.allele_origins
n_multiple_allele_origin += (len(clinvar_record_allele_origins) > 1)
n_germline_somatic += (('germline' in clinvar_record_allele_origins) and (
'somatic' in clinvar_record_allele_origins))
Expand Down Expand Up @@ -156,7 +156,7 @@ def clinvar_to_evidence_strings(dir_out, allowed_clinical_significance=None, ign
evidence_string_list,
n_ev_strings_per_record)
evidence_list.append(
[clinvarRecord.get_acc(), rs, ','.join(clinvar_trait_list),
[clinvarRecord.accession, rs, ','.join(clinvar_trait_list),
','.join(efo_list)])
n_valid_rs_and_nsv += (clinvarRecord.get_nsv(rcv_to_nsv) is not None)
elif alleleOrigin == 'somatic':
Expand All @@ -179,7 +179,7 @@ def clinvar_to_evidence_strings(dir_out, allowed_clinical_significance=None, ign
evidence_string_list,
n_ev_strings_per_record)
evidence_list.append(
[clinvarRecord.get_acc(), rs, ','.join(clinvar_trait_list),
[clinvarRecord.accession, rs, ','.join(clinvar_trait_list),
','.join(efo_list)])
n_valid_rs_and_nsv += (clinvarRecord.get_nsv(rcv_to_nsv) is not None)
elif alleleOrigin not in n_unrecognised_allele_origin:
Expand Down Expand Up @@ -285,38 +285,34 @@ def get_cttv_genetics_evidence_string(efo_list, clin_sig, clin_sig_2_activity, c
unrecognised_clin_sigs):
ev_string = evidence_strings.CTTVGeneticsEvidenceString()
ev_string.add_unique_association_field('gene', ensembl_gene_id)
ev_string.add_unique_association_field('clinvarAccession', clinvarRecord.get_acc())
ev_string.add_unique_association_field('clinvarAccession', clinvarRecord.accession)
ev_string.add_unique_association_field('alleleOrigin', 'germline')
try:
ev_string.set_target(ensembl_gene_id_uri, clin_sig_2_activity[clin_sig])
except KeyError:
unrecognised_clin_sigs.add(clin_sig)
ev_string.set_target(ensembl_gene_id_uri, 'http://identifiers.org/cttv.activity/unknown')
ev_string.set_variant('http://identifiers.org/dbsnp/' + rs, get_cttv_variant_type(record['reference'], record['alternate']))
ev_string.set_date(clinvarRecord.get_date())
ev_string.set_db_xref_url('http://identifiers.org/clinvar.record/' + clinvarRecord.get_acc())
ev_string.set_url('http://www.ncbi.nlm.nih.gov/clinvar/' + clinvarRecord.get_acc())
ev_string.set_association(
clin_sig != 'non-pathogenic' and clin_sig != 'probable-non-pathogenic'
and clin_sig != 'likely benign' and clin_sig != 'benign')
ev_string.set_gene_2_var_ev_codes(rcv_to_gene_evidence_codes)
most_severe_so_term = consequenceType.getMostSevereSo()
if most_severe_so_term.get_accession() is None:
ev_string.set_gene_2_var_func_consequence(
'http://targetvalidation.org/sequence/' + most_severe_so_term.get_name())
ev_string.date = clinvarRecord.date
ev_string.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvarRecord.accession
ev_string.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvarRecord.accession
ev_string.association = clin_sig != 'non-pathogenic' and clin_sig != 'probable-non-pathogenic' and clin_sig != 'likely benign' and clin_sig != 'benign'
ev_string.gene_2_var_ev_codes = rcv_to_gene_evidence_codes
most_severe_so_term = consequenceType.most_severe_so
if most_severe_so_term.accession is None:
ev_string.gene_2_var_func_consequence = 'http://targetvalidation.org/sequence/' + most_severe_so_term.so_name
else:
ev_string.set_gene_2_var_func_consequence(
'http://purl.obolibrary.org/obo/' + most_severe_so_term.get_accession().replace(':', '_'))
ev_string.gene_2_var_func_consequence = 'http://purl.obolibrary.org/obo/' + most_severe_so_term.accession.replace(':', '_')

ref_list = list(set(traits_ref_list[trait_counter] + observed_refs_list + measure_set_refs_list))
if len(ref_list) > 0:
ev_string.set_var_2_disease_literature(ref_list)
# Arbitrarily select only one reference among all
ev_string.set_unique_reference(ref_list[0])
ev_string.set_top_level_literature(ref_list)
ev_string.unique_reference = ref_list[0]
ev_string.top_level_literature = ref_list
efo_list.sort()
# Just (arbitrarily) adding one of the potentially multiple EFO terms because of schema constraints
ev_string.set_disease(efo_list[0])
ev_string.disease = efo_list[0]
ev_string.add_unique_association_field('phenotype', efo_list[0])
n_more_than_one_efo_term += (len(efo_list) > 1)
traits.update(set(efo_list))
Expand All @@ -330,31 +326,29 @@ def get_cttv_somatic_evidence_string(efo_list, clin_sig, clin_sig_2_activity, cl
unrecognised_clin_sigs, consequenceType):
ev_string = evidence_strings.CTTVSomaticEvidenceString()
ev_string.add_unique_association_field('gene', ensembl_gene_id)
ev_string.add_unique_association_field('clinvarAccession', clinvarRecord.get_acc())
ev_string.add_unique_association_field('clinvarAccession', clinvarRecord.accession)
ev_string.add_unique_association_field('alleleOrigin', 'somatic')
try:
ev_string.set_target(ensembl_gene_id_uri, clin_sig_2_activity[clin_sig])
except KeyError:
unrecognised_clin_sigs.add(clin_sig)
ev_string.set_target(ensembl_gene_id_uri, 'http://identifiers.org/cttv.activity/unknown')

ev_string.set_date(clinvarRecord.get_date())
ev_string.set_db_xref_url('http://identifiers.org/clinvar.record/' + clinvarRecord.get_acc())
ev_string.set_url('http://www.ncbi.nlm.nih.gov/clinvar/' + clinvarRecord.get_acc())
ev_string.set_association(
clin_sig != 'non-pathogenic' and clin_sig != 'probable-non-pathogenic'
and clin_sig != 'likely benign' and clin_sig != 'benign')
ev_string.date = clinvarRecord.date
ev_string.db_xref_url = 'http://identifiers.org/clinvar.record/' + clinvarRecord.accession
ev_string.url = 'http://www.ncbi.nlm.nih.gov/clinvar/' + clinvarRecord.accession
ev_string.association = (clin_sig != 'non-pathogenic' and clin_sig != 'probable-non-pathogenic' and clin_sig != 'likely benign' and clin_sig != 'benign')

ev_string.set_known_mutations(consequenceType)

ref_list = list(set(trait_refs_list[trait_counter] + observed_refs_list + measure_set_refs_list))
if len(ref_list) > 0:
ev_string.set_evidence_literature(ref_list)
ev_string.set_top_level_literature(ref_list)
ev_string.evidence_literature = ref_list
ev_string.top_level_literature = ref_list

efo_list.sort()
# Just (arbitrarily) adding one of the potentially multiple EFO terms because of schema constraints
ev_string.set_disease(efo_list[0])
ev_string.disease = efo_list[0]
ev_string.add_unique_association_field('phenotype', efo_list[0])
n_more_than_one_efo_term += (len(efo_list) > 1)
traits.update(set(efo_list))
Expand All @@ -369,13 +363,13 @@ def add_evidence_string(clinvarRecord, ev_string, evidence_string_list, n_eviden
n_evidence_strings_per_record += 1
except jsonschema.exceptions.ValidationError as err:
print('Error: evidence_string does not validate against schema.')
print('ClinVar accession: ' + clinvarRecord.get_acc())
print('ClinVar accession: ' + clinvarRecord.accession)
print(err)
print(json.dumps(ev_string))
sys.exit(1)
except efo_term.EFOTerm.IsObsoleteException as err:
print('Error: obsolete EFO term.')
print('Term: ' + ev_string.get_disease().get_id())
print('Term: ' + ev_string.get_disease().efoid)
print(err)
print(json.dumps(ev_string))
sys.exit(1)
Expand Down
Loading

0 comments on commit d2d3ced

Please sign in to comment.