Skip to content

Commit

Permalink
keylookup - experimental changes for drugbank source
Browse files Browse the repository at this point in the history
  • Loading branch information
greg-k-taylor committed Mar 5, 2019
1 parent db4c25b commit 5d1eaa8
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
10 changes: 6 additions & 4 deletions src/hub/dataload/sources/drugbank/drugbank_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from biothings.utils.common import unzipall
from mychem_utils import ExcludeFieldsById
from hub.datatransform.keylookup import MyChemKeyLookup
from biothings.hub.datatransform import CIIDStruct


SRC_META = {
Expand All @@ -33,13 +34,14 @@ class DrugBankUploader(BaseDrugUploader):
])
keylookup = MyChemKeyLookup([
("inchikey", "drugbank.inchi_key"),
("drugbank", "drugbank.drugbank_id"),
("drugbank", "drugbank.id"),
# the following keys could possibly be used to look up 'inchikey' or 'unii'
("chebi", "drugbank.xrefs.chebi"),
("chembl", "drugbank.xrefs.chembl"),
("pubchem", "drugbank.xrefs.pubchem.cid"),
("drugname", "drugbank.name"), # can be used to lookup unii
("inchi", "drugbank.inchi")],
("inchi", "drugbank.inchi"),
# ("drugname", "drugbank.name"), # can be used to lookup unii, disabled for now
],
copy_from_doc=True)

def load_data(self,data_folder):
Expand All @@ -52,7 +54,7 @@ def load_data(self,data_folder):
assert len(xmlfiles) == 1, "Expecting one xml file, got %s" % repr(xmlfiles)
input_file = xmlfiles.pop()
assert os.path.exists(input_file), "Can't find input file '%s'" % input_file
return self.exclude_fields(self.keylookup(load_data))(input_file)
return self.exclude_fields(self.keylookup(load_data, debug=True))(input_file)

def post_update_data(self, *args, **kwargs):
for idxname in ["drugbank.id","drugbank.chebi","drugbank.inchi"]:
Expand Down
6 changes: 4 additions & 2 deletions src/hub/datatransform/mychem_api_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@
###############################################################################
# Drug name Unii lookup
###############################################################################
graph_mychem.add_edge('drugname', 'unii',
object=MyChemInfoEdge('unii.preferred_term', 'unii.unii', url="http://localhost:8000"))
# Drugname key lookup is currently skipped. This edge led to duplicate key errors.
#graph_mychem.add_edge('drugname', 'unii',
# object=MyChemInfoEdge('unii.preferred_term', 'unii.unii', url="http://localhost:8000"))


class MyChemKeyLookup(DataTransformMDB):
Expand All @@ -91,3 +92,4 @@ def __init__(self, input_types, *args, **kwargs):
output_types=['inchikey', 'unii', 'rxnorm', 'drugbank',
'chebi', 'chembl', 'pubchem', 'drugname'],
*args, **kwargs)

0 comments on commit 5d1eaa8

Please sign in to comment.