-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
1,326 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env python | ||
|
||
'''Given a pdb file and an sdf file of the same ligand, find the correspondance | ||
between the atoms and output a pdb with the same atom names as the pdb input | ||
but with the coordinates of the sdf file. Atoms missing in the sdf | ||
will not have their coordinates changed, but note that hydrogens are stripped''' | ||
|
||
import sys | ||
from rdkit.Chem import AllChem | ||
|
||
if len(sys.argv) != 3: | ||
print "Need sdf and pdb files" | ||
sys.exit(1) | ||
|
||
sdf = sys.argv[1] | ||
pdb = sys.argv[2] | ||
|
||
#figure out starting atom number | ||
startnum = 1 | ||
for line in open(pdb): | ||
if line.startswith('ATOM') or line.startswith('HETATM'): | ||
startnum = int(line[6:11]) | ||
break | ||
|
||
pmol = AllChem.MolFromPDBFile(pdb) | ||
smol = AllChem.SDMolSupplier("newsnappy.sdf").next() | ||
pmol = AllChem.AssignBondOrdersFromTemplate(smol,pmol) | ||
|
||
if smol.HasSubstructMatch(pmol): | ||
m = smol.GetSubstructMatch(pmol) | ||
pconf = pmol.GetConformer() | ||
sconf = smol.GetConformer() | ||
for (pi, si) in enumerate(m): | ||
pconf.SetAtomPosition(pi, sconf.GetAtomPosition(si)) | ||
pdbout = AllChem.MolToPDBBlock(pmol, flavor=2).split('\n') | ||
num = startnum | ||
for line in pdbout: | ||
if line.startswith('ATOM') or line.startswith('HETATM'): | ||
print '%s%5d%s' % (line[:6],num,line[11:]) | ||
num += 1 | ||
else: | ||
print "Could not matchup pdb and sdf" | ||
sys.exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
#!/usr/bin/python | ||
|
||
#given a smarts rxn file, a core scaffold smarts file (with name containing connecting atoms) | ||
# and an sdf file, extract the matching scaffolds from of the sdf file and cluster | ||
#them greedily to identify a set of scaffold conformations | ||
|
||
import sys,gzip,argparse | ||
from rdkit.Chem import AllChem | ||
|
||
def subMol(mol, match): | ||
#not sure why this functionality isn't implemented natively | ||
#but get the interconnected bonds for the match | ||
atoms = set(match) | ||
bonds = set() | ||
for a in atoms: | ||
atom = mol.GetAtomWithIdx(a) | ||
for b in atom.GetBonds(): | ||
if b.GetOtherAtomIdx(a) in atoms: | ||
bonds.add(b.GetIdx()) | ||
return AllChem.PathToSubmol(mol,list(bonds)) | ||
|
||
#compute the distances between the matching connecting atoms | ||
#and return true if all distance are small enough | ||
def checkConnect(center, cmatch, mol, match, connectIndices, connect): | ||
cconf = center.GetConformer(0) | ||
mconf = mol.GetConformer(0) | ||
for i in connectIndices: | ||
cidx = cmatch[i] | ||
midx = match[i] | ||
cpt = cconf.GetAtomPosition(cidx) | ||
mpt = mconf.GetAtomPosition(midx) | ||
dist = cpt.Distance(mpt) | ||
if dist > connect: | ||
return False | ||
return True | ||
|
||
#find all the scaffolds in mols that are within rmsd of center and where the connecting | ||
#atoms are within connect, return new cluster and new mols | ||
def createCluster(center,cmatch, mols, pattern, core, rmsd, connectIndices, connect): | ||
cluster = list() | ||
newmols = list() | ||
for (mol,match) in mols: | ||
r = AllChem.GetBestRMS(mol,center,maps=[zip(cmatch,match)]) | ||
if r < rmsd and checkConnect(center, cmatch, mol,match,connectIndices, connect): | ||
cluster.append((mol,match,r)) | ||
else: | ||
newmols.append((mol,match)) | ||
cluster.sort(key = lambda (m,mtch,r): r ) | ||
return (cluster, newmols) | ||
|
||
#find the mol in mols that has the maximum minimum distance between the first | ||
#mol in each cluster | ||
#ACTUALLY, for the tight tolerances we need, this really doesn't make a difference | ||
#and just slows things down, so just pick the first available conformer | ||
def computeNext(clusters,mols): | ||
if len(mols) > 0: | ||
return mols[0] | ||
else: | ||
return (None,None) | ||
max = 0 | ||
best = (None,None) | ||
for (mol,match) in mols: | ||
min = float('inf') | ||
for cl in clusters: | ||
cmol = cl[0][0] | ||
cmatch = cl[0][1] | ||
r = AllChem.GetBestRMS(cmol,mol,maps=[zip(match,cmatch)]) | ||
if r < min: | ||
min = r | ||
if min > max: | ||
max = min | ||
best = (mol,match) | ||
return best | ||
|
||
#MAIN | ||
if len(sys.argv) < 5: | ||
print "Need reaction file, core scaffold file, sdf input file and sdf output" | ||
sys.exit(1) | ||
|
||
parser = argparse.ArgumentParser() | ||
parser.add_argument('-r','--rxn', help="Reaction file") | ||
parser.add_argument('-c','--core',help="Core scaffold with connecting atoms in name") | ||
parser.add_argument('-i','--input',help="Input conformers") | ||
parser.add_argument('-o','--output',help="Clustered core scaffold output") | ||
parser.add_argument("--rmsd",type=float,default=0.5,help="Maximum RMSD for cluster membership") | ||
parser.add_argument("--connect",type=float,default=0.1,help="Maximum allowed deviation of connecting atoms for cluster membership") | ||
parser.add_argument("--sample",type=int,default=1,help="Amount to sample conformations") | ||
args = parser.parse_args() | ||
|
||
rxnf = open(args.rxn) | ||
rxnsm = rxnf.readline().split()[0] #ignore any name | ||
rxn = AllChem.ReactionFromSmarts(rxnsm) | ||
rxn.Initialize() | ||
|
||
if rxn.GetNumProductTemplates() == 1: | ||
product = rxn.GetProductTemplate(0) | ||
reactants = list() | ||
for i in xrange(rxn.GetNumReactantTemplates()): | ||
reactants.append(rxn.GetReactantTemplate(i)) | ||
elif rxn.GetNumReactantTemplates() == 1: | ||
product = rxn.GetReactantTemplate(0) | ||
reactants = list() | ||
for i in xrange(rxn.GetNumProductTemplates()): | ||
reactants.append(rxn.GetProductTemplate(i)) | ||
else: | ||
print "Can have only one product" | ||
sys.exit(1) | ||
|
||
coref = open(args.core) | ||
corel = coref.readline() | ||
coreconnects = corel.split()[1:] | ||
core = AllChem.MolFromSmarts(corel.split()[0]) | ||
|
||
inmols = AllChem.SDMolSupplier(args.input) | ||
if inmols is None: | ||
print "Could not open ",args.input | ||
sys.exit(-1) | ||
|
||
sdwriter = AllChem.SDWriter(args.output) | ||
if sdwriter is None: | ||
print "Could not open ",args.output | ||
sys.exit(-1) | ||
|
||
smart = AllChem.MolToSmarts(product) | ||
pattern = AllChem.MolFromSmarts(smart) | ||
|
||
#figure out the indices of connected atoms in the smart core pattern | ||
connectIndices = list() | ||
for c in coreconnects: | ||
cm = AllChem.MolFromSmarts(c) | ||
a = cm.GetAtoms()[0] | ||
if a.HasProp('molAtomMapNumber'): | ||
mapnum = a.GetProp('molAtomMapNumber') | ||
for sma in core.GetAtoms(): | ||
if sma.HasProp('molAtomMapNumber') and sma.GetProp('molAtomMapNumber') == mapnum: | ||
connectIndices.append(sma.GetIdx()) | ||
|
||
#read all core scaffold molecules into memory | ||
mols = list() | ||
cnt = 0 | ||
for mol in inmols: | ||
if cnt % args.sample == 0 and mol is not None: | ||
try: | ||
mol = AllChem.AddHs(mol) | ||
match = mol.GetSubstructMatch(pattern) #just one? why not, we're only sampling | ||
if match: | ||
sub = subMol(mol, match) | ||
cmatch = sub.GetSubstructMatch(core) | ||
if cmatch: | ||
sub = subMol(sub,cmatch) | ||
mols.append((sub,sub.GetSubstructMatch(core))) | ||
except (KeyboardInterrupt, SystemExit): | ||
raise | ||
except Exception as e: | ||
print "Exception occurred",mol.GetProp('_Name'),e | ||
cnt += 1 | ||
|
||
if len(mols) == 0: | ||
print "No molecules!" | ||
sys.exit(-1) | ||
print "Done reading" | ||
clusters = list() #these are just defined by a list of all the scffolds assigned to the cluster | ||
(center, cmatch) = mols[0] | ||
|
||
while len(mols) > 0: | ||
(cluster, mols) = createCluster(center,cmatch,mols, pattern, core, args.rmsd, connectIndices, args.connect) | ||
clusters.append(cluster) | ||
(center, cmatch) = computeNext(clusters,mols) | ||
|
||
print len(clusters) | ||
for cl in clusters: | ||
cmol = cl[0][0] | ||
cmol.SetProp("ClusterSize",str(len(cl))) | ||
AllChem.GetBestRMS(clusters[0][0][0],cmol) #align to very first | ||
sdwriter.write(cmol) | ||
sdwriter.close() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#!/usr/bin/python | ||
|
||
#given a rxn smarts (either forward or backwards, but there is assumed to be | ||
#only one product) identify a minimal core scaffold that has every connecting atom | ||
#from each reactant and only those scaffold atoms that are necessary to minimally | ||
#connect these atoms | ||
#this assume the input smarts is fully annoated with atom mappings for the heavy atoms | ||
#output the smarts (with atom numbers) of the core scaffold | ||
|
||
import sys | ||
from rdkit.Chem import AllChem | ||
import igraph | ||
|
||
Debug = False | ||
|
||
if len(sys.argv) < 1: | ||
print "Need reaction file" | ||
sys.exit(1) | ||
if len(sys.argv) > 2: | ||
Debug = True | ||
|
||
rxnf = open(sys.argv[1]) | ||
rxnsm = rxnf.readline() | ||
rxn = AllChem.ReactionFromSmarts(rxnsm) | ||
rxn.Initialize() | ||
|
||
if rxn.GetNumProductTemplates() == 1: | ||
product = rxn.GetProductTemplate(0) | ||
reactants = list() | ||
for i in xrange(rxn.GetNumReactantTemplates()): | ||
reactants.append(rxn.GetReactantTemplate(i)) | ||
elif rxn.GetNumReactantTemplates() == 1: | ||
product = rxn.GetReactantTemplate(0) | ||
reactants = list() | ||
for i in xrange(rxn.GetNumProductTemplates()): | ||
reactants.append(rxn.GetProductTemplate(i)) | ||
else: | ||
print "Can have only one product" | ||
sys.exit(1) | ||
|
||
#record what atom mapping number belong to each reactant | ||
reactantMapNumbers = dict() | ||
for (i, react) in enumerate(reactants): | ||
for a in react.GetAtoms(): | ||
if a.HasProp('molAtomMapNumber'): | ||
mapnum = a.GetProp('molAtomMapNumber') | ||
reactantMapNumbers[mapnum] = i | ||
|
||
#now create an igraph from the product | ||
molgraph = igraph.Graph() | ||
molgraph.add_vertices(product.GetNumAtoms()) | ||
|
||
for a in product.GetAtoms(): | ||
s = a.GetIdx() | ||
v = molgraph.vs[s] | ||
v['react'] = -1 #reactant this atom belongs to, -1 if none | ||
v['heavy'] = a.GetAtomicNum() != 1 #note this things [#1,#6] is H only, but that should be okay - if H can go there, no need to worry about scaffold connectivity | ||
if a.HasProp('molAtomMapNumber'): | ||
mapnum = a.GetProp('molAtomMapNumber') | ||
if mapnum in reactantMapNumbers: | ||
v['react'] = reactantMapNumbers[mapnum] | ||
for na in a.GetNeighbors(): | ||
t = na.GetIdx() | ||
molgraph.add_edge(s,t) | ||
|
||
#now identify the connecting heavy atoms for reactants | ||
connecting = list() | ||
for v in molgraph.vs: | ||
if v['react'] >= 0 and v['heavy']: | ||
r = v['react'] | ||
for nv in v.neighbors(): | ||
if nv['react'] != r and nv['heavy']: | ||
connecting.append(v.index) | ||
break | ||
|
||
if Debug: | ||
for i in connecting: | ||
print product.GetAtomWithIdx(i).GetSmarts(),molgraph.vs[i]['react'] | ||
|
||
#compute the shortest paths between all these connecting atoms | ||
#the atoms along these paths make the minimal scaffold | ||
scaffold = set() | ||
for i in connecting: | ||
paths = molgraph.get_all_shortest_paths(i,connecting,mode=igraph.ALL) | ||
for p in paths: | ||
for v in p: | ||
scaffold.add(v) | ||
|
||
if Debug: | ||
print "\nScaffold" | ||
for i in scaffold: | ||
print product.GetAtomWithIdx(i).GetSmarts(),molgraph.vs[i]['react'] | ||
|
||
#need to identify the bonds between the scaffold atoms | ||
bonds = set() | ||
for i in scaffold: | ||
a = product.GetAtomWithIdx(i) | ||
for b in a.GetBonds(): | ||
end = b.GetOtherAtomIdx(i) | ||
if end in scaffold: | ||
bonds.add(b.GetIdx()) | ||
|
||
if Debug: | ||
print "Bonds",list(bonds) | ||
|
||
subMol = AllChem.PathToSubmol(product,list(bonds)) | ||
print AllChem.MolToSmarts(subMol), | ||
for i in connecting: | ||
print product.GetAtomWithIdx(i).GetSmarts(), | ||
print "" |
Oops, something went wrong.