Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
philopon committed Dec 28, 2015
1 parent 26c432a commit b5ca557
Show file tree
Hide file tree
Showing 29 changed files with 1,615 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
__pycache__
*.pyc
*.egg-info/
.DS_Store
.ipynb_checkpoints
/.envrc
/.python3
/dist
1 change: 1 addition & 0 deletions mold/Aromatic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
26 changes: 26 additions & 0 deletions mold/Aromatic/_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from .._base import Descriptor


class AromaticAtomsCount(Descriptor):
    """Descriptor ``nAromAtom``: number of atoms flagged as aromatic."""

    descriptor_name = 'nAromAtom'

    @property
    def descriptor_key(self):
        """Key for this descriptor; it takes no parameters."""
        return self.make_key()

    def calculate(self, mol):
        """Return how many atoms of ``mol`` report ``GetIsAromatic()``."""
        aromatic_atoms = [atom for atom in mol.GetAtoms() if atom.GetIsAromatic()]
        return len(aromatic_atoms)


class AromaticBondsCount(Descriptor):
    """Descriptor ``nAromBond``: number of bonds flagged as aromatic."""

    descriptor_name = 'nAromBond'

    @property
    def descriptor_key(self):
        """Key for this descriptor; it takes no parameters."""
        return self.make_key()

    def calculate(self, mol):
        """Return how many bonds of ``mol`` report ``GetIsAromatic()``."""
        aromatic_bonds = [bond for bond in mol.GetBonds() if bond.GetIsAromatic()]
        return len(aromatic_bonds)

# Descriptor classes defined by this module.
_descriptors = [AromaticAtomsCount, AromaticBondsCount]
# Export exactly those classes, by name, from ``import *``.
__all__ = [d.__name__ for d in _descriptors]
1 change: 1 addition & 0 deletions mold/AtomCount/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
47 changes: 47 additions & 0 deletions mold/AtomCount/_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from .._base import Descriptor


class AtomCount(Descriptor):
    """Count atoms of one kind; the descriptor is named ``'n' + symbol``.

    ``symbol`` selects the counting routine:

    * ``'Atom'`` / ``'HeavyAtom'`` -- ``mol.GetNumAtoms()``.  Both use the
      same routine; they differ only in ``explicitHydrogens``
      (NOTE(review): presumably the caller adds or strips hydrogens
      according to that flag -- confirm against the base class).
    * ``'X'`` -- atoms whose atomic number is 9, 17, 35, 53, 85 or 117.
    * anything else -- atoms whose ``GetSymbol()`` equals ``symbol``.
    """

    descriptor_defaults = [
        ('Atom',), ('HeavyAtom',),
        ('H',), ('B',), ('C',), ('N',), ('O',), ('S',), ('P',),
        ('F',), ('Cl',), ('Br',), ('I',), ('X',),
    ]

    def __init__(self, symbol):
        self.symbol = symbol

        # Bind the counting routine once; ``calculate`` just delegates to it.
        if symbol == 'X':
            counter = self.calcX
        elif symbol in ('Atom', 'HeavyAtom'):
            counter = self.calcAll
        else:
            counter = self.calc
        self.f = counter

    @property
    def explicitHydrogens(self):
        """True only for the ``'H'`` and ``'Atom'`` counts."""
        return self.symbol in {'H', 'Atom'}

    @property
    def descriptor_name(self):
        """Descriptor name: ``'n'`` followed by the symbol."""
        return 'n' + self.symbol

    @property
    def descriptor_key(self):
        """Key for this descriptor, parameterised by the symbol."""
        return self.make_key(self.symbol)

    def calcX(self, mol):
        """Count atoms whose atomic number is in the fixed set below."""
        atomic_numbers = {9, 17, 35, 53, 85, 117}
        return sum(
            1 for atom in mol.GetAtoms()
            if atom.GetAtomicNum() in atomic_numbers
        )

    def calc(self, mol):
        """Count atoms whose element symbol equals ``self.symbol``."""
        wanted = self.symbol
        return sum(1 for atom in mol.GetAtoms() if atom.GetSymbol() == wanted)

    def calcAll(self, mol):
        """Return the number of atoms reported by ``mol.GetNumAtoms()``."""
        return mol.GetNumAtoms()

    def calculate(self, mol):
        """Apply the routine chosen in ``__init__`` to ``mol``."""
        return self.f(mol)

# Descriptor classes defined by this module.
_descriptors = [AtomCount]
# Export exactly those classes, by name, from ``import *``.
__all__ = [d.__name__ for d in _descriptors]
1 change: 1 addition & 0 deletions mold/Autocorrelation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
205 changes: 205 additions & 0 deletions mold/Autocorrelation/_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
from .._base import Descriptor
from rdkit import Chem
from .. import _atomic_property
import numpy as np


class AutocorrelationBase(Descriptor):
    """Shared settings for the autocorrelation descriptors and helpers.

    Sets ``explicitHydrogens`` to True for every subclass and derives
    ``gasteigerCharges`` from the instance's ``attribute`` (if it has one).
    """

    explicitHydrogens = True

    @property
    def gasteigerCharges(self):
        """True when ``self.attribute`` is the explicit-H charge getter.

        Instances without an ``attribute`` fall back to ``0``, which never
        equals the getter, so the result is False for them.
        """
        charge_getter = _atomic_property.get_charge_explicitHs
        current = getattr(self, 'attribute', 0)
        return current == charge_getter


class DistanceMatrix(AutocorrelationBase):
    """Helper descriptor that yields the molecule's distance matrix."""

    @property
    def descriptor_key(self):
        """Key for this helper; it takes no parameters."""
        return self.make_key()

    def calculate(self, mol):
        """Return ``Chem.GetDistanceMatrix(mol)`` unchanged."""
        distance_matrix = Chem.GetDistanceMatrix(mol)
        return distance_matrix


class AVec(AutocorrelationBase):
    """Helper descriptor: per-atom values of one property as a NumPy array.

    :param attribute: callable applied to each atom of the molecule.
    """

    def __init__(self, attribute):
        self.attribute = attribute

    @property
    def descriptor_key(self):
        """Key for this helper, parameterised by the property callable."""
        return self.make_key(self.attribute)

    def calculate(self, mol):
        """Return ``attribute(atom)`` for every atom, in ``GetAtoms()`` order."""
        per_atom = list(map(self.attribute, mol.GetAtoms()))
        return np.array(per_atom)


class CAVec(AutocorrelationBase):
    """Helper descriptor: the ``AVec`` property vector with its mean removed.

    :param attribute: property callable; used to build this key and the
        key of the ``AVec`` dependency.
    """

    def __init__(self, attribute):
        self.attribute = attribute

    @property
    def descriptor_key(self):
        """Key for this helper, parameterised by the property callable."""
        return self.make_key(self.attribute)

    @property
    def dependencies(self):
        """Request the matching ``AVec`` result as argument ``avec``."""
        return {'avec': AVec.make_key(self.attribute)}

    def calculate(self, mol, avec):
        """Return ``avec`` shifted so that its mean is zero."""
        centre = avec.mean()
        return avec - centre


class GMat(AutocorrelationBase):
    """Helper descriptor: mask of atom pairs at exactly one distance.

    :param distance: distance value compared against the distance matrix.
    """

    def __init__(self, distance):
        self.distance = distance

    @property
    def descriptor_key(self):
        """Key for this helper, parameterised by the distance."""
        return self.make_key(self.distance)

    @property
    def dependencies(self):
        """Request the ``DistanceMatrix`` result as argument ``dmat``."""
        return {'dmat': DistanceMatrix.make_key()}

    def calculate(self, mol, dmat):
        """Return the element-wise comparison ``dmat == self.distance``."""
        at_distance = dmat == self.distance
        return at_distance


class GSum(AutocorrelationBase):
    """Helper descriptor: sum of the ``GMat`` mask for one distance.

    :param distance: distance value; used to build this key and the key
        of the ``GMat`` dependency.
    """

    def __init__(self, distance):
        self.distance = distance

    @property
    def descriptor_key(self):
        """Key for this helper, parameterised by the distance."""
        return self.make_key(self.distance)

    @property
    def dependencies(self):
        """Request the matching ``GMat`` result as argument ``gmat``."""
        return {'gmat': GMat.make_key(self.distance)}

    def calculate(self, mol, gmat):
        """Return ``gmat.sum()``, or NaN when that sum is zero.

        NaN (rather than 0) keeps descriptors that divide by this value
        from dividing by zero.
        """
        total = gmat.sum()
        return np.nan if total == 0 else total


class Autocorrelation(AutocorrelationBase):
    """Base class for autocorrelation descriptors.

    Each instance is parameterised by a distance and an atomic property.
    The class also provides the keys of the helper and sibling descriptors
    that subclasses list in their ``dependencies``.

    :param distance: stored as ``self.distance``; appears in the descriptor
        name and in every key built here.
    :param attribute: ``'c'`` selects
        ``_atomic_property.get_charge_explicitHs`` with the name ``'c'``.
        Any other value is resolved through ``_atomic_property.getter``,
        whose result is unpacked as ``(attr_name, attribute)``.
    """

    def __init__(self, distance, attribute):
        if attribute == 'c':
            self.attr_name = 'c'
            self.attribute = _atomic_property.get_charge_explicitHs
        else:
            self.attr_name, self.attribute = _atomic_property.getter(attribute)

        self.distance = distance

    @property
    def descriptor_name(self):
        """Name of the form ``<ClassName><distance><attr_name>``."""
        return '{}{}{}'.format(self.__class__.__name__, self.distance, self.attr_name)

    @property
    def descriptor_key(self):
        """Key for this descriptor, built from ``(distance, attribute)``.

        The argument order matches ``__init__`` and the ``_ATS`` /
        ``_ATSC`` / ``_AATSC`` dependency keys below, so a descriptor's own
        key equals the key other descriptors use to depend on it.
        """
        return self.__class__.make_key(self.distance, self.attribute)

    # Keys of the helper descriptors, parameterised like this instance.

    @property
    def _avec(self):
        """Key of the ``AVec`` helper for this property."""
        return AVec.make_key(self.attribute)

    @property
    def _cavec(self):
        """Key of the ``CAVec`` helper for this property."""
        return CAVec.make_key(self.attribute)

    @property
    def _gmat(self):
        """Key of the ``GMat`` helper for this distance."""
        return GMat.make_key(self.distance)

    @property
    def _gsum(self):
        """Key of the ``GSum`` helper for this distance."""
        return GSum.make_key(self.distance)

    # Keys of sibling descriptors with the same distance and property.

    @property
    def _ATS(self):
        """Key of the ``ATS`` descriptor with the same parameters."""
        return ATS.make_key(self.distance, self.attribute)

    @property
    def _ATSC(self):
        """Key of the ``ATSC`` descriptor with the same parameters."""
        return ATSC.make_key(self.distance, self.attribute)

    @property
    def _AATSC(self):
        """Key of the ``AATSC`` descriptor with the same parameters."""
        return AATSC.make_key(self.distance, self.attribute)

# Largest distance used when the descriptor classes in this module build
# their ``descriptor_defaults`` parameter lists.
MAX_DISTANCE = 8


class ATS(Autocorrelation):
    """Autocorrelation of the raw property vector at one distance.

    Defaults cover properties ``m v e p i s`` at distances
    ``0..MAX_DISTANCE``.
    """

    descriptor_defaults = [
        (dist, prop)
        for prop in 'mvepis'
        for dist in range(MAX_DISTANCE + 1)
    ]

    @property
    def dependencies(self):
        """Request the property vector and the distance mask."""
        return {'avec': self._avec, 'gmat': self._gmat}

    def calculate(self, mol, avec, gmat):
        """Return ``avec . gmat . avec``, halved unless the distance is 0."""
        total = float(avec.dot(gmat).dot(avec))
        if self.distance == 0:
            return total
        return total / 2.0


class AATS(ATS):
    """``ATS`` divided by the ``GSum`` value for the same distance."""

    @property
    def dependencies(self):
        """Request the matching ``ATS`` value and the mask sum."""
        return {'ATS': self._ATS, 'gsum': self._gsum}

    def calculate(self, mol, ATS, gsum):
        """Return ``ATS / gsum``, doubled unless the distance is 0."""
        ratio = ATS / gsum
        if self.distance == 0:
            return ratio
        return ratio * 2.0


class ATSC(Autocorrelation):
    """Autocorrelation of the mean-centred property vector at one distance.

    Defaults cover properties ``c m v e p i s`` at distances
    ``0..MAX_DISTANCE``.
    """

    descriptor_defaults = [
        (dist, prop)
        for prop in 'cmvepis'
        for dist in range(MAX_DISTANCE + 1)
    ]

    @property
    def dependencies(self):
        """Request the centred property vector and the distance mask."""
        return {'cavec': self._cavec, 'gmat': self._gmat}

    def calculate(self, mol, cavec, gmat):
        """Return ``cavec . gmat . cavec``, halved unless the distance is 0."""
        total = float(cavec.dot(gmat).dot(cavec))
        if self.distance == 0:
            return total
        return total / 2.0


class AATSC(ATSC):
    """``ATSC`` divided by the ``GSum`` value for the same distance."""

    @property
    def dependencies(self):
        """Request the matching ``ATSC`` value and the mask sum."""
        return {'ATSC': self._ATSC, 'gsum': self._gsum}

    def calculate(self, mol, ATSC, gsum):
        """Return ``ATSC / gsum``, doubled unless the distance is 0."""
        ratio = ATSC / gsum
        if self.distance == 0:
            return ratio
        return ratio * 2.0


class MATS(Autocorrelation):
    """``AATSC`` scaled by atom count over the centred sum of squares.

    Defaults cover properties ``c m v e p i s`` at distances
    ``1..MAX_DISTANCE``.
    """

    descriptor_defaults = [
        (dist, prop)
        for prop in 'cmvepis'
        for dist in range(1, MAX_DISTANCE + 1)
    ]

    @property
    def dependencies(self):
        """Request the raw vector, ``AATSC`` and the centred vector."""
        return {
            'avec': self._avec,
            'AATSC': self._AATSC,
            'cavec': self._cavec,
        }

    def calculate(self, mol, avec, AATSC, cavec):
        """Return ``len(avec) * AATSC / sum(cavec ** 2)``."""
        n_atoms = len(avec)
        sum_of_squares = (cavec ** 2).sum()
        return n_atoms * AATSC / sum_of_squares


class GATS(MATS):
    """Ratio of masked squared pairwise differences to centred variance.

    Uses the same default parameters as ``MATS``.
    """

    @property
    def dependencies(self):
        """Request the raw vector, distance mask, mask sum and centred vector."""
        return {
            'avec': self._avec,
            'gmat': self._gmat,
            'gsum': self._gsum,
            'cavec': self._cavec,
        }

    def calculate(self, mol, avec, gmat, gsum, cavec):
        """Return the masked mean squared difference over the variance term.

        Numerator: sum of ``gmat * (avec[j] - avec[i]) ** 2`` over all
        ``i, j``, divided by ``2 * gsum``.  Denominator:
        ``sum(cavec ** 2) / (len(avec) - 1)``.
        """
        n_atoms = len(avec)
        # pairwise[i, j] == avec[j] - avec[i]
        pairwise = avec[np.newaxis, :] - avec[:, np.newaxis]
        numerator = (gmat * pairwise ** 2).sum() / (2 * gsum)
        denominator = (cavec ** 2).sum() / (n_atoms - 1)
        return numerator / denominator

# Public descriptor classes of this module; the helper classes
# (DistanceMatrix, AVec, CAVec, GMat, GSum) are not listed.
_descriptors = [ATS, AATS, ATSC, AATSC, MATS, GATS]
# Export exactly those classes, by name, from ``import *``.
__all__ = [d.__name__ for d in _descriptors]
1 change: 1 addition & 0 deletions mold/BondCount/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
64 changes: 64 additions & 0 deletions mold/BondCount/_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from .._base import Descriptor


class BondCount(Descriptor):
    """Count bonds of one kind; the descriptor is named ``nBonds<type>``.

    ``bond_type`` selects what is counted:

    * ``''`` / ``'2'`` -- every bond (``mol.GetNumBonds()``)
    * ``'S'`` -- bonds whose ``GetBondTypeAsDouble()`` is 1
    * ``'S2'`` / ``'S3'`` -- as ``'S'``, excluding bonds flagged aromatic
    * ``'D'`` -- bonds whose ``GetBondTypeAsDouble()`` is 2
    * ``'D2'`` -- as ``'D'``, excluding bonds flagged aromatic
    * ``'T'`` / ``'Q'`` -- bonds whose ``GetBondTypeAsDouble()`` is 3 / 4
    * ``'M'`` -- bonds whose ``GetBondTypeAsDouble()`` is greater than 1

    Types that share a counting rule differ in the ``explicitHydrogens``
    and ``kekulize`` flags (NOTE(review): presumably the caller prepares
    the molecule according to these flags -- confirm against the base
    class).

    :param bond_type: one of the type strings listed above.
    """

    descriptor_defaults = [
        ('',), ('2',),
        ('S',), ('S2',), ('S3',),
        ('D',), ('D2',),
        ('T',), ('Q',), ('M',),
    ]

    def __init__(self, bond_type):
        self.bond_type = bond_type

    @property
    def descriptor_name(self):
        """Descriptor name: ``'nBonds'`` followed by the bond type."""
        return 'nBonds{}'.format(self.bond_type)

    @property
    def explicitHydrogens(self):
        """True for the ``'2'``, ``'S'`` and ``'S2'`` counts only."""
        return self.bond_type in ('2', 'S', 'S2')

    @property
    def kekulize(self):
        """True for every type except ``'M'``."""
        return self.bond_type != 'M'

    @property
    def descriptor_key(self):
        """Key for this descriptor, parameterised by the bond type."""
        return self.make_key(self.bond_type)

    def nBonds(self, n, mol):
        """Count bonds whose ``GetBondTypeAsDouble()`` equals ``n``."""
        return sum(1 for b in mol.GetBonds()
                   if b.GetBondTypeAsDouble() == n)

    def nBondsNA(self, n, mol):
        """Like ``nBonds`` but skipping bonds flagged aromatic."""
        return sum(1 for b in mol.GetBonds()
                   if b.GetBondTypeAsDouble() == n
                   and not b.GetIsAromatic())

    def nBondsM(self, mol):
        """Count bonds whose ``GetBondTypeAsDouble()`` is above 1.0."""
        return sum(1 for b in mol.GetBonds()
                   if b.GetBondTypeAsDouble() > 1.0)

    def calculate(self, mol):
        """Return the count selected by ``self.bond_type``.

        Returns ``None`` for a bond type not listed in the class docstring.
        """
        bt = self.bond_type
        # Exact membership; a substring test against ' 2' would also accept
        # ' ' and ' 2', and matches '' only by accident.
        if bt in ('', '2'):
            return mol.GetNumBonds()
        elif bt == 'S':
            return self.nBonds(1, mol)
        elif bt in ('S2', 'S3'):
            return self.nBondsNA(1, mol)
        elif bt == 'D':
            return self.nBonds(2, mol)
        elif bt == 'D2':
            return self.nBondsNA(2, mol)
        elif bt == 'T':
            return self.nBonds(3, mol)
        elif bt == 'Q':
            return self.nBonds(4, mol)
        elif bt == 'M':
            return self.nBondsM(mol)
        return None

# Descriptor classes defined by this module.
_descriptors = [BondCount]
# Export exactly those classes, by name, from ``import *``.
__all__ = [d.__name__ for d in _descriptors]
1 change: 1 addition & 0 deletions mold/BondCount/_skelton/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
14 changes: 14 additions & 0 deletions mold/BondCount/_skelton/_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .._base import Descriptor

# Template module: nothing is exported by ``import *``.
__all__ = []


class NAME(Descriptor):
    """Placeholder descriptor class with empty method bodies."""

    @property
    def descriptor_key(self):
        """Placeholder: currently yields ``None``."""
        return None

    def calculate(self, mol):
        """Placeholder: currently ignores ``mol`` and returns ``None``."""
        return None

# Template module: no descriptor classes are registered here.
_descriptors = []
1 change: 1 addition & 0 deletions mold/CarbonTypes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._descriptor import *
Loading

0 comments on commit b5ca557

Please sign in to comment.