
Version 0.1.0 #21

Open · wants to merge 24 commits into master

Commits:
5d2d2e3  Improved forward pass efficiency and general enhancements (muammar, Apr 12, 2020)
69fb154  Fixed prediction for new format (muammar, Apr 13, 2020)
d21cf8d  Improvements to Gaussian class and new model for retention times (muammar, Apr 29, 2020)
58b14a1  G2 symmetry functions are vectorized (muammar, Jul 29, 2020)
cdd971f  Merge branch 'master' into 0.1.0 (muammar, Aug 12, 2020)
8e3dc78  calculate_G3 is now "vectorized" to help compute forces (muammar, Aug 12, 2020)
ae532cb  Gaussian class can now set requires_grad to True for training forces (muammar, Aug 13, 2020)
5f913f2  More changes to activate force training (muammar, Aug 14, 2020)
136cc5f  AtomicMSELoss is now a class with a __call__ method (muammar, Aug 21, 2020)
fa5e842  MinMaxScaler pure PyTorch implementation and respective changes (muammar, Sep 17, 2020)
f3e66da  Checkpoint (muammar, Sep 30, 2020)
156251c  Merge branch 'master' into 0.1.0 (muammar, Oct 11, 2020)
5c63474  Merge branch 'master' into 0.1.0 (muammar, Jan 19, 2021)
dbdf626  Fix try/except and Black cleanup (muammar, Jan 19, 2021)
e849bf8  Fix Gaussian features (muammar, Jan 19, 2021)
8635bc6  Commit fixes training and general improvements (May 15, 2021)
e47ccb5  Updated parity() function in visualization (muammar, Sep 1, 2021)
5411556  Update visualization.py (muammar, Sep 2, 2021)
c2b43a3  Remove unused imports (muammar, Sep 2, 2021)
adfdf16  Code is able to run on a single core (muammar, Feb 26, 2022)
3c98466  Pyflakes cleaning (muammar, Feb 26, 2022)
dc658ca  Fixed feature computation on single core (muammar, Apr 9, 2022)
c199cee  Blacked and some minimal improvements (muammar, May 20, 2023)
bed710f  Updated environment (muammar, Dec 9, 2024)
Changes from 1 commit: c199cee4c443f73d3f558bd68040a154cfcaea60
"Blacked and some minimal improvements." (muammar, committed May 20, 2023)
6 changes: 5 additions & 1 deletion in examples/ani/ani_training.py

@@ -72,7 +72,11 @@ def train():
     cutoff = {"radial": rcr, "angular": rca}

     calc = Potentials(
-        features=AEV(cutoff=cutoff, normalized=normalized, custom=custom,),
+        features=AEV(
+            cutoff=cutoff,
+            normalized=normalized,
+            custom=custom,
+        ),
         model=NeuralNetwork(hiddenlayers=(n, n), activation=activation),
         label="cu_training",
     )
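Most hunks in this commit are mechanical reformatting by the Black formatter. A minimal sketch of the rule at work in the hunk above, Black's "magic trailing comma" (illustrative code, not from ml4chem): a call with no trailing comma stays on one line if it fits, while a trailing comma inside the parentheses forces one argument per line.

    def f(x, y):
        return x + y

    # No trailing comma: Black keeps the call on one line.
    f(1, 2)

    # Trailing comma present: Black explodes the call, one argument per line.
    f(
        1,
        2,
    )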
4 changes: 2 additions & 2 deletions in ml4chem/active.py

@@ -12,9 +12,9 @@ class ActiveLearning(object):
     Parameters
     ----------
     labeled : list
         List of graphs or objects.
     unlabeled : object
         List of graphs or objects.
     atomistic : bool, optional
         Atomistic similarities?, by default False.
     """
2 changes: 1 addition & 1 deletion in ml4chem/atomistic/features/aev.py

@@ -175,7 +175,7 @@ def get_symmetry_functions(
     zetas : list
         List of zetas to build the Gaussian function.
     Rs : list
         List to shift the center of the gaussian distributions.
     Rs_a : list
         List to shift the center of the gaussian distributions of angular
         symmetry functions.
1 change: 0 additions & 1 deletion in ml4chem/atomistic/features/cartesian.py

@@ -245,7 +245,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):

             feature_space[key].append((symbol, scaled))
         else:
-
             feature_space = OrderedDict(zip(hashes, feature_space))

         fp_time = time.time() - initial_time
3 changes: 1 addition & 2 deletions in ml4chem/atomistic/features/coulombmatrix.py

@@ -238,7 +238,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
         feature_space = []

         if svm and purpose == "training":
-
             for i, image in enumerate(images.items()):
                 restacked = client.submit(
                     self.restack_image, *(i, image, scaled_feature_space, svm)

@@ -319,7 +318,7 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
         return self.feature_space

     def stack_features(self, symbols, image_index, stacked_features):
-        """Stack features """
+        """Stack features"""

         features = list(zip(symbols, stacked_features[image_index].result()))
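For context on what this file computes: the standard Coulomb matrix descriptor (Rupp et al., 2012) can be sketched as below. This is the textbook definition, assumed here for illustration rather than copied from ml4chem's implementation.

    import numpy as np

    def coulomb_matrix(Z, R):
        """Textbook Coulomb matrix: C_ii = 0.5 * Z_i**2.4 and
        C_ij = Z_i * Z_j / |R_i - R_j| for i != j."""
        n = len(Z)
        C = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                if i == j:
                    C[i, j] = 0.5 * Z[i] ** 2.4
                else:
                    C[i, j] = Z[i] * Z[j] / np.linalg.norm(R[i] - R[j])
        return C

    # Example: a water-like geometry (atomic numbers O=8, H=1).
    Z = [8, 1, 1]
    R = np.array([[0.0, 0.0, 0.0], [0.96, 0.0, 0.0], [-0.24, 0.93, 0.0]])
    print(coulomb_matrix(Z, R).round(2))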
20 changes: 11 additions & 9 deletions in ml4chem/atomistic/features/gaussian.py

@@ -313,8 +313,11 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):

         if self.batch_size is None:
             self.batch_size = data.get_total_number_atoms()
+        logger.info(f"Batch size: {self.batch_size}")

+        logger.info(f"Getting chunks with {self.batch_size}")
         chunks = get_chunks(images, self.batch_size, svm=svm)
+        logger.info(f"Chunks are ready!")

         ini = end = 0
         self.coordinates = []

@@ -399,7 +402,7 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
             if client is None:
                 pass
             else:
-                intermediate = client.persist(intermediate, scheduler=self.scheduler)
+                intermediate = client.compute(intermediate, scheduler=self.scheduler)
                 stacked_features += intermediate
                 self.coordinates.append(coordinates_)
                 del intermediate
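The persist-to-compute switch above changes what the scheduler hands back: persist keeps lazy collections alive on the cluster, while compute returns futures whose concrete results can be gathered and concatenated, as the stacking code expects. A minimal sketch assuming a dask.distributed Client (illustrative values, not project code):

    import dask
    from dask.distributed import Client

    client = Client(processes=False)  # assumed in-process cluster for the demo

    lazy = [dask.delayed(pow)(i, 2) for i in range(4)]

    persisted = client.persist(lazy)  # still Delayed objects; tasks run on the cluster
    futures = client.compute(lazy)    # one Future per item
    print(client.gather(futures))     # [0, 1, 4, 9]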
@@ -417,7 +420,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
             logger.info("")

             if self.preprocessor is not None and svm:
-
                 scaled_feature_space = []

                 # To take advantage of dask_ml we need to convert our numpy array
Expand Down Expand Up @@ -606,7 +608,7 @@ def to_pandas(self):
return pd.DataFrame.from_dict(self.feature_space, orient="index")

def stack_features(self, indices, stacked_features):
"""Stack features """
"""Stack features"""

features = []
for index in indices:
@@ -993,12 +995,12 @@ def calculate_G2(
         Ris = np.array(Ris)
         Rjs = np.array(Rjs)
         Rij = np.linalg.norm(Rjs - Ris, axis=1)
-        feature = np.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)
+        feature = np.exp(-eta * (Rij**2.0) / (Rc**2.0)) * cutofffxn(Rij)
     else:
         Ris = torch.stack(Ris)
         Rjs = torch.stack(Rjs)
         Rij = torch.norm(Rjs - Ris, dim=1)
-        feature = torch.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)
+        feature = torch.exp(-eta * (Rij**2.0) / (Rc**2.0)) * cutofffxn(Rij)

     if weighted:
         feature *= weights
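Summing the per-neighbor terms computed above gives the radial (G2-type) symmetry function, sketched from the code as

    G_i^2 = \sum_j \exp(-\eta R_{ij}^2 / R_c^2) \, f_c(R_{ij}),

where f_c is the cutoff function (cutofffxn) and the weighted variant multiplies each term by its neighbor weight.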
@@ -1095,7 +1097,7 @@ def calculate_G3(

         cos_theta_ijk = angles_row_wise(Rij_vector, Rik_vector, numpy=True)
         term = (1.0 + gamma * cos_theta_ijk) ** zeta
-        term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
+        term *= np.exp(-eta * (Rij**2.0 + Rik**2.0 + Rjk**2.0) / (Rc**2.0))
     else:
         neighborpositions_j = torch.stack(neighborpositions_j)
         Rij_vector = neighborpositions_j - Ri

@@ -1110,7 +1112,7 @@ def calculate_G3(

         cos_theta_ijk = angles_row_wise(Rij_vector, Rik_vector, numpy=False)
         term = (1.0 + gamma * cos_theta_ijk) ** zeta
-        term *= torch.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
+        term *= torch.exp(-eta * (Rij**2.0 + Rik**2.0 + Rjk**2.0) / (Rc**2.0))

     if weighted:
         term *= weighted_h(image_molecule, n_indices)
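Both branches assemble the same angular (G3-type) term row-wise over neighbor pairs (j, k), sketched from the code as

    (1 + \gamma \cos\theta_{ijk})^\zeta \exp(-\eta (R_{ij}^2 + R_{ik}^2 + R_{jk}^2) / R_c^2);

the cutoff-function products and normalization are presumably applied in the parts of calculate_G3 not shown in this hunk.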
Expand Down Expand Up @@ -1233,7 +1235,7 @@ def calculate_G4(
Rik = np.linalg.norm(Rik_vector)
cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
term = (1.0 + gamma * cos_theta_ijk) ** zeta
term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0) / (Rc ** 2.0))
term *= np.exp(-eta * (Rij**2.0 + Rik**2.0) / (Rc**2.0))

if weighted:
term *= weighted_h(image_molecule, n_indices)
Expand All @@ -1246,7 +1248,7 @@ def calculate_G4(
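Relative to G3, the radial damping in G4 drops the R_jk term, as the changed line above shows: \exp(-\eta (R_{ij}^2 + R_{ik}^2) / R_c^2).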


 def weighted_h(image_molecule, n_indices):
-    """ Calculate the atomic numbers of neighboring atoms for a molecule,
+    """Calculate the atomic numbers of neighboring atoms for a molecule,
     then multiplies each neighbor atomic number by each other.

     Parameters
     ----------
8 changes: 5 additions & 3 deletions in ml4chem/atomistic/models/autoencoders.py

@@ -172,7 +172,6 @@ def prepare_model(
             values = [h, mu, logvar]
             encoder = torch.nn.ModuleDict(list(map(list, zip(keys, values))))
         else:
-
             encoder = torch.nn.Sequential(*encoder)

         """

@@ -821,7 +820,6 @@ def __init__(
         lr_scheduler=None,
         **kwargs
     ):
-
         supported_keys = ["anneal", "penalize_latent"]

         if len(kwargs.items()) == 0:

@@ -1148,7 +1146,11 @@ def train_batches(
             }

         else:
-            outputs, mus_latent, logvars_latent, = model(inputs)
+            (
+                outputs,
+                mus_latent,
+                logvars_latent,
+            ) = model(inputs)

             args = {
                 "outputs": outputs,
11 changes: 5 additions & 6 deletions in ml4chem/atomistic/models/base.py

@@ -28,20 +28,20 @@ def forward(self, X):

     def feature_preparation(self, features, data, purpose="training"):
         """Vectorized data structure

         Parameters
         ----------
         features : dict, iter
             An iterator or dictionary.
         data : obj
             An ML4Chem data object.
         purpose : str, optional
             Purpose of the features, by default "training"

         Returns
         -------
         rearrengements, conditions
             Rearranged features and conditions.
         """

         data.get_largest_number_atoms(purpose)

@@ -77,7 +77,6 @@ def feature_preparation(self, features, data, purpose="training"):

         tensor_size = tensors.size()[0]
         if tensor_size < data.largest_number_atoms[symbol]:
-
             diff = data.largest_number_atoms[symbol] - tensor_size
             expand = torch.zeros(diff, self.input_dimension)
             tensors = torch.cat([tensors, expand])
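The padding at the end of this hunk is what lets per-image atomic tensors stack into a single batch. A minimal runnable sketch under assumed shapes (not the project's exact code):

    import torch

    def pad_atoms(tensors: torch.Tensor, largest_number_atoms: int) -> torch.Tensor:
        """Zero-pad an (n_atoms, input_dimension) tensor to the largest atom count."""
        diff = largest_number_atoms - tensors.size(0)
        if diff > 0:
            expand = torch.zeros(diff, tensors.size(1))
            tensors = torch.cat([tensors, expand])
        return tensors

    padded = pad_atoms(torch.ones(3, 4), 5)
    print(padded.shape)  # torch.Size([5, 4]); the last two rows are zeros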
14 changes: 6 additions & 8 deletions in ml4chem/atomistic/models/kernelridge.py

@@ -123,7 +123,6 @@ def __init__(
         weights=None,
         **kwargs
     ):
-
         np.set_printoptions(precision=30, threshold=999999999)
         self.kernel = kernel
         self.sigma = sigma

@@ -305,7 +304,6 @@ def get_kernel_matrix(self, feature_space, reference_features, purpose):
                 f_map.append(1)

             if purpose == "training":
-
                 for j in range(counter, reference_lenght):
                     j_symbol, j_afp = reference_features[j]

@@ -515,7 +513,7 @@ def get_sigma(self, sigma, forcetraining=False):

 @dask.delayed
 def linear(feature_i, feature_j, i_symbol=None, j_symbol=None):
-    """ Compute a linear kernel
+    """Compute a linear kernel

     Parameters
     ----------

@@ -543,7 +541,7 @@ def linear(feature_i, feature_j, i_symbol=None, j_symbol=None):

 @dask.delayed
 def rbf(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the rbf (AKA Gaussian) kernel.
+    """Compute the rbf (AKA Gaussian) kernel.

     Parameters
     ----------

@@ -586,14 +584,14 @@ def rbf(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
         return anisotropic_rbf
     else:
         rbf = np.exp(
-            -(np.linalg.norm(feature_i - feature_j) ** 2.0) / (2.0 * sigma ** 2.0)
+            -(np.linalg.norm(feature_i - feature_j) ** 2.0) / (2.0 * sigma**2.0)
         )
         return rbf
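For reference, the isotropic branch above is the standard Gaussian kernel. A one-function sketch (not the dask-delayed project version):

    import numpy as np

    def rbf_kernel(x, y, sigma=1.0):
        # k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
        return np.exp(-np.linalg.norm(x - y) ** 2.0 / (2.0 * sigma**2.0))

    print(rbf_kernel(np.array([0.0, 0.0]), np.array([1.0, 1.0])))  # exp(-1), about 0.368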


@dask.delayed
 def exponential(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the exponential kernel
+    """Compute the exponential kernel

     Parameters
     ----------

@@ -639,14 +637,14 @@ def exponential(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
         return anisotropic_exp
     else:
         exponential = np.exp(
-            -(np.linalg.norm(feature_i - feature_j)) / (2.0 * sigma ** 2)
+            -(np.linalg.norm(feature_i - feature_j)) / (2.0 * sigma**2)
         )
         return exponential


 @dask.delayed
 def laplacian(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the laplacian kernel
+    """Compute the laplacian kernel

     Parameters
     ----------
18 changes: 14 additions & 4 deletions in ml4chem/atomistic/models/loss.py

@@ -27,7 +27,11 @@ def name(self):
         return "Atomistic Loss"

     def __call__(
-        self, outputs, targets, atoms_per_image, uncertainty=None,
+        self,
+        outputs,
+        targets,
+        atoms_per_image,
+        uncertainty=None,
     ):
         """Call the AtomicMSELoss loss

@@ -50,7 +54,13 @@ def __call__(
         """

         if uncertainty is None:
-            target_energy = torch.tensor(targets["energies"]).unsqueeze(1)
+            target_energy = (
+                torch.tensor(targets["energies"])
+                .unsqueeze(1)
+                .clone()
+                .detach()
+                .requires_grad_(True)
+            )
             criterion = torch.nn.MSELoss(reduction="sum")
             outputs_atom = torch.div(
                 outputs["energies"].unsqueeze(1), atoms_per_image.unsqueeze(1)
@@ -293,11 +303,11 @@ def get_pairwise_distances(x, y=None, squared=False):
     distances
         Pairwise distances.
     """
-    x_norm = (x ** 2).sum(1).view(-1, 1)
+    x_norm = (x**2).sum(1).view(-1, 1)

     if y is not None:
         y_t = torch.transpose(y, 0, 1)
-        y_norm = (y ** 2).sum(1).view(1, -1)
+        y_norm = (y**2).sum(1).view(1, -1)
     else:
         y_t = torch.transpose(x, 0, 1)
         y_norm = x_norm.view(1, -1)
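The x_norm and y_norm terms above implement the standard expansion ||x_i - y_j||^2 = ||x_i||^2 - 2 x_i . y_j + ||y_j||^2, which yields all pairwise distances with a single matrix multiply. A small self-check under assumed shapes:

    import torch

    x = torch.randn(5, 3)
    y = torch.randn(7, 3)

    x_norm = (x**2).sum(1).view(-1, 1)      # (5, 1)
    y_norm = (y**2).sum(1).view(1, -1)      # (1, 7)
    d2 = x_norm + y_norm - 2.0 * x @ y.t()  # (5, 7) squared pairwise distances

    assert torch.allclose(d2, torch.cdist(x, y) ** 2, atol=1e-4)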
4 changes: 0 additions & 4 deletions in ml4chem/atomistic/models/merger.py

@@ -181,7 +181,6 @@ def train(
         if isinstance(batch_size, int):
             chunks = []
             for inputs_ in inputs:
-
                 if inspect.ismethod(inputs_):
                     chunks.append(inputs_)
                 else:

@@ -393,7 +392,6 @@ def closure(self, index, model, independent_loss, name=None):
         inputs = []
         # FIXME this is not scaling to n number of models.
         for chunk_index, chunk in enumerate(self.chunks[index - 1]):
-
             inputs_ = self.chunks[index][chunk_index](OrderedDict(chunk.result()))
             inputs.append(client.scatter(inputs_))

@@ -424,7 +422,6 @@ def closure(self, index, model, independent_loss, name=None):
             return loss, outputs_

         else:  # Models are dependent on each other
-
             running_loss = torch.tensor(0, dtype=torch.float)
             accumulation = []

@@ -481,7 +478,6 @@ def train_batches(

         losses = []
         for model_index, model in enumerate(models):
-
             output = outputs[model_index]

             if model.name() == "PytorchPotentials":
6 changes: 2 additions & 4 deletions in ml4chem/atomistic/models/neuralnetwork.py

@@ -173,11 +173,11 @@ def forward(self, X, conditions):
         X : dict
             Dictionary of inputs in the feature space.
         condition : dict
             A dict of tensors per atom type with conditions.

         Returns
         -------
         outputs
             A dict of tensors with energies per atom.
         """
         outputs = {}
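The docstring above describes the atomistic forward pass: one sub-network per chemical symbol, with "conditions" masking padded atoms. A minimal sketch of that pattern (shapes, names, and the masking detail are assumptions, not ml4chem's exact code):

    import torch

    class PerElementNet(torch.nn.Module):
        def __init__(self, input_dim=8, hidden=10, symbols=("Cu", "O")):
            super().__init__()
            # One small feed-forward network per chemical symbol.
            self.linears = torch.nn.ModuleDict(
                {
                    s: torch.nn.Sequential(
                        torch.nn.Linear(input_dim, hidden),
                        torch.nn.Tanh(),
                        torch.nn.Linear(hidden, 1),
                    )
                    for s in symbols
                }
            )

        def forward(self, X, conditions):
            outputs = {}
            for symbol, features in X.items():
                atomic_energies = self.linears[symbol](features).squeeze(-1)
                # Zero out contributions from zero-padded (non-existent) atoms.
                outputs[symbol] = atomic_energies * conditions[symbol]
            return outputs

    net = PerElementNet()
    X = {"Cu": torch.randn(2, 5, 8)}       # 2 images, 5 atom slots, 8 features
    conditions = {"Cu": torch.ones(2, 5)}  # 1 = real atom, 0 = padding
    print(net(X, conditions)["Cu"].shape)  # torch.Size([2, 5])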
@@ -235,7 +235,6 @@ def get_activations(self, images, model=None, numpy=True):

         for hash, data in images.items():
             for index, (symbol, features) in enumerate(data):
-
                 counter = 0
                 layer_counter = 0
                 for _, layer in enumerate(model.linears[symbol].modules()):

@@ -361,7 +360,6 @@ def __init__(
         test=None,
         forcetraining=False,
     ):
-
         self.initial_time = time.time()

         if lossfxn is None: