
Version 0.1.0 #21

Open · wants to merge 24 commits into master

Commits:
5d2d2e3  Improved forward pass efficiency and general enhancements (muammar, Apr 12, 2020)
69fb154  Fixed prediction for new format (muammar, Apr 13, 2020)
d21cf8d  Improvements to Gaussian class and new model for retention times (muammar, Apr 29, 2020)
58b14a1  G2 symmetry functions are vectorized (muammar, Jul 29, 2020)
cdd971f  Merge branch 'master' into 0.1.0 (muammar, Aug 12, 2020)
8e3dc78  calculate_G3 is now "vectorized" to help compute forces (muammar, Aug 12, 2020)
ae532cb  Gaussian class can now set requires_grad to True for training forces (muammar, Aug 13, 2020)
5f913f2  More changes to activate force training (muammar, Aug 14, 2020)
136cc5f  AtomicMSELoss is now a class with a __call__ method (muammar, Aug 21, 2020)
fa5e842  MinMaxScaler pure PyTorch implementation and respective changes (muammar, Sep 17, 2020)
f3e66da  Checkpoint (muammar, Sep 30, 2020)
156251c  Merge branch 'master' into 0.1.0 (muammar, Oct 11, 2020)
5c63474  Merge branch 'master' into 0.1.0 (muammar, Jan 19, 2021)
dbdf626  Fix try/except and Black cleanup (muammar, Jan 19, 2021)
e849bf8  Fix Gaussian features (muammar, Jan 19, 2021)
8635bc6  Commit fixes training and general improvements (May 15, 2021)
e47ccb5  Updated parity() function in visualization (muammar, Sep 1, 2021)
5411556  Update visualization.py (muammar, Sep 2, 2021)
c2b43a3  Remove unused imports (muammar, Sep 2, 2021)
adfdf16  Code is able to run on a single core (muammar, Feb 26, 2022)
3c98466  Pyflakes cleaning (muammar, Feb 26, 2022)
dc658ca  Fixed feature computation on single core (muammar, Apr 9, 2022)
c199cee  Blacked and some minimal improvements (muammar, May 20, 2023)
bed710f  Updated environment (muammar, Dec 9, 2024)
Changes from 1 commit: c199cee4c443f73d3f558bd68040a154cfcaea60
"Blacked and some minimal improvements." (muammar, committed May 20, 2023)
6 changes: 5 additions & 1 deletion in examples/ani/ani_training.py

@@ -72,7 +72,11 @@ def train():
     cutoff = {"radial": rcr, "angular": rca}

     calc = Potentials(
-        features=AEV(cutoff=cutoff, normalized=normalized, custom=custom,),
+        features=AEV(
+            cutoff=cutoff,
+            normalized=normalized,
+            custom=custom,
+        ),
         model=NeuralNetwork(hiddenlayers=(n, n), activation=activation),
         label="cu_training",
     )
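Most hunks in this commit are mechanical reformatting by the Black formatter. A minimal sketch of the rule at work in the hunk above, Black's "magic trailing comma" (illustrative code, not from ml4chem): a call with no trailing comma stays on one line if it fits, while a trailing comma inside the parentheses forces one argument per line.

    def f(x, y):
        return x + y

    # No trailing comma: Black keeps the call on one line.
    f(1, 2)

    # Trailing comma present: Black explodes the call, one argument per line.
    f(
        1,
        2,
    )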
4 changes: 2 additions & 2 deletions in ml4chem/active.py

@@ -12,9 +12,9 @@ class ActiveLearning(object):
     Parameters
     ----------
     labeled : list
         List of graphs or objects.
     unlabeled : object
         List of graphs or objects.
     atomistic : bool, optional
         Atomistic similarities?, by default False.
     """
2 changes: 1 addition & 1 deletion in ml4chem/atomistic/features/aev.py

@@ -175,7 +175,7 @@ def get_symmetry_functions(
     zetas : list
         List of zetas to build the Gaussian function.
     Rs : list
         List to shift the center of the gaussian distributions.
     Rs_a : list
         List to shift the center of the gaussian distributions of angular
         symmetry functions.
1 change: 0 additions & 1 deletion in ml4chem/atomistic/features/cartesian.py

@@ -245,7 +245,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):

             feature_space[key].append((symbol, scaled))
         else:
-
             feature_space = OrderedDict(zip(hashes, feature_space))

         fp_time = time.time() - initial_time
3 changes: 1 addition & 2 deletions in ml4chem/atomistic/features/coulombmatrix.py

@@ -238,7 +238,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
         feature_space = []

         if svm and purpose == "training":
-
             for i, image in enumerate(images.items()):
                 restacked = client.submit(
                     self.restack_image, *(i, image, scaled_feature_space, svm)

@@ -319,7 +318,7 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
         return self.feature_space

     def stack_features(self, symbols, image_index, stacked_features):
-        """Stack features """
+        """Stack features"""

         features = list(zip(symbols, stacked_features[image_index].result()))
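For context on what this file computes: the standard Coulomb matrix descriptor (Rupp et al., 2012) can be sketched as below. This is the textbook definition, assumed here for illustration rather than copied from ml4chem's implementation.

    import numpy as np

    def coulomb_matrix(Z, R):
        """Textbook Coulomb matrix: C_ii = 0.5 * Z_i**2.4 and
        C_ij = Z_i * Z_j / |R_i - R_j| for i != j."""
        n = len(Z)
        C = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                if i == j:
                    C[i, j] = 0.5 * Z[i] ** 2.4
                else:
                    C[i, j] = Z[i] * Z[j] / np.linalg.norm(R[i] - R[j])
        return C

    # Example: a water-like geometry (atomic numbers O=8, H=1).
    Z = [8, 1, 1]
    R = np.array([[0.0, 0.0, 0.0], [0.96, 0.0, 0.0], [-0.24, 0.93, 0.0]])
    print(coulomb_matrix(Z, R).round(2))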
20 changes: 11 additions & 9 deletions in ml4chem/atomistic/features/gaussian.py

@@ -313,8 +313,11 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):

         if self.batch_size is None:
             self.batch_size = data.get_total_number_atoms()
+        logger.info(f"Batch size: {self.batch_size}")

+        logger.info(f"Getting chunks with {self.batch_size}")
         chunks = get_chunks(images, self.batch_size, svm=svm)
+        logger.info(f"Chunks are ready!")

         ini = end = 0
         self.coordinates = []

@@ -399,7 +402,7 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
             if client is None:
                 pass
             else:
-                intermediate = client.persist(intermediate, scheduler=self.scheduler)
+                intermediate = client.compute(intermediate, scheduler=self.scheduler)
                 stacked_features += intermediate
                 self.coordinates.append(coordinates_)
                 del intermediate
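The persist-to-compute switch above changes what the scheduler hands back: persist keeps lazy collections alive on the cluster, while compute returns futures whose concrete results can be gathered and concatenated, as the stacking code expects. A minimal sketch assuming a dask.distributed Client (illustrative values, not project code):

    import dask
    from dask.distributed import Client

    client = Client(processes=False)  # assumed in-process cluster for the demo

    lazy = [dask.delayed(pow)(i, 2) for i in range(4)]

    persisted = client.persist(lazy)  # still Delayed objects; tasks run on the cluster
    futures = client.compute(lazy)    # one Future per item
    print(client.gather(futures))     # [0, 1, 4, 9]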
@@ -417,7 +420,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
             logger.info("")

             if self.preprocessor is not None and svm:
-
                 scaled_feature_space = []

                 # To take advantage of dask_ml we need to convert our numpy array
Expand Down Expand Up @@ -606,7 +608,7 @@ def to_pandas(self):
return pd.DataFrame.from_dict(self.feature_space, orient="index")

def stack_features(self, indices, stacked_features):
"""Stack features """
"""Stack features"""

features = []
for index in indices:
@@ -993,12 +995,12 @@ def calculate_G2(
         Ris = np.array(Ris)
         Rjs = np.array(Rjs)
         Rij = np.linalg.norm(Rjs - Ris, axis=1)
-        feature = np.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)
+        feature = np.exp(-eta * (Rij**2.0) / (Rc**2.0)) * cutofffxn(Rij)
     else:
         Ris = torch.stack(Ris)
         Rjs = torch.stack(Rjs)
         Rij = torch.norm(Rjs - Ris, dim=1)
-        feature = torch.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)
+        feature = torch.exp(-eta * (Rij**2.0) / (Rc**2.0)) * cutofffxn(Rij)

     if weighted:
         feature *= weights
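Summing the per-neighbor terms computed above gives the radial (G2-type) symmetry function, sketched from the code as

    G_i^2 = \sum_j \exp(-\eta R_{ij}^2 / R_c^2) \, f_c(R_{ij}),

where f_c is the cutoff function (cutofffxn) and the weighted variant multiplies each term by its neighbor weight.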
@@ -1095,7 +1097,7 @@ def calculate_G3(

         cos_theta_ijk = angles_row_wise(Rij_vector, Rik_vector, numpy=True)
         term = (1.0 + gamma * cos_theta_ijk) ** zeta
-        term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
+        term *= np.exp(-eta * (Rij**2.0 + Rik**2.0 + Rjk**2.0) / (Rc**2.0))
     else:
         neighborpositions_j = torch.stack(neighborpositions_j)
         Rij_vector = neighborpositions_j - Ri

@@ -1110,7 +1112,7 @@ def calculate_G3(

         cos_theta_ijk = angles_row_wise(Rij_vector, Rik_vector, numpy=False)
         term = (1.0 + gamma * cos_theta_ijk) ** zeta
-        term *= torch.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
+        term *= torch.exp(-eta * (Rij**2.0 + Rik**2.0 + Rjk**2.0) / (Rc**2.0))

     if weighted:
         term *= weighted_h(image_molecule, n_indices)
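Both branches assemble the same angular (G3-type) term row-wise over neighbor pairs (j, k), sketched from the code as

    (1 + \gamma \cos\theta_{ijk})^\zeta \exp(-\eta (R_{ij}^2 + R_{ik}^2 + R_{jk}^2) / R_c^2);

the cutoff-function products and normalization are presumably applied in the parts of calculate_G3 not shown in this hunk.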
Expand Down Expand Up @@ -1233,7 +1235,7 @@ def calculate_G4(
Rik = np.linalg.norm(Rik_vector)
cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
term = (1.0 + gamma * cos_theta_ijk) ** zeta
term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0) / (Rc ** 2.0))
term *= np.exp(-eta * (Rij**2.0 + Rik**2.0) / (Rc**2.0))

if weighted:
term *= weighted_h(image_molecule, n_indices)
Expand All @@ -1246,7 +1248,7 @@ def calculate_G4(
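Relative to G3, the radial damping in G4 drops the R_jk term, as the changed line above shows: \exp(-\eta (R_{ij}^2 + R_{ik}^2) / R_c^2).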


 def weighted_h(image_molecule, n_indices):
-    """ Calculate the atomic numbers of neighboring atoms for a molecule,
+    """Calculate the atomic numbers of neighboring atoms for a molecule,
     then multiplies each neighbor atomic number by each other.

     Parameters
     ----------
8 changes: 5 additions & 3 deletions in ml4chem/atomistic/models/autoencoders.py

@@ -172,7 +172,6 @@ def prepare_model(
             values = [h, mu, logvar]
             encoder = torch.nn.ModuleDict(list(map(list, zip(keys, values))))
         else:
-
             encoder = torch.nn.Sequential(*encoder)

         """

@@ -821,7 +820,6 @@ def __init__(
         lr_scheduler=None,
         **kwargs
     ):
-
         supported_keys = ["anneal", "penalize_latent"]

         if len(kwargs.items()) == 0:

@@ -1148,7 +1146,11 @@ def train_batches(
             }

         else:
-            outputs, mus_latent, logvars_latent, = model(inputs)
+            (
+                outputs,
+                mus_latent,
+                logvars_latent,
+            ) = model(inputs)

             args = {
                 "outputs": outputs,
11 changes: 5 additions & 6 deletions in ml4chem/atomistic/models/base.py

@@ -28,20 +28,20 @@ def forward(self, X):

     def feature_preparation(self, features, data, purpose="training"):
         """Vectorized data structure

         Parameters
         ----------
         features : dict, iter
             An iterator or dictionary.
         data : obj
             An ML4Chem data object.
         purpose : str, optional
             Purpose of the features, by default "training"

         Returns
         -------
         rearrengements, conditions
             Rearranged features and conditions.
         """

         data.get_largest_number_atoms(purpose)

@@ -77,7 +77,6 @@ def feature_preparation(self, features, data, purpose="training"):

         tensor_size = tensors.size()[0]
         if tensor_size < data.largest_number_atoms[symbol]:
-
             diff = data.largest_number_atoms[symbol] - tensor_size
             expand = torch.zeros(diff, self.input_dimension)
             tensors = torch.cat([tensors, expand])
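The padding at the end of this hunk is what lets per-image atomic tensors stack into a single batch. A minimal runnable sketch under assumed shapes (not the project's exact code):

    import torch

    def pad_atoms(tensors: torch.Tensor, largest_number_atoms: int) -> torch.Tensor:
        """Zero-pad an (n_atoms, input_dimension) tensor to the largest atom count."""
        diff = largest_number_atoms - tensors.size(0)
        if diff > 0:
            expand = torch.zeros(diff, tensors.size(1))
            tensors = torch.cat([tensors, expand])
        return tensors

    padded = pad_atoms(torch.ones(3, 4), 5)
    print(padded.shape)  # torch.Size([5, 4]); the last two rows are zeros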
14 changes: 6 additions & 8 deletions in ml4chem/atomistic/models/kernelridge.py

@@ -123,7 +123,6 @@ def __init__(
         weights=None,
         **kwargs
     ):
-
         np.set_printoptions(precision=30, threshold=999999999)
         self.kernel = kernel
         self.sigma = sigma

@@ -305,7 +304,6 @@ def get_kernel_matrix(self, feature_space, reference_features, purpose):
                 f_map.append(1)

             if purpose == "training":
-
                 for j in range(counter, reference_lenght):
                     j_symbol, j_afp = reference_features[j]

@@ -515,7 +513,7 @@ def get_sigma(self, sigma, forcetraining=False):

 @dask.delayed
 def linear(feature_i, feature_j, i_symbol=None, j_symbol=None):
-    """ Compute a linear kernel
+    """Compute a linear kernel

     Parameters
     ----------

@@ -543,7 +541,7 @@ def linear(feature_i, feature_j, i_symbol=None, j_symbol=None):

 @dask.delayed
 def rbf(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the rbf (AKA Gaussian) kernel.
+    """Compute the rbf (AKA Gaussian) kernel.

     Parameters
     ----------

@@ -586,14 +584,14 @@ def rbf(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
         return anisotropic_rbf
     else:
         rbf = np.exp(
-            -(np.linalg.norm(feature_i - feature_j) ** 2.0) / (2.0 * sigma ** 2.0)
+            -(np.linalg.norm(feature_i - feature_j) ** 2.0) / (2.0 * sigma**2.0)
         )
         return rbf
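For reference, the isotropic branch above is the standard Gaussian kernel. A one-function sketch (not the dask-delayed project version):

    import numpy as np

    def rbf_kernel(x, y, sigma=1.0):
        # k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
        return np.exp(-np.linalg.norm(x - y) ** 2.0 / (2.0 * sigma**2.0))

    print(rbf_kernel(np.array([0.0, 0.0]), np.array([1.0, 1.0])))  # exp(-1), about 0.368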


@dask.delayed
 def exponential(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the exponential kernel
+    """Compute the exponential kernel

     Parameters
     ----------

@@ -639,14 +637,14 @@ def exponential(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
         return anisotropic_exp
     else:
         exponential = np.exp(
-            -(np.linalg.norm(feature_i - feature_j)) / (2.0 * sigma ** 2)
+            -(np.linalg.norm(feature_i - feature_j)) / (2.0 * sigma**2)
         )
         return exponential


 @dask.delayed
 def laplacian(feature_i, feature_j, i_symbol=None, j_symbol=None, sigma=1.0):
-    """ Compute the laplacian kernel
+    """Compute the laplacian kernel

     Parameters
     ----------
18 changes: 14 additions & 4 deletions in ml4chem/atomistic/models/loss.py

@@ -27,7 +27,11 @@ def name(self):
         return "Atomistic Loss"

     def __call__(
-        self, outputs, targets, atoms_per_image, uncertainty=None,
+        self,
+        outputs,
+        targets,
+        atoms_per_image,
+        uncertainty=None,
     ):
         """Call the AtomicMSELoss loss

@@ -50,7 +54,13 @@ def __call__(
         """

         if uncertainty is None:
-            target_energy = torch.tensor(targets["energies"]).unsqueeze(1)
+            target_energy = (
+                torch.tensor(targets["energies"])
+                .unsqueeze(1)
+                .clone()
+                .detach()
+                .requires_grad_(True)
+            )
             criterion = torch.nn.MSELoss(reduction="sum")
             outputs_atom = torch.div(
                 outputs["energies"].unsqueeze(1), atoms_per_image.unsqueeze(1)
@@ -293,11 +303,11 @@ def get_pairwise_distances(x, y=None, squared=False):
     distances
         Pairwise distances.
     """
-    x_norm = (x ** 2).sum(1).view(-1, 1)
+    x_norm = (x**2).sum(1).view(-1, 1)

     if y is not None:
         y_t = torch.transpose(y, 0, 1)
-        y_norm = (y ** 2).sum(1).view(1, -1)
+        y_norm = (y**2).sum(1).view(1, -1)
     else:
         y_t = torch.transpose(x, 0, 1)
         y_norm = x_norm.view(1, -1)
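The x_norm and y_norm terms above implement the standard expansion ||x_i - y_j||^2 = ||x_i||^2 - 2 x_i . y_j + ||y_j||^2, which yields all pairwise distances with a single matrix multiply. A small self-check under assumed shapes:

    import torch

    x = torch.randn(5, 3)
    y = torch.randn(7, 3)

    x_norm = (x**2).sum(1).view(-1, 1)      # (5, 1)
    y_norm = (y**2).sum(1).view(1, -1)      # (1, 7)
    d2 = x_norm + y_norm - 2.0 * x @ y.t()  # (5, 7) squared pairwise distances

    assert torch.allclose(d2, torch.cdist(x, y) ** 2, atol=1e-4)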
4 changes: 0 additions & 4 deletions in ml4chem/atomistic/models/merger.py

@@ -181,7 +181,6 @@ def train(
         if isinstance(batch_size, int):
             chunks = []
             for inputs_ in inputs:
-
                 if inspect.ismethod(inputs_):
                     chunks.append(inputs_)
                 else:

@@ -393,7 +392,6 @@ def closure(self, index, model, independent_loss, name=None):
         inputs = []
         # FIXME this is not scaling to n number of models.
         for chunk_index, chunk in enumerate(self.chunks[index - 1]):
-
             inputs_ = self.chunks[index][chunk_index](OrderedDict(chunk.result()))
             inputs.append(client.scatter(inputs_))

@@ -424,7 +422,6 @@ def closure(self, index, model, independent_loss, name=None):
             return loss, outputs_

         else:  # Models are dependent on each other
-
             running_loss = torch.tensor(0, dtype=torch.float)
             accumulation = []

@@ -481,7 +478,6 @@ def train_batches(

         losses = []
         for model_index, model in enumerate(models):
-
             output = outputs[model_index]

             if model.name() == "PytorchPotentials":
6 changes: 2 additions & 4 deletions in ml4chem/atomistic/models/neuralnetwork.py

@@ -173,11 +173,11 @@ def forward(self, X, conditions):
         X : dict
             Dictionary of inputs in the feature space.
         condition : dict
             A dict of tensors per atom type with conditions.

         Returns
         -------
         outputs
             A dict of tensors with energies per atom.
         """
         outputs = {}
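The docstring above describes the atomistic forward pass: one sub-network per chemical symbol, with "conditions" masking padded atoms. A minimal sketch of that pattern (shapes, names, and the masking detail are assumptions, not ml4chem's exact code):

    import torch

    class PerElementNet(torch.nn.Module):
        def __init__(self, input_dim=8, hidden=10, symbols=("Cu", "O")):
            super().__init__()
            # One small feed-forward network per chemical symbol.
            self.linears = torch.nn.ModuleDict(
                {
                    s: torch.nn.Sequential(
                        torch.nn.Linear(input_dim, hidden),
                        torch.nn.Tanh(),
                        torch.nn.Linear(hidden, 1),
                    )
                    for s in symbols
                }
            )

        def forward(self, X, conditions):
            outputs = {}
            for symbol, features in X.items():
                atomic_energies = self.linears[symbol](features).squeeze(-1)
                # Zero out contributions from zero-padded (non-existent) atoms.
                outputs[symbol] = atomic_energies * conditions[symbol]
            return outputs

    net = PerElementNet()
    X = {"Cu": torch.randn(2, 5, 8)}       # 2 images, 5 atom slots, 8 features
    conditions = {"Cu": torch.ones(2, 5)}  # 1 = real atom, 0 = padding
    print(net(X, conditions)["Cu"].shape)  # torch.Size([2, 5])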
@@ -235,7 +235,6 @@ def get_activations(self, images, model=None, numpy=True):

         for hash, data in images.items():
             for index, (symbol, features) in enumerate(data):
-
                 counter = 0
                 layer_counter = 0
                 for _, layer in enumerate(model.linears[symbol].modules()):

@@ -361,7 +360,6 @@ def __init__(
         test=None,
         forcetraining=False,
     ):
-
         self.initial_time = time.time()

         if lossfxn is None: