Milestone 2: second TT-ODE working version with backward gradient descent #18

Open · wants to merge 59 commits into base: master
Changes from 1 commit
0525ea3
modify noise param to >0.0 so that we don't get err: Expected param…
mbaddar1 Aug 14, 2022
3774ca7
modify noise param to >0.0 so that we don't get err: Expected param…
mbaddar1 Aug 14, 2022
74a7f5d
regenerate the same results for NODE on the 1d flipped -1/1 problem with a s…
mbaddar1 Aug 31, 2022
d7a9119
start anode 3d viz preparation
mbaddar1 Sep 3, 2022
1ee699c
started sphere experiment inspection
mbaddar1 Sep 13, 2022
8903839
complete code snippet for neurode 2d
mbaddar1 Sep 14, 2022
50cc5ca
started tde model
mbaddar1 Sep 25, 2022
e5604b4
first TDE model attempt, dA/dt = U.A with no non-linearity. F fun i…
mbaddar1 Sep 29, 2022
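A linear matrix ODE like dA/dt = U·A has the closed-form solution A(t) = exp(tU) A(0), which makes a convenient sanity check for any learned or numerical solver. A minimal sketch (U, A0 and the harness are illustrative, not from the repo):

```python
import torch

def linear_ode_solution(U: torch.Tensor, A0: torch.Tensor, t: float) -> torch.Tensor:
    """Closed-form solution of dA/dt = U @ A with A(0) = A0."""
    return torch.matrix_exp(t * U) @ A0

U = torch.tensor([[0.0, 1.0], [-1.0, 0.0]])  # rotation generator
A0 = torch.eye(2)
exact = linear_ode_solution(U, A0, t=1.0)    # reference value for solver tests
```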
aa472be
torch ode start: simple Euler
mbaddar1 Oct 5, 2022
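The "simple Euler" solver referenced here is presumably a fixed-step explicit Euler loop over torch tensors; a minimal sketch under that assumption (function and argument names are mine, not the repo's API):

```python
import torch
from typing import Callable

def euler_integrate(f: Callable, y0: torch.Tensor,
                    t0: float, tf: float, n_steps: int) -> torch.Tensor:
    """Explicit Euler: y_{k+1} = y_k + h * f(t_k, y_k)."""
    h = (tf - t0) / n_steps
    t, y = torch.tensor(t0), y0
    for _ in range(n_steps):
        y = y + h * f(t, y)
        t = t + h
    return y
```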
54a612d
added test cases, but for the Lotka-Volterra eqn it seems that Euler is h…
mbaddar1 Oct 5, 2022
29bfb79
finished first version of torch rk45 with testing
mbaddar1 Oct 12, 2022
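The repo's rk45 is an adaptive Runge-Kutta scheme; as a simplified stand-in, a classical fixed-step RK4 step shows the same evaluate-and-combine structure (a sketch, not the repo's implementation):

```python
import torch

def rk4_step(f, t: torch.Tensor, y: torch.Tensor, h: float) -> torch.Tensor:
    """One classical 4th-order Runge-Kutta step with fixed step size h."""
    k1 = f(t, y)
    k2 = f(t + h / 2, y + h / 2 * k1)
    k3 = f(t + h / 2, y + h / 2 * k2)
    k4 = f(t + h, y + h * k3)
    return y + h / 6 * (k1 + 2 * k2 + 2 * k3 + k4)

# e.g. Lotka-Volterra (the test case mentioned above), with unit coefficients:
# f = lambda t, y: torch.stack((y[0] - y[0] * y[1], y[0] * y[1] - y[1]))
```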
628d1d4
added more Matlab-based test cases
mbaddar1 Oct 13, 2022
29baaeb
bulk changes rollout
mbaddar1 Oct 13, 2022
8ef87b4
my toy neural ode with torch ode rk45 implementation
mbaddar1 Oct 13, 2022
4a8323e
changes to support cuda , needs more fixing
mbaddar1 Oct 18, 2022
27ae82b
Test modifications to work with GPU tensors
mbaddar1 Oct 19, 2022
cfb2f05
my torch ode toy problem
mbaddar1 Oct 19, 2022
78203ed
bulk changes to make toy example and test cases with torch rk45 on GPU
mbaddar1 Oct 19, 2022
12b50ca
some bulk changes
mbaddar1 Oct 20, 2022
3d83ad6
set experimentation logging code
mbaddar1 Oct 20, 2022
0576990
add NFE computations, need to check them against the NODE paper
mbaddar1 Oct 20, 2022
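NFE (number of function evaluations) in the NODE paper is the count of calls the solver makes to the dynamics function; a common way to track it is a counting wrapper module (a sketch, not the repo's code):

```python
import torch.nn as nn

class NFECounter(nn.Module):
    """Wraps an ODE dynamics module and counts how often the solver calls it."""
    def __init__(self, odefunc: nn.Module):
        super().__init__()
        self.odefunc = odefunc
        self.nfe = 0  # reset to 0 before each solve to get per-solve NFE

    def forward(self, t, y):
        self.nfe += 1
        return self.odefunc(t, y)
```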
5e7eea2
reengineered the training class to store NFE and average NFE
mbaddar1 Oct 21, 2022
7064359
--amend
mbaddar1 Oct 23, 2022
cc85530
add true and trainable dynamics
mbaddar1 Oct 23, 2022
6187928
separate data-gen learnable-dynamics true-dynamics in own scripts
mbaddar1 Oct 24, 2022
3761da9
testing my Learnable TensorODE vs ResNET NODE and ANODE
mbaddar1 Oct 24, 2022
c445461
setup models experimentation framework;
mbaddar1 Oct 25, 2022
edeedef
add modifications to yaml file and support to concentric-sphere datas…
mbaddar1 Oct 26, 2022
2772621
modifications for making concentric sphere dataset work
mbaddar1 Oct 26, 2022
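The concentric-sphere dataset (from the ANODE experiments) labels points in an inner ball -1 and points in a surrounding shell +1, a classic case a plain NODE cannot separate. A minimal generator sketch (radii and sampling are illustrative; radii drawn uniformly, not volume-uniform):

```python
import torch

def concentric_spheres(n: int, dim: int = 2):
    """Half the points in an inner ball (label -1), half in an outer shell (+1)."""
    d = torch.randn(n, dim)
    d = d / d.norm(dim=1, keepdim=True)            # random unit directions
    r_in = 0.5 * torch.rand(n // 2, 1)             # inner ball radii
    r_out = 1.0 + 0.5 * torch.rand(n - n // 2, 1)  # outer shell radii
    x = torch.cat([d[: n // 2] * r_in, d[n // 2:] * r_out])
    y = torch.cat([-torch.ones(n // 2), torch.ones(n - n // 2)])
    return x, y
```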
b13bee2
add early stop and shell experiment file to repeat
mbaddar1 Oct 27, 2022
08d7f0e
add shell script to run an experiment several times
mbaddar1 Oct 27, 2022
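Given the loss_window and loss_threshold keys in ttode_config.yaml (see the diff below), early stopping here plausibly means halting once the moving average of the last loss_window losses falls below loss_threshold; a sketch under that assumption:

```python
from collections import deque

class WindowedEarlyStop:
    """Stop when the mean of the last `window` losses drops below `threshold`."""
    def __init__(self, window: int = 10, threshold: float = 1e-3):
        self.losses = deque(maxlen=window)
        self.threshold = threshold

    def should_stop(self, loss: float) -> bool:
        self.losses.append(loss)
        return (len(self.losses) == self.losses.maxlen
                and sum(self.losses) / len(self.losses) < self.threshold)
```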
3929ed7
adding poly basis function to A but the solver got stuck at some point du…
mbaddar1 Oct 30, 2022
1eb2aaa
add sin/cos basis fn - did not work
mbaddar1 Oct 31, 2022
d34af71
reimplement poly basis via torch vmap but not tested yet
mbaddar1 Nov 2, 2022
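A polynomial feature map phi(a) = (1, a, a^2, ..., a^deg) written for a single sample can be batched with vmap, which is presumably what this commit does; a sketch (torch.func.vmap on torch >= 2.0, functorch.vmap earlier; the flattened feature layout is an assumption):

```python
import torch
from torch.func import vmap  # functorch.vmap on older torch versions

def poly_basis(a: torch.Tensor, deg: int) -> torch.Tensor:
    """Powers 0..deg of each coordinate of one sample, flattened."""
    return torch.stack([a ** k for k in range(deg + 1)]).flatten()

batched_poly_basis = vmap(poly_basis, in_dims=(0, None))
# A: (batch, dim) -> phi: (batch, (deg + 1) * dim)
# phi = batched_poly_basis(A, 3)
```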
af0d9b1
reverted to loop-based application to solve / forward over batch…
mbaddar1 Nov 4, 2022
fc99843
initial results documentation
mbaddar1 Nov 8, 2022
e700315
simplifying nfe calculations and updating experiments journal accordi…
mbaddar1 Nov 10, 2022
d474c75
retrying adding poly basis support
mbaddar1 Nov 15, 2022
d8e8094
first working version for TODE with poly_deg = 1
mbaddar1 Nov 16, 2022
1dc66dd
added support to select forward-integrate impl from mytorch and torch…
mbaddar1 Nov 16, 2022
a07e92c
solved exploding gradient problem by adding weight regularization. Th…
mbaddar1 Nov 18, 2022
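Weight regularization against exploding gradients usually means adding an L2 penalty on the trainable tensors to the loss (or, equivalently, Adam's weight_decay); a sketch of the penalty form, with lam matching the lambda: 0.1 key in the config diff below:

```python
import torch

def l2_penalty(params, lam: float = 0.1) -> torch.Tensor:
    """lam times the summed squared Frobenius norms of all trainable tensors."""
    return lam * sum(p.pow(2).sum() for p in params if p.requires_grad)

# loss = data_loss + l2_penalty(model.parameters(), lam=0.1)
```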
be60c9d
adding non linearity in the final function
mbaddar1 Nov 21, 2022
1cf68fc
simplify final function
mbaddar1 Nov 23, 2022
5179513
start fixed tt contract code
mbaddar1 Dec 6, 2022
aa486b6
start tt work
mbaddar1 Jan 5, 2023
71c73b2
first attempt with TT as Coeff(W) for da/dt = W.phi(a). Plus other ch…
mbaddar1 Jan 6, 2023
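With the coefficient tensor W stored as a tensor train with cores G_k of shape (r_{k-1}, n_k, r_k), contracting W against a rank-one feature tensor phi_1 x ... x phi_m reduces to per-core contractions and small matrix products, costing O(m n r^2) instead of O(n^m) for the dense tensor. A sketch (the core layout and boundary ranks of 1 are assumptions):

```python
import torch

def tt_dot(cores, feats):
    """<W, phi_1 x ... x phi_m> for TT cores[k]: (r_{k-1}, n_k, r_k), feats[k]: (n_k,)."""
    v = torch.einsum('rnk,n->rk', cores[0], feats[0])  # shape (1, r_1)
    for G, f in zip(cores[1:], feats[1:]):
        v = v @ torch.einsum('rnk,n->rk', G, f)        # stays a small row vector
    return v.squeeze()  # scalar when both boundary ranks are 1
```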
cbd3fe1
Milestone 1: First working version of ttode with ordinary forward-ba…
mbaddar1 Jan 8, 2023
8d770dc
change overall forward impl and TTALS_apply to embed z0 = P.x in it
mbaddar1 Jan 18, 2023
58f0405
skeleton for forward ttals with dummy backward: only termina_nn weig…
mbaddar1 Jan 19, 2023
6b35a2c
add dlra as submodule
mbaddar1 Jan 24, 2023
f0c341d
start integrating the dlra david TT structure into the TTODE code
mbaddar1 Jan 29, 2023
c8acd80
integrate tt als for forward pass, very slow
mbaddar1 Jan 31, 2023
2fb7a4a
First try to integrate tt-als, problem is that W (the coeff TT) is …
mbaddar1 Feb 3, 2023
c89c9c1
simple TT regression problem to test TT-ALS vs TT-Gradient Descent
mbaddar1 Feb 5, 2023
535b465
modify nn seq model norm
mbaddar1 Feb 5, 2023
a076365
working version of a simple TT-grad problem
mbaddar1 Feb 8, 2023
2b5392d
Milestone 2: first semi-working version of dlra-david TT class with …
mbaddar1 Feb 8, 2023
78d121d
Milestone 2: First working version with dlra.TT class with torch_int…
mbaddar1 Feb 9, 2023
686e5dd
Merge pull request #1 from mlguy101/experiments
mbaddar1 Feb 9, 2023
Milestone 2: First working version with dlra.TT class, with torch_integrate for the forward pass; the backward pass is implemented implicitly via PyTorch backward gradient descent (Adam)
mbaddar1 committed Feb 9, 2023
commit 78d121d0f0e9eab895712d0fc4e318391eaf1300
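The forward/backward split this commit describes, integrate with a black-box solver and let PyTorch autograd backpropagate through the solver's ops into an Adam step, looks roughly like this (a sketch using torchdiffeq.odeint as a stand-in for the repo's torch_integrate; names are illustrative):

```python
import torch
from torchdiffeq import odeint  # differentiable ODE solvers

def train_step(odefunc, z0, target, optimizer, t_span):
    optimizer.zero_grad()
    zT = odeint(odefunc, z0, t_span)[-1]  # forward: integrate the dynamics
    loss = torch.nn.functional.smooth_l1_loss(zT, target)
    loss.backward()                       # backward: autograd through the solver
    optimizer.step()                      # implicit "backward gradient descent" (Adam)
    return loss.item()
```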
4 changes: 2 additions & 2 deletions phd_experiments/tt_ode/ttode_config.yaml
@@ -32,7 +32,7 @@ train:
   print_freq: 10
   loss: "smoothl1loss"
   loss_window: 10
-  loss_threshold: 1e-4 # must be adjusted based on dataset
+  loss_threshold: 1e-3 # must be adjusted based on dataset
 
 # 6 ) Model-specific params
 resnet:
@@ -60,7 +60,7 @@ ttode: # tensor ode
   # TODO Understand : why when tf is large (e.g. 10) we get underflow
   # TODO more experiment with tf : seems crucial to accuracy and running time
   forward_impl_method: "ttode_als"
-  custom_autograd_fn : True
+  custom_autograd_fn : False
   lambda : 0.1 # TODO make it fn (Coeff tensor order)
   tt_rank : 5
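The custom_autograd_fn flag toggled off in this diff presumably switches between a hand-written torch.autograd.Function and plain autograd. For reference, the general shape of such a function (a generic linear-layer example, not the repo's):

```python
import torch

class LinearFn(torch.autograd.Function):
    """Custom forward/backward for y = x @ W."""
    @staticmethod
    def forward(ctx, x, W):
        ctx.save_for_backward(x, W)
        return x @ W

    @staticmethod
    def backward(ctx, grad_out):
        x, W = ctx.saved_tensors
        return grad_out @ W.T, x.T @ grad_out  # grads w.r.t. x and W
```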

25 changes: 0 additions & 25 deletions phd_experiments/tt_ode/ttode_main.py
@@ -154,31 +154,6 @@ def get_loss(loss_name: str):
         W_old_norm = get_W_norm(model_.get_W())
         Q_old_norm = model_.get_Q().norm().item()
 
-        # update via gradient descent for parameters with required_grad=True
-        # FIXME , hack to amplify W grads
-        if isinstance(model_.get_W(), list) and all([isinstance(w, TensorTrain) for w in model_.get_W()]):
-            for w in model_.get_W():
-                for G in w.comps:
-                    G.grad *= 1
-        # FIXME End hack
-
-        # FIXME , remove quick hack to manual compute parameters update and see how that compares to optimize steps
-        # P_new_manual = model_.get_P()-float(configs_['train']['lr'])* model_.get_P().grad
-        # FIXME hack , manually update W components till we find why it is not updated
-        # W = model_.get_W()
-        # # https://medium.com/@mrityu.jha/understanding-the-grad-of-autograd-fc8d266fd6cf
-        # if isinstance(W, TensorTrainFixedRank):
-        #     pass
-        # elif isinstance(W, list) and all([isinstance(w, TensorTrain) for w in W]):
-        #     for i, w in enumerate(model_.get_W()):
-        #         for j, G in enumerate(w.comps):
-        #             is_leaf = G.is_leaf
-        #             model_.get_W()[i].comps[j] -= float(configs_['train']['lr']) * model_.get_W()[i].comps[j].grad
-
-        # TODO
-        # 1. Understand why optimizer step doesn't update W even when W Grads are good enough ??
-        # 2. Emulate optimize.step with wit
-
         optimizer.step()
 
         # calculate delta norm
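On the deleted TODO (why optimizer.step() leaves W unchanged despite non-zero grads): a common cause is TT cores held in a plain Python list, so they are never registered with the optimizer. One conventional fix, sketched here (the TensorTrain internals are assumptions):

```python
import torch
import torch.nn as nn

class TTCores(nn.Module):
    """Register TT cores as nn.Parameters so .parameters() and optimizers see them."""
    def __init__(self, shapes):
        super().__init__()
        self.comps = nn.ParameterList(
            [nn.Parameter(0.1 * torch.randn(*s)) for s in shapes])

# torch.optim.Adam(model.parameters(), lr=lr) now includes the cores.
```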
3 changes: 0 additions & 3 deletions phd_experiments/tt_ode/ttode_model.py
@@ -113,9 +113,6 @@ def __init__(self, input_dimensions: List[int], output_dimensions: List[int],
         P_dims.extend(tensor_dimensions.copy())
         assert len(P_dims) == 2, "No support for the projection tensor P with n_dims > 2 , yet !"
 
-        # F_dims = tensor_dimensions.copy()
-        # F_dims.extend(output_dimensions.copy())
-
         # initialize model parameters
 
         # Initialize P