Commit d0e0390

added color convolution
luczeng committed Jun 21, 2020
1 parent c22220e commit d0e0390
Showing 11 changed files with 118 additions and 15 deletions.
5 changes: 3 additions & 2 deletions driver_scripts/main_train.py
@@ -27,9 +27,10 @@

# Net
reds_size = [720, 1280]
-net = MotionNet(config.n_layers, config.n_sublayers, config.n_features_first_layer, reds_size)
+net = MotionNet(config.n_layers, config.n_sublayers, config.n_features_first_layer, reds_size, config.as_gray)
+# reds_size = (720, 1280)

-# Determine type (GPU or not)
+# Determine type(GPU or not)
if torch.cuda.is_available():
    net.to(device=torch.device("cuda"))
    net_type = torch.cuda.FloatTensor
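For orientation, a minimal usage sketch (not part of the diff) of how the updated constructor is called; the Config class name, its import path, and the CPU fallback are assumptions, while the attribute names mirror read_config.py:

import torch

from motion_blur.libs.configs.read_config import Config  # assumed class name
from motion_blur.libs.nn.motion_net import MotionNet

config = Config("motion_blur/libs/configs/config_reds_1_img.yml")
reds_size = [720, 1280]

# as_gray selects between a 1-channel and a 3-channel first convolution layer
net = MotionNet(config.n_layers, config.n_sublayers, config.n_features_first_layer, reds_size, config.as_gray)

if torch.cuda.is_available():
    net.to(device=torch.device("cuda"))
    net_type = torch.cuda.FloatTensor
else:
    net_type = torch.FloatTensor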
1 change: 1 addition & 0 deletions motion_blur/libs/configs/config_reds_1_img.yml
@@ -2,6 +2,7 @@ NET:
  N_LAYERS: 5
  N_SUBLAYERS: 2
  N_FEATURES_FIRST_LAYER: 32
+  AS_GRAY : 0
TRAIN:
  TRAIN_DATASET_PATH: 'datasets/reds_small'
  SAVE_PATH: 'training_results/reds_small/run5'
1 change: 1 addition & 0 deletions motion_blur/libs/configs/read_config.py
@@ -17,6 +17,7 @@ def __init__(self, config_path):
        self.n_layers = config_dict["NET"]["N_LAYERS"]
        self.n_sublayers = config_dict["NET"]["N_SUBLAYERS"]
        self.n_features_first_layer = config_dict["NET"]["N_FEATURES_FIRST_LAYER"]
+        self.as_gray = bool(config_dict["NET"]["AS_GRAY"])

        # Train parameters
        self.train_dataset_path = config_dict["TRAIN"]["TRAIN_DATASET_PATH"]
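The flag travels from YAML to a Python bool through bool(); a small self-contained check of that conversion (the yaml.safe_load call is an assumption about how the config class loads its file):

import yaml

raw = """
NET:
  N_LAYERS: 5
  N_SUBLAYERS: 2
  N_FEATURES_FIRST_LAYER: 32
  AS_GRAY : 0
"""

config_dict = yaml.safe_load(raw)

# 0 -> False (keep color images), 1 -> True (load as grayscale)
as_gray = bool(config_dict["NET"]["AS_GRAY"])
print(as_gray)  # False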
12 changes: 9 additions & 3 deletions motion_blur/libs/data/dataset.py
@@ -38,7 +38,7 @@ def __len__(self):


class Dataset_OneImage(Dataset):
-    def __init__(self, batch_size, root_dir, L_min, L_max, net_type):
+    def __init__(self, batch_size, root_dir, L_min, L_max, net_type, as_gray=True):
        """
        This dataset is being used for evaluating the capacity of the model on one image
        It only uses one image
@@ -56,19 +56,25 @@ def __init__(self, batch_size, root_dir, L_min, L_max, net_type):
        self.img_list = [img_path for img_path in Path(root_dir).iterdir() if img_path.is_file()]
        self.net_type = net_type
        self.batch_size = batch_size
+        self.as_gray = as_gray

    def __getitem__(self, idx):

        L = self.length_list[random.randint(0, self.n_lengths - 1)]
        theta = torch.rand(1) * 180

-        img = io.imread(self.img_list[0], as_gray=True)
+        img = io.imread(self.img_list[0], as_gray=self.as_gray)

        gt = torch.cat((theta, L)).type(self.net_type)

        kernel = motion_kernel(theta, int(L))
        H = Convolution(kernel)
-        img = torch.tensor((H * img)[None, :, :]).type(self.net_type)

+        if self.as_gray:
+            img = torch.tensor((H * img)[None, :, :]).type(self.net_type)
+        else:
+            img = torch.tensor((H * img)[:, :]).type(self.net_type)
+            img = img.permute(2, 0, 1)

        sample = {"image": img, "gt": gt}

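A shape sanity check for the two branches of __getitem__ (illustration only; the skimage read and the blur operator are replaced by random arrays): the grayscale branch yields a (1, H, W) tensor, the color branch a (3, H, W) tensor ready for Conv2d.

import numpy as np
import torch

net_type = torch.FloatTensor

# Grayscale branch: (H, W) blurred image -> (1, H, W)
gray = np.random.rand(720, 1280)                         # stands in for H * img
t_gray = torch.tensor(gray[None, :, :]).type(net_type)
print(t_gray.shape)                                      # torch.Size([1, 720, 1280])

# Color branch: (H, W, 3) blurred image -> (3, H, W)
color = np.random.rand(720, 1280, 3)                     # stands in for H * img
t_color = torch.tensor(color[:, :]).type(net_type).permute(2, 0, 1)
print(t_color.shape)                                     # torch.Size([3, 720, 1280])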
26 changes: 24 additions & 2 deletions motion_blur/libs/forward_models/linops/convolution.py
@@ -5,13 +5,35 @@

class Convolution(linop):
    def __init__(self, kernel: np.ndarray):
+        """
+        Linop that performs convolution by overloading the * operator
+        The convolution is done in the Fourier domain
+        :param kernel kernel to convolve with
+        """
        self.kernel = kernel

+    def fourier_convolution(self, img: np.ndarray):
+        # TODO: Put some checks
+        return np.real(ifft2(fft2(img) * fft2(self.kernel, img.shape)))

    def __mul__(self, img: np.ndarray) -> np.ndarray:
+        """
+        Performs the convolution of the input image with a kernel using Fourier transforms.
+        Zero pads the convolution kernel
+        :param img colored or grayscale image to be convolved
+        :return result
+        """

-        # TODO: Put some checks
-        return np.real(ifft2(fft2(img) * fft2(self.kernel, img.shape)))
+        if img.ndim == 2:
+            return self.fourier_convolution(img)
+        else:
+            if img.shape[2] == 3:
+                out_img = np.empty(img.shape)
+                out_img[:, :, 0] = self.fourier_convolution(img[:, :, 0])
+                out_img[:, :, 1] = self.fourier_convolution(img[:, :, 1])
+                out_img[:, :, 2] = self.fourier_convolution(img[:, :, 2])
+                return out_img
+            else:
+                raise ValueError("Incorrect dimension of input image")
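A short usage sketch (not in the commit): the * operator dispatches on the number of dimensions, so a color image is blurred channel by channel. A box kernel stands in for motion_kernel so the example runs on its own.

import numpy as np

from motion_blur.libs.forward_models.linops.convolution import Convolution

kernel = np.ones((3, 3)) / 9           # simple box blur instead of a motion kernel

gray = np.random.rand(64, 64)          # 2-D input -> one Fourier convolution
color = np.random.rand(64, 64, 3)      # 3-D input -> one convolution per channel

H = Convolution(kernel)
blurred_gray = H * gray                # shape (64, 64)
blurred_color = H * color              # shape (64, 64, 3)

# Channels are processed independently, so blurring them one by one agrees
# with passing the color image directly.
manual = np.stack([H * color[:, :, c] for c in range(3)], axis=2)
assert np.allclose(blurred_color, manual)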
8 changes: 5 additions & 3 deletions motion_blur/libs/metrics/metrics.py
@@ -94,15 +94,15 @@ def run_validation(config, net, net_type):
    return angle_loss, length_loss


-def evaluate_one_image(net, img_path, net_type, n_angles=60, L_min=0, L_max=10):
+def evaluate_one_image(net, img_path, net_type, n_angles=60, L_min=0, L_max=10, as_gray = True):
    """
    Evaluate the linear model on one image using several
    Returns the average loss
    TODO: this is not modular enough
    """

-    img = io.imread(img_path, as_gray=True)
+    img = io.imread(img_path, as_gray=as_gray)

    angles = torch.linspace(0, 180, n_angles)
    if L_min != L_max:
@@ -117,8 +117,10 @@ def evaluate_one_image(net, img_path, net_type, n_angles=60, L_min=0, L_max=10):
        for L in lengths:
            kernel = motion_kernel(angle, int(L))
            H = Convolution(kernel)
-            blurred_img = torch.tensor((H * img)[None, None, :, :])
+            blurred_img = torch.tensor(H * img)
            blurred_img = blurred_img.type(net_type)  # to have an image between 0 - 255
+            blurred_img = blurred_img.permute(2, 0, 1)
+            blurred_img = blurred_img[None,:,:,:]

            x = net.forward(blurred_img)
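To make the reshaping explicit (illustration, not repository code): a color image leaves the blur operator as (H, W, 3) and must reach NCHW order before net.forward. Note that permute(2, 0, 1) assumes a 3-D tensor, so the grayscale path (as_gray=True) would still need extra handling along the lines of the removed [None, None, :, :] indexing.

import torch

blurred = torch.rand(720, 1280, 3)   # stands in for H * img on a color image: (H, W, C)
blurred = blurred.permute(2, 0, 1)   # -> (C, H, W) = (3, 720, 1280)
blurred = blurred[None, :, :, :]     # -> (N, C, H, W) = (1, 3, 720, 1280)
print(blurred.shape)                 # torch.Size([1, 3, 720, 1280])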
8 changes: 6 additions & 2 deletions motion_blur/libs/nn/motion_net.py
@@ -4,7 +4,7 @@


class MotionNet(nn.Module):
-    def __init__(self, n_layers: int, n_sublayers: int, n_features: int, img_shape: list):
+    def __init__(self, n_layers: int, n_sublayers: int, n_features: int, img_shape: list, as_gray: bool):
        """
        Network layer definition
@@ -36,8 +36,12 @@ def __init__(self, n_layers: int, n_sublayers: int, n_features: int, img_shape:
        self.convolutional = nn.ModuleList()

        # First layer
+        if as_gray:
+            self.feature_size_in = 1
+        else:
+            self.feature_size_in = 3
        layer = nn.ModuleList()
-        layer.append(nn.Conv2d(1, n_features, 3))
+        layer.append(nn.Conv2d(self.feature_size_in, n_features, 3))
        for sublayer in range(1, self.n_sublayers):
            layer.append(nn.Conv2d(n_features, n_features, 3))
        self.convolutional.append(layer)
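A reduced sketch of what the flag changes (the class name and layer layout here are illustrative; only the first-layer channel logic mirrors the diff):

import torch
import torch.nn as nn


class TinyMotionNet(nn.Module):
    """Stand-in for MotionNet that keeps only the first-layer channel logic."""

    def __init__(self, n_features: int, as_gray: bool):
        super().__init__()
        self.feature_size_in = 1 if as_gray else 3
        self.first = nn.Conv2d(self.feature_size_in, n_features, 3)

    def forward(self, x):
        return self.first(x)


print(TinyMotionNet(32, as_gray=True).first.weight.shape)   # torch.Size([32, 1, 3, 3])
print(TinyMotionNet(32, as_gray=False).first.weight.shape)  # torch.Size([32, 3, 3, 3])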
6 changes: 4 additions & 2 deletions motion_blur/libs/nn/train_small.py
@@ -26,7 +26,9 @@ def run_train_small(config, ckp_path, save_path, net, net_type, optimizer, crite
    # start = load_checkpoint(ckp_path, net, optimizer)

    # Data
-    dataset = Dataset_OneImage(config.mini_batch_size, config.train_dataset_path, config.L_min, config.L_max, net_type)
+    dataset = Dataset_OneImage(
+        config.mini_batch_size, config.train_dataset_path, config.L_min, config.L_max, net_type, config.as_gray
+    )
    dataloader = DataLoader(dataset, batch_size=config.mini_batch_size, shuffle=True)

    # Training loop
@@ -65,7 +67,7 @@
        # Run evaluation
        if (epoch % config.validation_period == config.validation_period - 1) & epoch != 0:
            angle_loss, length_loss = evaluate_one_image(
-                net, config.val_small_dataset_path, net_type, config.val_n_angles, config.L_min, config.L_max
+                net, config.val_small_dataset_path, net_type, config.val_n_angles, config.L_min, config.L_max, config.as_gray
            )
            mlflow.log_metric("angle_error", angle_loss.item())
            mlflow.log_metric("length_error", length_loss.item())
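A hedged end-to-end sketch of the data path (assumes the package is importable and that datasets/reds_small contains at least one image; the numeric arguments are placeholders):

import torch
from torch.utils.data import DataLoader

from motion_blur.libs.data.dataset import Dataset_OneImage

dataset = Dataset_OneImage(
    batch_size=4, root_dir="datasets/reds_small", L_min=2, L_max=10,
    net_type=torch.FloatTensor, as_gray=False,
)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

batch = next(iter(dataloader))
print(batch["image"].shape)  # (4, 3, H, W) when as_gray=False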
2 changes: 1 addition & 1 deletion motion_blur/libs/utils/nn_utils.py
@@ -7,7 +7,7 @@

def print_training_info(net, img_size):
    print("\nNetwork information: \n")
    print(net, "\n")
-    summary(net, (1, img_size[0], img_size[1]))
+    # summary(net, (3, img_size[0], img_size[1]))
    print("\n")
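Since MotionNet now stores feature_size_in, the summary call could stay enabled and read the channel count from the network itself; a possible variant (not in the commit, and it assumes summary comes from torchsummary):

from torchsummary import summary


def print_training_info(net, img_size):
    print("\nNetwork information: \n")
    print(net, "\n")
    # Use the channel count the network was built with (1 for grayscale, 3 for color)
    in_channels = getattr(net, "feature_size_in", 1)
    summary(net, (in_channels, img_size[0], img_size[1]))
    print("\n")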
Binary file removed tests/lena.jpeg
64 changes: 64 additions & 0 deletions tests/test_linops/test_linops.py
@@ -0,0 +1,64 @@
import numpy as np
from motion_blur.libs.forward_models.linops.convolution import Convolution


def test_grascale_convolution():

    # 1 kernel
    x = np.ones((5, 5))
    kernel = np.ones((1, 1))

    H = Convolution(kernel)
    y = H * x

    np.testing.assert_array_equal(y, x)

    # 2x2 1 kernel
    x = np.ones((5, 5))
    kernel = np.ones((2, 2)) / 4

    H = Convolution(kernel)
    y = H * x

    np.testing.assert_array_equal(y, x)

    # Edge detector in x direction
    x = np.ones((5, 5))
    kernel = np.array([-1, 1])
    kernel = kernel[None, :]

    H = Convolution(kernel)
    y = H * x

    y_gt = np.zeros((5, 5))

    np.testing.assert_array_equal(y, y_gt)

    # Edge detector, more complicated input
    x = np.zeros((5, 5))
    x[:, 2] = 1
    kernel = np.array([-1, 1])
    kernel = kernel[None, :]

    H = Convolution(kernel)
    y = H * x

    y_gt = np.zeros((5, 5))
    y_gt[:, 2] = -1
    y_gt[:, 3] = 1

    np.testing.assert_almost_equal(y, y_gt, decimal=10)


def test_color_convolution():

    # TODO: need more tests here

    # 2x2 1 kernel
    x = np.ones((5, 5, 3))
    kernel = np.ones((2, 2)) / 4

    H = Convolution(kernel)
    y = H * x

    np.testing.assert_array_equal(y, x)
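As a possible answer to the TODO above (a sketch, not part of the commit, reusing the test module's np and Convolution imports), the grayscale edge-detector case can be replayed channel-wise to check that each channel is convolved independently:

def test_color_edge_detector():
    # Edge detector applied to a color image: every channel should see the same filter
    x = np.zeros((5, 5, 3))
    x[:, 2, :] = 1
    kernel = np.array([-1, 1])[None, :]

    H = Convolution(kernel)
    y = H * x

    y_gt = np.zeros((5, 5, 3))
    y_gt[:, 2, :] = -1
    y_gt[:, 3, :] = 1

    np.testing.assert_almost_equal(y, y_gt, decimal=10)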