Configuration of the resolution parameter for vision capsule datasets

zeilenschubser · Aug 3, 2023 · 22db699 · 22db699
1 parent 7b7860e
commit 22db699
Show file tree

Hide file tree

Showing 11 changed files with 48 additions and 16 deletions.
diff --git a/doc/configuration/configuration.md b/doc/configuration/configuration.md
@@ -133,6 +133,8 @@ data_split
 downsample
 : samplerate DESTRUCTIVE! Changes the samplerate of the real files on disk to the target samplerate. Prefer the `samplingrate` parameter instead.
 
+sensor
+: `resolution` sets the size that images are resized to, given as `[height, width]` (currently only for vision capsule datasets).
 #### variants
 variants for `kws`
 - v1, v2

diff --git a/hannah/conf/dataset/dresden_capsule.yaml b/hannah/conf/dataset/dresden_capsule.yaml
@@ -16,6 +16,8 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 ##
+defaults:
+  - sensor: naneye
 cls: hannah.datasets.vision.DresdenCapsuleDataset
 data_folder: ${hydra:runtime.cwd}/datasets/
 download_folder: MISSING

diff --git a/hannah/conf/dataset/kvasir_anomaly.yaml b/hannah/conf/dataset/kvasir_anomaly.yaml
@@ -21,3 +21,4 @@ defaults:
     - _self_
 
 anomaly: true
+
diff --git a/hannah/conf/dataset/kvasir_capsule.yaml b/hannah/conf/dataset/kvasir_capsule.yaml
@@ -16,6 +16,8 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 ##
+defaults:
+  - sensor: naneye
 cls: hannah.datasets.vision.KvasirCapsuleDataset
 data_folder: ${hydra:runtime.cwd}/datasets/
 dataset: kvasir_capsule
@@ -24,3 +26,6 @@ sampler: random
 weighted_loss: false
 anomaly: false
 split: official
+
+sensor:
+  resolution: [336,336]
diff --git a/hannah/conf/dataset/ri_capsule.yaml b/hannah/conf/dataset/ri_capsule.yaml
@@ -16,12 +16,13 @@
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 ##
+defaults:
+  - sensor: naneye
 cls: hannah.datasets.vision.RICapsuleDataset
 data_folder: ${hydra:runtime.cwd}/datasets/
 download_folder: MISSING
 dataset: ri_capsule
 val_percent: 0.1
 sampler: random
 weighted_loss: false
-
 split: official
diff --git a/hannah/conf/dataset/sensor/naneye.yaml b/hannah/conf/dataset/sensor/naneye.yaml
@@ -0,0 +1,20 @@
+##
+## Copyright (c) 2022 University of Tübingen.
+##
+## This file is part of hannah.
+## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
+##
+## Licensed under the Apache License, Version 2.0 (the "License");
+## you may not use this file except in compliance with the License.
+## You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+##
+name: naneye-m
+resolution: [320,320]
diff --git a/hannah/datasets/vision/base.py b/hannah/datasets/vision/base.py
@@ -20,8 +20,8 @@
 import re
 import tarfile
 from collections import Counter, namedtuple
-from typing import Dict, List
-
+from typing import Dict, List, Optional
+from omegaconf import DictConfig
 import albumentations as A
 import cv2
 import numpy as np
@@ -40,6 +40,7 @@
 class VisionDatasetBase(AbstractDataset):
     def __init__(self, config):
         self.config = config
+        self._resolution = [224, 224]
 
     @property
     def std(self):
@@ -51,7 +52,7 @@ def mean(self):
 
     @property
     def resolution(self):
-        return [224, 224]
+        return self._resolution
 
 
 class TorchvisionDatasetBase(VisionDatasetBase):

diff --git a/hannah/datasets/vision/dresden_capsule.py b/hannah/datasets/vision/dresden_capsule.py
@@ -30,6 +30,8 @@
 import numpy as np
 import pandas as pd
 import torchvision
+from albumentations.pytorch import ToTensorV2
+import albumentations as A
 import tqdm
 
 from .base import ImageDatasetBase
@@ -66,7 +68,8 @@ def splits(cls, config):
         X_val, y_val, labels = prepare_data(study_folder, val_data)
         X_test, y_test, labels = prepare_data(study_folder, test_data)
 
-        train_set = cls(X_train, y_train, labels)
+        transform = A.Compose([A.augmentations.geometric.resize.Resize(config.sensor.resolution[0], config.sensor.resolution[1]), ToTensorV2()])
+        train_set = cls(X_train, y_train, labels, transform=transform)
         val_set = cls(X_val, y_val, labels)
         test_set = cls(X_test, y_test, labels)
 

diff --git a/hannah/datasets/vision/kvasir.py b/hannah/datasets/vision/kvasir.py
@@ -23,10 +23,8 @@
 import re
 import tarfile
 from collections import Counter, defaultdict, namedtuple
-from typing import Dict, List
 
 import albumentations as A
-import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -255,12 +253,14 @@ def relable_anomaly(X):
             val_labels = relable_anomaly(val_labels)
             test_labels = relable_anomaly(test_labels)
 
+        transform = A.Compose([A.augmentations.geometric.resize.Resize(config.sensor.resolution[0], config.sensor.resolution[1]), ToTensorV2()])
         return (
             cls(
                 train_images,
                 train_labels,
                 classes,
                 split0_bbox,
+                transform=transform,
             ),
             cls(
                 val_images,

diff --git a/hannah/datasets/vision/kvasir_unlabeled.py b/hannah/datasets/vision/kvasir_unlabeled.py
@@ -153,14 +153,8 @@ def splits(cls, config):
         val_split = [metadata[-2]]
         test_split = [metadata[-1]]
 
-        mean = tuple(config.mean)
-        std = tuple(config.mean)
-        if isinstance(config.resolution, int):
-            resolution = (config.resolution, config.resolution)
-        else:
-            resolution = tuple(config.resolution)
-
-        train_set = cls(config, train_split)
+        transform = A.Compose([A.augmentations.geometric.resize.Resize(config.sensor.resolution[0], config.sensor.resolution[1]), ToTensorV2()])
+        train_set = cls(config, train_split, transform=transform)
         test_set = cls(config, test_split)
         val_set = cls(config, val_split)
 

diff --git a/hannah/datasets/vision/ri_capsule.py b/hannah/datasets/vision/ri_capsule.py
@@ -32,6 +32,8 @@
 import tqdm
 
 from .base import ImageDatasetBase
+from albumentations.pytorch import ToTensorV2
+import albumentations as A
 
 BASE_PATH = pathlib.Path(__file__).parent
 DATA_PATH = BASE_PATH / "ri_data"
@@ -145,7 +147,8 @@ def splits(cls, config):
         )
         X_test, y_test = read_official_test(study_folder, DATA_PATH / "path_test.csv")
 
-        train_set = cls(X_train, y_train, list(LABELS.keys()))
+        transform = A.Compose([A.augmentations.geometric.resize.Resize(config.sensor.resolution[0], config.sensor.resolution[1]), ToTensorV2()])
+        train_set = cls(X_train, y_train, list(LABELS.keys()), transform=transform)
         val_set = cls(X_val, y_val, list(LABELS.keys()))
         test_set = cls(X_test, y_test, list(LABELS.keys()))