Add PIL Image Compatibility with Record Components (airctic#849)

rsomani95 · web-flow · commit cb602f533c7e · 2021-05-24T16:31:22.000-03:00
* sketch pil img with record components

* explicit reference to PIL.Image

* add test

* better repr

* finalize PR

* fix np.array <-> PIL.Image related tests

* better repr
diff --git a/icevision/core/record_components.py b/icevision/core/record_components.py
@@ -99,39 +99,47 @@ def set_record_id(self, record_id: int):
         self.record_id = record_id
 
     def _repr(self) -> List[str]:
-        return [f"Image ID: {self.record_id}"]
+        return [f"Record ID: {self.record_id}"]
 
     def as_dict(self) -> dict:
         return {"record_id": self.record_id}
 
 
 # TODO: we need a way to combine filepath and image mixin
-# TODO: rename to ImageArrayRecordComponent
 class ImageRecordComponent(RecordComponent):
     def __init__(self, task=tasks.common):
         super().__init__(task=task)
         self.img = None
 
-    def set_img(self, img: np.ndarray):
+    def set_img(self, img: Union[PIL.Image.Image, np.ndarray]):
+        assert isinstance(img, (PIL.Image.Image, np.ndarray))
         self.img = img
-        height, width, _ = self.img.shape
+        if isinstance(img, PIL.Image.Image):
+            height, width = img.shape
+        elif isinstance(img, np.ndarray):
+            # else:
+            height, width, _ = self.img.shape
         # this should set on SizeRecordComponent
         self.composite.set_img_size(ImgSize(width=width, height=height), original=True)
 
     def _repr(self) -> List[str]:
         if self.img is not None:
-            ndims = len(self.img.shape)
-            if ndims == 3:  # RGB, RGBA
-                height, width, channels = self.img.shape
-            elif ndims == 2:  # Grayscale
-                height, width, channels = [*self.img.shape, 1]
-            else:
-                raise ValueError(
-                    f"Expected image to have 2 or 3 dimensions, got {ndims} instead"
-                )
-            return [f"Image: {width}x{height}x{channels} <np.ndarray> Image"]
+            if isinstance(self.img, np.ndarray):
+                ndims = len(self.img.shape)
+                if ndims == 3:  # RGB, RGBA
+                    height, width, channels = self.img.shape
+                elif ndims == 2:  # Grayscale
+                    height, width, channels = [*self.img.shape, 1]
+                else:
+                    raise ValueError(
+                        f"Expected image to have 2 or 3 dimensions, got {ndims} instead"
+                    )
+                return [f"Img: {width}x{height}x{channels} <np.ndarray> Image"]
+            elif isinstance(self.img, PIL.Image.Image):
+                height, width = self.img.shape
+                return [f"Img: {width}x{height} <PIL.Image; mode='{self.img.mode}'>"]
         else:
-            return [f"Image: {self.img}"]
+            return [f"Img: {self.img}"]
 
     def _unload(self):
         self.img = None
diff --git a/icevision/data/dataset.py b/icevision/data/dataset.py
@@ -35,6 +35,9 @@ def __getitem__(self, i):
         record = self.records[i].load()
         if self.tfm is not None:
             record = self.tfm(record)
+        else:
+            # HACK FIXME
+            record.set_img(np.array(record.img))
         return record
 
     def __repr__(self):
diff --git a/icevision/tfms/albumentations/albumentations_adapter.py b/icevision/tfms/albumentations/albumentations_adapter.py
@@ -42,7 +42,8 @@ def collect(self, record):
 
 class AlbumentationsImgComponent(AlbumentationsAdapterComponent):
     def setup_img(self, record):
-        self.adapter._albu_in["image"] = record.img
+        # NOTE - assumed that `record.img` is a PIL.Image
+        self.adapter._albu_in["image"] = np.array(record.img)
 
         self.adapter._collect_ops.append(CollectOp(self.collect))
 
@@ -298,7 +299,7 @@ def _get_size_without_padding(self, record) -> ImgSize:
         height, width, _ = self._albu_out["image"].shape
 
         if get_transform(self.tfms_list, "Pad") is not None:
-            after_pad_h, after_pad_w, _ = record.img.shape
+            after_pad_h, after_pad_w, _ = np.array(record.img).shape
 
             t = get_transform(self.tfms_list, "SmallestMaxSize")
             if t is not None:
diff --git a/icevision/utils/imageio.py b/icevision/utils/imageio.py
@@ -17,13 +17,19 @@
     if PIL.ExifTags.TAGS[_EXIF_ORIENTATION_TAG] == "Orientation":
         break
 
+# from enum import Enum
 
-def open_img(fn, gray=False):
+# class PILMode(Enum):
+#     blah
+
+# FIXME
+def open_img(fn, gray=False) -> PIL.Image.Image:
+    "Open an image from disk `fn` as a PIL Image"
     color = "L" if gray else "RGB"
     image = PIL.Image.open(str(fn))
     image = PIL.ImageOps.exif_transpose(image)
     image = image.convert(color)
-    return np.array(image)
+    return image
 
 
 # TODO: Deprecated
diff --git a/icevision/visualize/draw_data.py b/icevision/visualize/draw_data.py
@@ -74,7 +74,7 @@ def draw_sample(
     * include_only: (Optional) List of labels that must be exclusively plotted. Takes
                     precedence over `exclude_labels` (?)
     """
-    img = sample.img.copy()
+    img = np.asarray(sample.img).copy()  # HACK
     num_classification_plotted = 0
 
     # Dynamic font size based on image height
diff --git a/tests/core/test_record.py b/tests/core/test_record.py
@@ -70,7 +70,7 @@ def record_wrong_num_annotations(samples_source):
 def test_record_load(record):
     record_loaded = record.load()
 
-    assert isinstance(record_loaded.img, np.ndarray)
+    assert isinstance(record_loaded.img, PIL.Image.Image)
     assert isinstance(record_loaded.detection.masks, MaskArray)
 
     # test original record is not modified
diff --git a/tests/models/torchvision_models/mask_rcnn/test_predict.py b/tests/models/torchvision_models/mask_rcnn/test_predict.py
@@ -9,7 +9,7 @@ def sample_dataset(samples_source):
     images_dir = samples_source / "images"
     images_files = get_image_files(images_dir)[-2:]
 
-    images = [open_img(path) for path in images_files]
+    images = [np.array(open_img(path)) for path in images_files]
     images = [cv2.resize(image, (128, 128)) for image in images]
 
     return Dataset.from_images(images)
diff --git a/tests/transforms/test_albu_transform.py b/tests/transforms/test_albu_transform.py
@@ -24,7 +24,7 @@ def test_inference_transform(records, check_attributes_on_component):
     ds = Dataset.from_images([img], tfm)
 
     tfmed = ds[0]
-    assert (tfmed.img == img[:, ::-1, :]).all()
+    assert (tfmed.img == np.array(img)[:, ::-1, :]).all()
     check_attributes_on_component(tfmed)
 
 
diff --git a/tests/utils/test_imageio.py b/tests/utils/test_imageio.py
@@ -10,8 +10,14 @@
     ],
 )
 def test_open_img(samples_source, fn, expected):
-    assert open_img(samples_source / fn).shape == expected
+    # When returning np arrays
+    assert np.array(open_img(samples_source / fn)).shape == expected
+    assert np.array(open_img(samples_source / fn, gray=True)).shape == expected[:-1]
+
+    # When returning PIL Images; returns only (W,H) for size, not num. channels
+    assert open_img(samples_source / fn).shape == expected[:2]
     assert open_img(samples_source / fn, gray=True).shape == expected[:-1]
+    assert isinstance(open_img(samples_source / fn), PIL.Image.Image)
 
 
 @pytest.mark.parametrize(