jesperkers
diff --git a/icevision/models/inference.py
+131 b/icevision/models/inference.py
+131
diff --git a/icevision/models/mmdet/common/bbox/prediction.py
+5 b/icevision/models/mmdet/common/bbox/prediction.py
+5
diff --git a/icevision/models/ross/efficientdet/prediction.py
+5-1 b/icevision/models/ross/efficientdet/prediction.py
+5-1
diff --git a/icevision/models/torchvision/faster_rcnn/prediction.py
+5 b/icevision/models/torchvision/faster_rcnn/prediction.py
+5
diff --git a/icevision/models/torchvision/retinanet/prediction.py
+1 b/icevision/models/torchvision/retinanet/prediction.py
+1
diff --git a/icevision/models/ultralytics/yolov5/prediction.py
+5-1 b/icevision/models/ultralytics/yolov5/prediction.py
+5-1
diff --git a/icevision/tfms/albumentations/albumentations_adapter.py
+9-42 b/icevision/tfms/albumentations/albumentations_adapter.py
+9-42
@@ -0,0 +1,131 @@
+__all__ = ["process_bbox_predictions", "_end2end_detect"]
+
+from icevision.imports import *
+from icevision.core import *
+from icevision.data import *
+from icevision.tfms.albumentations.albumentations_helpers import (
+    get_size_without_padding,
+)
+from icevision.tfms.albumentations import albumentations_adapter
+
+
+def _end2end_detect(
+    img: Union[PIL.Image.Image, Path, str],
+    transforms: albumentations_adapter.Adapter,
+    model: torch.nn.Module,
+    class_map: ClassMap,
+    detection_threshold: float = 0.5,
+    predict_fn: Callable = None,
+):
+    """
+    Run Object Detection inference (only `bboxes`) on a single image.
+
+    Parameters
+    ----------
+    img: image to run inference on. Can be a string, Path or PIL.Image
+    transforms: icevision albumentations transforms
+    model: model to run inference with
+    class_map: ClassMap with the available categories
+    detection_threshold: confidence threshold below which boxes are discarded
+    predict_fn: callable invoked as `predict_fn(model, dataset, detection_threshold=...)`
+        whose first returned element is the prediction for `img`. Required despite
+        the `None` default.
+
+    Returns
+    -------
+    List of dicts with class, score and bbox coordinates adjusted to original image size and aspect ratio
+
+    Raises
+    ------
+    TypeError: if `predict_fn` is not callable
+    """
+    # Fail early with an explicit message instead of "'NoneType' object is not
+    # callable" after the image has already been opened and wrapped in a dataset.
+    if not callable(predict_fn):
+        raise TypeError(
+            "`predict_fn` must be a callable accepting "
+            "(model, dataset, detection_threshold=...), "
+            f"got {type(predict_fn).__name__}"
+        )
+
+    if isinstance(img, (str, Path)):
+        img = PIL.Image.open(Path(img))
+
+    # The dataset receives the image as an array; `img` itself is kept so the
+    # predicted boxes can be mapped back to its size afterwards.
+    infer_ds = Dataset.from_images([np.array(img)], transforms, class_map=class_map)
+    pred = predict_fn(model, infer_ds, detection_threshold=detection_threshold)[0]
+    return process_bbox_predictions(pred, img, transforms.tfms_list)
+
+
+def process_bbox_predictions(
+    pred: Prediction,
+    img: PIL.Image.Image,
+    transforms: List[Any],
+) -> List[Dict[str, Any]]:
+    """
+    Turn the detections of a prediction into plain dicts.
+
+    Parameters
+    ----------
+    pred: icevision prediction object
+    img: original image, before any model-pre-processing done
+    transforms: list of model-pre-processing transforms
+
+    Returns
+    -------
+    List with one dict per detection, holding the keys `class`, `score` and `bbox`
+    (`[xmin, ymin, xmax, ymax]` as returned by `postprocess_bbox`)
+    """
+    detection = pred.pred.detection
+
+    def _to_dict(box, confidence, category):
+        # Map the box back onto `img` using the size recorded in the prediction.
+        xmin, ymin, xmax, ymax = postprocess_bbox(
+            img, box, transforms, pred.pred.height, pred.pred.width
+        )
+        return {
+            "class": category,
+            "score": confidence,
+            "bbox": [xmin, ymin, xmax, ymax],
+        }
+
+    return [
+        _to_dict(box, confidence, category)
+        for box, confidence, category in zip(
+            detection.bboxes, detection.scores, detection.labels
+        )
+    ]
+
+
+def postprocess_bbox(
+    img: PIL.Image.Image, bbox: BBox, transforms: List[Any], h_after: int, w_after: int
+) -> Tuple[int, int, int, int]:
+    """
+    Post-process predicted bbox to adjust coordinates to input image size.
+
+    The padding offset is subtracted along the shorter side, the box is clipped
+    to the un-padded image area and the result is rescaled to the size of `img`.
+
+    Parameters
+    ----------
+    img: original image, before any model-pre-processing done
+    bbox: predicted bbox
+    transforms: list of model-pre-processing transforms
+    h_after: height of image after model-pre-processing transforms
+    w_after: width of image after model-pre-processing transforms
+
+    Returns
+    -------
+    Tuple with (xmin, ymin, xmax, ymax) rescaled and re-adjusted to match the original image size
+    """
+    w_before, h_before = img.size
+    h_after, w_after = get_size_without_padding(transforms, img, h_after, w_after)
+    # NOTE(review): the offset is half the difference between the un-padded sides,
+    # which presumes the padded model input is square with centered padding.
+    pad = np.abs(h_after - w_after) // 2
+
+    h_scale, w_scale = h_after / h_before, w_after / w_before
+    # Only the shorter side carries padding, so only that axis is shifted.
+    x_offset, y_offset = (0, pad) if h_after < w_after else (pad, 0)
+    xmin, xmax = int(bbox.xmin) - x_offset, int(bbox.xmax) - x_offset
+    ymin, ymax = int(bbox.ymin) - y_offset, int(bbox.ymax) - y_offset
+
+    # Clamp BOTH ends of every coordinate to the un-padded extent. Clamping only
+    # one side lets a box lying in the padding come out negative or inverted.
+    xmin, xmax = (min(max(x, 0), w_after) for x in (xmin, xmax))
+    ymin, ymax = (min(max(y, 0), h_after) for y in (ymin, ymax))
+
+    # Undo the resize so the coordinates refer to the original image.
+    xmin, xmax, ymin, ymax = (
+        int(xmin / w_scale),
+        int(xmax / w_scale),
+        int(ymin / h_scale),
+        int(ymax / h_scale),
+    )
+
+    return xmin, ymin, xmax, ymax
@@ -3,6 +3,7 @@
     "predict_from_dl",
     "convert_raw_prediction",
     "convert_raw_predictions",
+    "end2end_detect",
 ]
 
 from icevision.imports import *
@@ -13,6 +14,7 @@
 from icevision.models.mmdet.common.utils import *
 from icevision.models.mmdet.common.bbox.dataloaders import build_infer_batch
 from icevision.models.mmdet.common.utils import convert_background_from_last_to_zero
+from icevision.models.inference import *
 
 
 @torch.no_grad()
@@ -56,6 +58,9 @@ def predict(
     )
 
 
+end2end_detect = partial(_end2end_detect, predict_fn=predict)  # single-image inference using this module's `predict`
+
+
 def predict_from_dl(
     model: nn.Module,
     infer_dl: DataLoader,
 
@@ -1,4 +1,4 @@
-__all__ = ["predict", "predict_from_dl", "convert_raw_predictions"]
+__all__ = ["predict", "predict_from_dl", "convert_raw_predictions", "end2end_detect"]
 
 from icevision.imports import *
 from icevision.utils import *
@@ -7,6 +7,7 @@
 from icevision.models.utils import _predict_from_dl
 from icevision.models.ross.efficientdet.dataloaders import *
 from effdet import DetBenchTrain, DetBenchPredict, unwrap_bench
+from icevision.models.inference import *
 
 
 @torch.no_grad()
@@ -111,3 +112,6 @@ def convert_raw_predictions(
         preds.append(Prediction(pred=pred, ground_truth=record))
 
     return preds
+
+
+end2end_detect = partial(_end2end_detect, predict_fn=predict)  # single-image inference using this module's `predict`
@@ -3,6 +3,7 @@
     "predict_from_dl",
     "convert_raw_prediction",
     "convert_raw_predictions",
+    "end2end_detect",
 ]
 
 from icevision.imports import *
@@ -11,6 +12,7 @@
 from icevision.models.utils import _predict_from_dl
 from icevision.data import *
 from icevision.models.torchvision.faster_rcnn.dataloaders import *
+from icevision.models.inference import *
 
 
 @torch.no_grad()
@@ -133,3 +135,6 @@ def convert_raw_prediction(
         record.set_img(tensor_to_image(tensor_image))
 
     return Prediction(pred=pred, ground_truth=record)
+
+
+end2end_detect = partial(_end2end_detect, predict_fn=predict)  # single-image inference using this module's `predict`
@@ -3,6 +3,7 @@
     "predict_from_dl",
     "convert_raw_prediction",
     "convert_raw_predictions",
+    "end2end_detect",
 ]
 
 from icevision.models.torchvision.faster_rcnn.prediction import *
@@ -1,4 +1,4 @@
-__all__ = ["predict", "predict_from_dl", "convert_raw_predictions"]
+__all__ = ["predict", "predict_from_dl", "convert_raw_predictions", "end2end_detect"]
 
 from icevision.imports import *
 from icevision.utils import *
@@ -7,6 +7,7 @@
 from icevision.models.utils import _predict_from_dl
 from icevision.models.ultralytics.yolov5.dataloaders import *
 from yolov5.utils.general import non_max_suppression
+from icevision.models.inference import *
 
 
 @torch.no_grad()
@@ -117,3 +118,6 @@ def convert_raw_predictions(
         preds.append(Prediction(pred=pred, ground_truth=record))
 
     return preds
+
+
+end2end_detect = partial(_end2end_detect, predict_fn=predict)  # single-image inference using this module's `predict`
@@ -17,6 +17,10 @@
 from icevision.utils import *
 from icevision.core import *
 from icevision.tfms.transform import *
+from icevision.tfms.albumentations.albumentations_helpers import (
+    get_size_without_padding,
+    get_transform,
+)
 
 
 @dataclass
@@ -269,7 +273,11 @@ def apply(self, record):
         self._albu_out = tfms(**self._albu_in)
 
         # store additional info (might be used by components on `collect`)
-        self._size_no_padding = self._get_size_without_padding(record)
+        height, width, _ = self._albu_out["image"].shape
+        height, width = get_size_without_padding(
+            self.tfms_list, record.img, height, width
+        )
+        self._size_no_padding = ImgSize(width=width, height=height)
 
         # collect results
         for collect_op in sorted(self._collect_ops, key=lambda x: x.order):
@@ -295,24 +303,6 @@ def _filter_attribute(self, v: list):
         assert len(v) == len(self._keep_mask)
         return [o for o, keep in zip(v, self._keep_mask) if keep]
 
-    def _get_size_without_padding(self, record) -> ImgSize:
-        height, width, _ = self._albu_out["image"].shape
-
-        if get_transform(self.tfms_list, "Pad") is not None:
-            after_pad_h, after_pad_w, _ = np.array(record.img).shape
-
-            t = get_transform(self.tfms_list, "SmallestMaxSize")
-            if t is not None:
-                presize = t.max_size
-                height, width = _func_max_size(after_pad_h, after_pad_w, presize, min)
-
-            t = get_transform(self.tfms_list, "LongestMaxSize")
-            if t is not None:
-                size = t.max_size
-                height, width = _func_max_size(after_pad_h, after_pad_w, size, max)
-
-        return ImgSize(width=width, height=height)
-
 
 def _flatten_tfms(t):
     flat = []
@@ -330,26 +320,3 @@ def _is_iter(o):
         return True
     except:
         return False
-
-
-def get_transform(tfms_list, t):
-    for el in tfms_list:
-        if t in str(type(el)):
-            return el
-    return None
-
-
-def py3round(number):
-    """Unified rounding in all python versions."""
-    if abs(round(number) - number) == 0.5:
-        return int(2.0 * round(number / 2.0))
-
-    return int(round(number))
-
-
-def _func_max_size(height, width, max_size, func):
-    scale = max_size / float(func(width, height))
-
-    if scale != 1.0:
-        height, width = tuple(py3round(dim * scale) for dim in (height, width))
-    return height, width
Original file line number	Diff line number	Diff line change
`@@ -3,6 +3,7 @@`
`3`	`3`	`"predict_from_dl",`
`4`	`4`	`"convert_raw_prediction",`
`5`	`5`	`"convert_raw_predictions",`
	`6`	`+ "end2end_detect",`
`6`	`7`	`]`
`7`	`8`
`8`	`9`	`from icevision.models.torchvision.faster_rcnn.prediction import *`