Commit

select target object

hkchengrex committed Jul 30, 2022
1 parent 6cc1b50 commit 8a4b0bc
Showing 3 changed files with 30 additions and 19 deletions.
1 change: 1 addition & 0 deletions docs/DEMO.md
@@ -45,6 +45,7 @@ python interactive_demo.py --video [path to the video] --num_objects 4
* For "layered insertion" (e.g., the breakdance demo), use the "layered" overlay mode. You can load a custom layer using "Import layer". The layer should be an RGBA png file.
* The "save overlay during propagation" checkbox does exactly that. It does not save the overlay when the user is just scrubbing the timeline.
* For "popup" and "layered", the visualizations during propagation (and the saved overlays) have higher quality then when the user is scrubbing the timeline. This is because we have access to the soft probability mask during propagation.
* Both "popup" and "layered" use a binary mask. By default, the first object mask is used. You can change the target object using the middle mouse key.

## FAQ

26 changes: 18 additions & 8 deletions inference/interact/gui.py
@@ -337,23 +337,24 @@ def __init__(self, net: XMem,
self.overlay_layer = None
self.overlay_layer_torch = None

# the object id used for popup/layered overlay
self.vis_target_object = 1
# try to load the default overlay
self._try_load_layer('./docs/ECCV-logo.png')

self.load_current_image_mask()
self.show_current_frame()
self.show()

self.console_push_text('Initialized.')
self.initialized = True

# try to load the default overlay
self._try_load_layer('./docs/ECCV-logo.png')

def resizeEvent(self, event):
self.show_current_frame()

def console_push_text(self, text):
self.console.appendPlainText(text)
self.console.moveCursor(QTextCursor.End)
print(text)
self.console.insertPlainText(text+'\n')

def interaction_radio_clicked(self, event):
self.last_interaction = self.curr_interaction
@@ -394,7 +395,8 @@ def load_current_torch_image_mask(self, no_mask=False):
self.current_prob = index_numpy_to_one_hot_torch(self.current_mask, self.num_objects+1).cuda()

def compose_current_im(self):
self.viz = get_visualization(self.viz_mode, self.current_image, self.current_mask, self.overlay_layer)
self.viz = get_visualization(self.viz_mode, self.current_image, self.current_mask,
self.overlay_layer, self.vis_target_object)

def update_interact_vis(self):
# Update the interactions without re-computing the overlay
@@ -434,7 +436,7 @@ def update_minimap(self):
def update_current_image_fast(self):
# fast path, uses gpu. Changes the image in-place to avoid copying
self.viz = get_visualization_torch(self.viz_mode, self.current_image_torch_no_norm,
self.current_prob, self.overlay_layer_torch)
self.current_prob, self.overlay_layer_torch, self.vis_target_object)
if self.save_visualization:
self.res_man.save_visualization(self.cursur, self.viz)

@@ -691,8 +693,16 @@ def on_mouse_press(self, event):
if self.is_pos_out_of_bound(event.x(), event.y()):
return

# mid-click
if (event.button() == Qt.MidButton):
ex, ey = self.get_scaled_pos(event.x(), event.y())
self.vis_target_object = self.current_mask[int(ey),int(ex)]
self.console_push_text(f'Target object for visualization changed to {self.vis_target_object}')
self.show_current_frame()
return

self.right_click = (event.button() == Qt.RightButton)
self.pressed = True
self.right_click = (event.button() != 1)

h, w = self.height, self.width

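For reference, the mid-click handler added above amounts to reading the object id at the clicked pixel from the integer label map and storing it as the visualization target. A standalone illustration of that lookup (dummy mask and click position, not the GUI's actual state):

```python
# The GUI keeps an integer label map (0 = background, 1..N = objects); the
# label under the middle-click becomes the target object for "popup"/"layered".
import numpy as np

current_mask = np.zeros((480, 854), dtype=np.uint8)
current_mask[100:200, 300:500] = 1   # pretend object 1 occupies this box
current_mask[250:400, 100:250] = 2   # pretend object 2 occupies this box

ex, ey = 350, 150                    # a (scaled) click position: x, y
vis_target_object = int(current_mask[int(ey), int(ex)])
print(f'Target object for visualization changed to {vis_target_object}')  # -> 1
```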
22 changes: 11 additions & 11 deletions inference/interact/interactive_utils.py
@@ -37,21 +37,21 @@ def index_numpy_to_one_hot_torch(mask, num_classes):
if torch.cuda.is_available():
grayscale_weights_torch = torch.from_numpy(grayscale_weights).cuda().unsqueeze(0)

def get_visualization(mode, image, mask, layer):
def get_visualization(mode, image, mask, layer, target_object):
if mode == 'fade':
return overlay_davis(image, mask, fade=True)
elif mode == 'davis':
return overlay_davis(image, mask)
elif mode == 'light':
return overlay_davis(image, mask, 0.9)
elif mode == 'popup':
return overlay_popup(image, mask)
return overlay_popup(image, mask, target_object)
elif mode == 'layered':
if layer is None:
print('Layer file not given. Defaulting to DAVIS.')
return overlay_davis(image, mask)
else:
return overlay_layer(image, mask, layer)
return overlay_layer(image, mask, layer, target_object)
else:
raise NotImplementedError
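
A hedged usage sketch for the updated `get_visualization` signature (run from the repository root with its dependencies installed; the frame and mask below are dummy data):

```python
# 'popup' and 'layered' now take a target object id; the other modes are
# unchanged. With mode='popup', all pixels outside the chosen object (here: 2)
# are converted to grayscale.
import numpy as np
from inference.interact.interactive_utils import get_visualization

image = np.random.randint(0, 255, (480, 854, 3), dtype=np.uint8)  # RGB frame
mask = np.zeros((480, 854), dtype=np.uint8)
mask[100:200, 300:500] = 1
mask[250:400, 100:250] = 2

viz = get_visualization('popup', image, mask, layer=None, target_object=2)
```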

@@ -86,20 +86,20 @@ def overlay_davis(image, mask, alpha=0.5, fade=False):
im_overlay[~binary_mask] = im_overlay[~binary_mask] * 0.6
return im_overlay.astype(image.dtype)

def overlay_popup(image, mask):
def overlay_popup(image, mask, target_object):
# Keep foreground colored. Convert background to grayscale.
im_overlay = image.copy()

binary_mask = ~(mask > 0)
binary_mask = ~(mask == target_object)
colored_region = (im_overlay[binary_mask]*grayscale_weights).sum(-1, keepdims=-1)
im_overlay[binary_mask] = colored_region
return im_overlay.astype(image.dtype)

def overlay_layer(image, mask, layer):
def overlay_layer(image, mask, layer, target_object):
# insert a layer between foreground and background
# The CPU version is less accurate because we are using the hard mask
# The GPU version has softer edges as it uses soft probabilities
obj_mask = (mask > 0).astype(np.float32)
obj_mask = (mask == target_object).astype(np.float32)
layer_alpha = layer[:, :, 3].astype(np.float32) / 255
layer_rgb = layer[:, :, :3]
background_alpha = np.maximum(obj_mask, layer_alpha)[:,:,np.newaxis]
@@ -127,11 +127,11 @@ def overlay_davis_torch(image, mask, alpha=0.5, fade=False):

return im_overlay

def overlay_popup_torch(image, mask):
def overlay_popup_torch(image, mask, target_object):
# Keep foreground colored. Convert background to grayscale.
image = image.permute(1, 2, 0)

obj_mask = mask[1:].max(dim=0)[0].unsqueeze(2)
obj_mask = mask[target_object].unsqueeze(2)
gray_image = (image*grayscale_weights_torch).sum(-1, keepdim=True)
im_overlay = obj_mask*image + (1-obj_mask)*gray_image

@@ -140,13 +140,13 @@ def overlay_popup_torch(image, mask):

return im_overlay

def overlay_layer_torch(image, mask, layer):
def overlay_layer_torch(image, mask, layer, target_object):
# insert a layer between foreground and background
# The CPU version is less accurate because we are using the hard mask
# The GPU version has softer edges as it uses soft probabilities
image = image.permute(1, 2, 0)

obj_mask = mask[1:].max(dim=0)[0]
obj_mask = mask[target_object]
mask = torch.argmax(mask, dim=0)
layer_alpha = layer[:, :, 3]
layer_rgb = layer[:, :, :3]
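The comments above point out that the CPU path blends with a hard mask while the GPU path uses soft probabilities, which yields softer edges. A toy NumPy illustration of the difference (made-up data, not repository code):

```python
# Hard mask: every pixel is either fully colored or fully gray, so the
# object boundary is a hard step. Soft mask: probabilities near the boundary
# mix the two images, producing a smoother transition.
import numpy as np

gray_weights = np.array([0.299, 0.587, 0.114])           # same idea as grayscale_weights
image = np.random.randint(0, 255, (4, 6, 3)).astype(np.float32)
gray = (image * gray_weights).sum(-1, keepdims=True)

soft_prob = np.linspace(0, 1, 6)[None, :, None]          # P(target object) along a row
hard_mask = (soft_prob > 0.5).astype(np.float32)         # hard, argmax-style decision

popup_hard = hard_mask * image + (1 - hard_mask) * gray  # abrupt edge
popup_soft = soft_prob * image + (1 - soft_prob) * gray  # gradual edge
```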
