Commit

select target object

hkchengrex committed Jul 30, 2022
1 parent 6cc1b50 commit 8a4b0bc
Showing 3 changed files with 30 additions and 19 deletions.
1 change: 1 addition & 0 deletions docs/DEMO.md
@@ -45,6 +45,7 @@ python interactive_demo.py --video [path to the video] --num_objects 4
* For "layered insertion" (e.g., the breakdance demo), use the "layered" overlay mode. You can load a custom layer using "Import layer". The layer should be an RGBA png file.
* The "save overlay during propagation" checkbox does exactly that. It does not save the overlay when the user is just scrubbing the timeline.
* For "popup" and "layered", the visualizations during propagation (and the saved overlays) have higher quality then when the user is scrubbing the timeline. This is because we have access to the soft probability mask during propagation.
* Both "popup" and "layered" use a binary mask. By default, the first object mask is used. You can change the target object using the middle mouse key.

## FAQ

26 changes: 18 additions & 8 deletions inference/interact/gui.py
@@ -337,23 +337,24 @@ def __init__(self, net: XMem,
self.overlay_layer = None
self.overlay_layer_torch = None

# the object id used for popup/layered overlay
self.vis_target_object = 1
# try to load the default overlay
self._try_load_layer('./docs/ECCV-logo.png')

self.load_current_image_mask()
self.show_current_frame()
self.show()

self.console_push_text('Initialized.')
self.initialized = True

# try to load the default overlay
self._try_load_layer('./docs/ECCV-logo.png')

def resizeEvent(self, event):
self.show_current_frame()

def console_push_text(self, text):
self.console.appendPlainText(text)
self.console.moveCursor(QTextCursor.End)
print(text)
self.console.insertPlainText(text+'\n')

def interaction_radio_clicked(self, event):
self.last_interaction = self.curr_interaction
@@ -394,7 +395,8 @@ def load_current_torch_image_mask(self, no_mask=False):
self.current_prob = index_numpy_to_one_hot_torch(self.current_mask, self.num_objects+1).cuda()

def compose_current_im(self):
self.viz = get_visualization(self.viz_mode, self.current_image, self.current_mask, self.overlay_layer)
self.viz = get_visualization(self.viz_mode, self.current_image, self.current_mask,
self.overlay_layer, self.vis_target_object)

def update_interact_vis(self):
# Update the interactions without re-computing the overlay
@@ -434,7 +436,7 @@ def update_minimap(self):
def update_current_image_fast(self):
# fast path, uses gpu. Changes the image in-place to avoid copying
self.viz = get_visualization_torch(self.viz_mode, self.current_image_torch_no_norm,
self.current_prob, self.overlay_layer_torch)
self.current_prob, self.overlay_layer_torch, self.vis_target_object)
if self.save_visualization:
self.res_man.save_visualization(self.cursur, self.viz)

@@ -691,8 +693,16 @@ def on_mouse_press(self, event):
if self.is_pos_out_of_bound(event.x(), event.y()):
return

# mid-click
if (event.button() == Qt.MidButton):
ex, ey = self.get_scaled_pos(event.x(), event.y())
self.vis_target_object = self.current_mask[int(ey),int(ex)]
self.console_push_text(f'Target object for visualization changed to {self.vis_target_object}')
self.show_current_frame()
return

self.right_click = (event.button() == Qt.RightButton)
self.pressed = True
self.right_click = (event.button() != 1)

h, w = self.height, self.width

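For reference, the mid-click handler added above amounts to reading the object id at the clicked pixel from the integer label map and storing it as the visualization target. A standalone illustration of that lookup (dummy mask and click position, not the GUI's actual state):

```python
# The GUI keeps an integer label map (0 = background, 1..N = objects); the
# label under the middle-click becomes the target object for "popup"/"layered".
import numpy as np

current_mask = np.zeros((480, 854), dtype=np.uint8)
current_mask[100:200, 300:500] = 1   # pretend object 1 occupies this box
current_mask[250:400, 100:250] = 2   # pretend object 2 occupies this box

ex, ey = 350, 150                    # a (scaled) click position: x, y
vis_target_object = int(current_mask[int(ey), int(ex)])
print(f'Target object for visualization changed to {vis_target_object}')  # -> 1
```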
22 changes: 11 additions & 11 deletions inference/interact/interactive_utils.py
@@ -37,21 +37,21 @@ def index_numpy_to_one_hot_torch(mask, num_classes):
if torch.cuda.is_available():
grayscale_weights_torch = torch.from_numpy(grayscale_weights).cuda().unsqueeze(0)

def get_visualization(mode, image, mask, layer):
def get_visualization(mode, image, mask, layer, target_object):
if mode == 'fade':
return overlay_davis(image, mask, fade=True)
elif mode == 'davis':
return overlay_davis(image, mask)
elif mode == 'light':
return overlay_davis(image, mask, 0.9)
elif mode == 'popup':
return overlay_popup(image, mask)
return overlay_popup(image, mask, target_object)
elif mode == 'layered':
if layer is None:
print('Layer file not given. Defaulting to DAVIS.')
return overlay_davis(image, mask)
else:
return overlay_layer(image, mask, layer)
return overlay_layer(image, mask, layer, target_object)
else:
raise NotImplementedError
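
A hedged usage sketch for the updated `get_visualization` signature (run from the repository root with its dependencies installed; the frame and mask below are dummy data):

```python
# 'popup' and 'layered' now take a target object id; the other modes are
# unchanged. With mode='popup', all pixels outside the chosen object (here: 2)
# are converted to grayscale.
import numpy as np
from inference.interact.interactive_utils import get_visualization

image = np.random.randint(0, 255, (480, 854, 3), dtype=np.uint8)  # RGB frame
mask = np.zeros((480, 854), dtype=np.uint8)
mask[100:200, 300:500] = 1
mask[250:400, 100:250] = 2

viz = get_visualization('popup', image, mask, layer=None, target_object=2)
```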

@@ -86,20 +86,20 @@ def overlay_davis(image, mask, alpha=0.5, fade=False):
im_overlay[~binary_mask] = im_overlay[~binary_mask] * 0.6
return im_overlay.astype(image.dtype)

def overlay_popup(image, mask):
def overlay_popup(image, mask, target_object):
# Keep foreground colored. Convert background to grayscale.
im_overlay = image.copy()

binary_mask = ~(mask > 0)
binary_mask = ~(mask == target_object)
colored_region = (im_overlay[binary_mask]*grayscale_weights).sum(-1, keepdims=-1)
im_overlay[binary_mask] = colored_region
return im_overlay.astype(image.dtype)

def overlay_layer(image, mask, layer):
def overlay_layer(image, mask, layer, target_object):
# insert a layer between foreground and background
# The CPU version is less accurate because we are using the hard mask
# The GPU version has softer edges as it uses soft probabilities
obj_mask = (mask > 0).astype(np.float32)
obj_mask = (mask == target_object).astype(np.float32)
layer_alpha = layer[:, :, 3].astype(np.float32) / 255
layer_rgb = layer[:, :, :3]
background_alpha = np.maximum(obj_mask, layer_alpha)[:,:,np.newaxis]
@@ -127,11 +127,11 @@ def overlay_davis_torch(image, mask, alpha=0.5, fade=False):

return im_overlay

def overlay_popup_torch(image, mask):
def overlay_popup_torch(image, mask, target_object):
# Keep foreground colored. Convert background to grayscale.
image = image.permute(1, 2, 0)

obj_mask = mask[1:].max(dim=0)[0].unsqueeze(2)
obj_mask = mask[target_object].unsqueeze(2)
gray_image = (image*grayscale_weights_torch).sum(-1, keepdim=True)
im_overlay = obj_mask*image + (1-obj_mask)*gray_image

@@ -140,13 +140,13 @@ def overlay_popup_torch(image, mask):

return im_overlay

def overlay_layer_torch(image, mask, layer):
def overlay_layer_torch(image, mask, layer, target_object):
# insert a layer between foreground and background
# The CPU version is less accurate because we are using the hard mask
# The GPU version has softer edges as it uses soft probabilities
image = image.permute(1, 2, 0)

obj_mask = mask[1:].max(dim=0)[0]
obj_mask = mask[target_object]
mask = torch.argmax(mask, dim=0)
layer_alpha = layer[:, :, 3]
layer_rgb = layer[:, :, :3]
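The comments above point out that the CPU path blends with a hard mask while the GPU path uses soft probabilities, which yields softer edges. A toy NumPy illustration of the difference (made-up data, not repository code):

```python
# Hard mask: every pixel is either fully colored or fully gray, so the
# object boundary is a hard step. Soft mask: probabilities near the boundary
# mix the two images, producing a smoother transition.
import numpy as np

gray_weights = np.array([0.299, 0.587, 0.114])           # same idea as grayscale_weights
image = np.random.randint(0, 255, (4, 6, 3)).astype(np.float32)
gray = (image * gray_weights).sum(-1, keepdims=True)

soft_prob = np.linspace(0, 1, 6)[None, :, None]          # P(target object) along a row
hard_mask = (soft_prob > 0.5).astype(np.float32)         # hard, argmax-style decision

popup_hard = hard_mask * image + (1 - hard_mask) * gray  # abrupt edge
popup_soft = soft_prob * image + (1 - soft_prob) * gray  # gradual edge
```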
