Decoupling optical flow post-processing, supporting direct prediction (
LutaoChu authored Sep 30, 2021
1 parent 314a1da commit fab0209
Showing 3 changed files with 76 additions and 45 deletions.
11 changes: 9 additions & 2 deletions contrib/PP-HumanSeg/README.md
@@ -110,7 +110,6 @@ python data/download_data.py
```

### Portrait Segmentation on Video Streams
Combine the predictions of the DIS (Dense Inverse Search-based method) optical flow algorithm with the segmentation results to improve portrait segmentation on video streams.
```bash
# Real-time segmentation using the computer's webcam
python bg_replace.py \
@@ -126,6 +125,14 @@ python bg_replace.py \

<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/result.gif" width="20%" height="20%">

We also support the DIS (Dense Inverse Search-based method) optical flow post-processing algorithm, which combines the optical flow results with the segmentation results to reduce frame-to-frame flicker in video prediction. Simply add `--use_optic_flow` to enable optical flow post-processing, for example:
```bash
# Add optical flow post-processing
python bg_replace.py \
--config export_model/ppseg_lite_portrait_398x224_with_softmax/deploy.yaml \
--use_optic_flow
```
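
To make the `--use_optic_flow` behavior concrete, below is a minimal sketch, assuming OpenCV is installed, of what DIS-based post-processing does conceptually: estimate dense flow between consecutive grayscale frames, warp the previous frame's score map to the current frame, and blend the two. The function name `fuse_with_flow` and the fixed blending weight are illustrative assumptions; the repository's actual logic (including its confidence handling) lives in its `optic_flow_process` helper.
```python
import cv2
import numpy as np

# DIS optical flow estimator, the same OpenCV primitive the optical flow
# post-processing is built on.
disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)

def fuse_with_flow(prev_gray, cur_gray, prev_score, cur_score, weight=0.3):
    """Warp the previous frame's score map along the estimated flow and
    blend it with the current score map to suppress flicker.
    prev_gray/cur_gray: uint8 grayscale frames of identical size.
    prev_score/cur_score: float32 score maps of the same size.
    """
    h, w = cur_gray.shape
    # Backward flow: for each current pixel, where it came from in the previous frame.
    flow = disflow.calc(cur_gray, prev_gray, None)
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[..., 0]).astype(np.float32)
    map_y = (grid_y + flow[..., 1]).astype(np.float32)
    warped_prev = cv2.remap(prev_score, map_x, map_y, cv2.INTER_LINEAR)
    # Temporal smoothing: mix warped history with the current prediction.
    return weight * warped_prev + (1 - weight) * cur_score
```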

### Background Replacement on Video Streams
Replace the background with the one you select; the background can be either an image or a video.
```bash
@@ -172,7 +179,7 @@ python pretrained_model/download_pretrained_model.py
```

### Training
Demonstrates fine-tuning based on the above models. We use the extracted mini_supervisely dataset as the example dataset. Taking PP-HumanSeg-Mobile as an example, the training command is as follows:
Demonstrates how to fine-tune based on the above models. We use the extracted mini_supervisely dataset as the example dataset. Taking PP-HumanSeg-Mobile as an example, the training command is as follows:
```bash
export CUDA_VISIBLE_DEVICES=0 # Use 1 visible GPU card
# On Windows, run the following command instead
23 changes: 15 additions & 8 deletions contrib/PP-HumanSeg/bg_replace.py
@@ -25,7 +25,8 @@


def parse_args():
parser = argparse.ArgumentParser(description='PP-HumanSeg inference for video')
parser = argparse.ArgumentParser(
description='PP-HumanSeg inference for video')
parser.add_argument(
"--config",
dest="cfg",
@@ -73,15 +74,21 @@ def parse_args():
default='./output')

parser.add_argument(
'--with_argmax',
dest='with_argmax',
help='Perform argmax operation on the predict result.',
'--use_optic_flow',
dest='use_optic_flow',
help='Use optical flow for post-processing.',
action='store_true')
parser.add_argument(
'--not_soft_predict',
dest='not_soft_predict',
help=
'If this is turned on, the prediction result will be output directly without using soft predict',
'--soft_predict',
dest='soft_predict',
default=True,
type=eval,
choices=[True, False],
help='Whether to use prediction results with transparency')
parser.add_argument(
'--add_argmax',
dest='add_argmax',
help='Perform argmax operation on the predict result.',
action='store_true')

parser.add_argument(
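A note on the `--soft_predict` flag defined above: because it is declared with `type=eval` and `choices=[True, False]`, the string passed on the command line is evaluated into a Python boolean. Below is a minimal, self-contained sketch of that parsing behavior; the parser here is a stand-alone illustration, not the repository's full `parse_args`.
```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--soft_predict',
    dest='soft_predict',
    default=True,
    type=eval,              # "True"/"False" strings are evaluated to Python bools
    choices=[True, False])

print(parser.parse_args(['--soft_predict', 'False']).soft_predict)  # -> False
print(parser.parse_args([]).soft_predict)                           # -> True (default)
```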
87 changes: 52 additions & 35 deletions contrib/PP-HumanSeg/deploy/infer.py
@@ -110,48 +110,65 @@ def run(self, img, bg):
output_names = self.predictor.get_output_names()
output_handle = self.predictor.get_output_handle(output_names[0])
output = output_handle.copy_to_cpu()

return self.postprocess(output, img, ori_shapes[0], bg)

def postprocess(self, pred, img, ori_shape, bg):
if not os.path.exists(self.args.save_dir):
os.makedirs(self.args.save_dir)
resize_w = pred.shape[3]
resize_h = pred.shape[2]
if self.args.with_argmax:
pred = reverse_transform(
paddle.to_tensor(pred), ori_shape, self.cfg.transforms)
result = np.argmax(np.array(pred), axis=0)
elif not self.args.not_soft_predict:
score_map = pred[:, 1, :, :].squeeze(0)
score_map = 255 * score_map

cur_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
optflow_map = optic_flow_process(cur_gray, score_map, self.prev_gray, self.prev_cfd, \
self.disflow, self.is_init)
self.prev_gray = cur_gray.copy()
self.prev_cfd = optflow_map.copy()
self.is_init = False

score_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
score_map = np.transpose(score_map, [2, 0, 1])[np.newaxis, ...]
score_map = reverse_transform(
paddle.to_tensor(score_map),
ori_shape,
self.cfg.transforms,
mode='bilinear')
score_map = np.transpose(score_map.numpy().squeeze(0),
resize_w = pred.shape[-1]
resize_h = pred.shape[-2]
if self.args.soft_predict:
if self.args.use_optic_flow:
# Optical flow post-processing: fuse the current score map with the flow-warped previous frame to reduce flicker.
score_map = pred[:, 1, :, :].squeeze(0)
score_map = 255 * score_map
cur_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
optflow_map = optic_flow_process(cur_gray, score_map, self.prev_gray, self.prev_cfd, \
self.disflow, self.is_init)
self.prev_gray = cur_gray.copy()
self.prev_cfd = optflow_map.copy()
self.is_init = False

score_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
score_map = np.transpose(score_map, [2, 0, 1])[np.newaxis, ...]
score_map = reverse_transform(
paddle.to_tensor(score_map),
ori_shape,
self.cfg.transforms,
mode='bilinear')
alpha = np.transpose(score_map.numpy().squeeze(0),
[1, 2, 0]) / 255
h, w, _ = img.shape
bg = cv2.resize(bg, (w, h))
if bg.ndim == 2:
bg = bg[..., np.newaxis]

result = (score_map * img + (1 - score_map) * bg).astype(np.uint8)
else:
# Soft prediction without optical flow: use the foreground score map directly as the alpha matte.
score_map = pred[:, 1, :, :]
score_map = score_map[np.newaxis, ...]
score_map = reverse_transform(
paddle.to_tensor(score_map),
ori_shape,
self.cfg.transforms,
mode='bilinear')
alpha = np.transpose(score_map.numpy().squeeze(0), [1, 2, 0])

else:
# Direct prediction: resize the raw network output back to the original shape without soft blending.
if pred.ndim == 3:
pred = pred[:, np.newaxis, ...]
result = reverse_transform(
paddle.to_tensor(pred), ori_shape, self.cfg.transforms)
paddle.to_tensor(pred, dtype='float32'),
ori_shape,
self.cfg.transforms,
mode='bilinear')

return result
result = np.array(result)
if self.args.add_argmax:
result = np.argmax(result, axis=1)
else:
result = result.squeeze(1)
alpha = np.transpose(result, [1, 2, 0])

# Background replacement: alpha-blend the frame over the resized background
h, w, _ = img.shape
bg = cv2.resize(bg, (w, h))
if bg.ndim == 2:
bg = bg[..., np.newaxis]

comb = (alpha * img + (1 - alpha) * bg).astype(np.uint8)
return comb
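
For reference, the background-replacement step at the end of `postprocess` boils down to a single alpha blend. The helper below is an illustrative restatement that can be exercised on its own; the name `blend` and the synthetic smoke test are not part of the repository.
```python
import cv2
import numpy as np

def blend(img, bg, alpha):
    """Alpha-blend a frame over a new background.
    img:   HxWx3 uint8 frame
    bg:    background image (any size, grayscale or color)
    alpha: HxWx1 float matte in [0, 1], e.g. the soft score map above
    """
    h, w, _ = img.shape
    bg = cv2.resize(bg, (w, h))
    if bg.ndim == 2:                      # grayscale background -> add a channel axis
        bg = bg[..., np.newaxis]
    return (alpha * img + (1 - alpha) * bg).astype(np.uint8)

# Tiny smoke test with synthetic data: a half-transparent matte.
frame = np.full((4, 4, 3), 200, dtype=np.uint8)
background = np.zeros((8, 8, 3), dtype=np.uint8)
matte = np.full((4, 4, 1), 0.5, dtype=np.float32)
print(blend(frame, background, matte)[0, 0])  # -> [100 100 100]
```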
