neoguojing committed Aug 18, 2024
1 parent 9055c9a commit a7a0bc3
Showing 7 changed files with 51 additions and 39 deletions.
16 changes: 6 additions & 10 deletions detectron/demo/event_handler.py
@@ -98,7 +98,7 @@ def create_event_handlers():
     )
 
     components["face_submit_btn"].click(
-        do_face_refernce,gradio('face_type','face_input'),gradio("face_output",'face_image_output')
+        do_face_refernce,gradio('face_type','face_input','face_input2'),gradio("face_output",'face_image_output')
     )
 
     components["sam_version"].change(
@@ -220,25 +220,21 @@ def ui_by_facetype(face_type):
     print("ui_by_facetype",face_type)
 
 
-def do_face_refernce(algo_type,input_images):
-    print("input image",input_images)
+def do_face_refernce(algo_type,input_image,input_image1):
+    print("input image",input_image)
     print(algo_type)
 
-    if input_images is None:
+    if input_image is None:
        gr.Warning('请上传图片')
        return None,None
 
-    input1 = input_images[0][0]
-    input2 = None
    algo_type = face_algo_map[algo_type]
-    if algo_type == "compare" and len(input_images) >=2:
-        input2 = input_images[1][0]
-    elif algo_type == "compare" and len(input_images) < 2:
+    if algo_type == "compare" and input_image1 is None:
        gr.Warning('请上传两张图片')
        return None,None
 
    m = FaceAlgo() # pragma: no cover
-    out,faces = m.predict(pil_image=input1,pil_image1=input2,algo_type=algo_type)
+    out,faces = m.predict(pil_image=input_image,pil_image1=input_image1,algo_type=algo_type)
 
    return out,faces

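The handler change pairs with the ui.py change further down: the single gallery input is split into two gr.Image components, and the new 'face_input2' key becomes the handler's third positional argument. A minimal, self-contained sketch of this wiring pattern (stand-in names, not the repo's gradio() helper; assuming Gradio 4.x):

import gradio as gr

def compare_faces(face_type, img_a, img_b):
    # argument order mirrors the inputs list passed to .click() below
    if img_a is None:
        return "please upload an image"
    if face_type == "compare" and img_b is None:
        return "please upload two images"
    size_b = img_b.size if img_b is not None else None
    return f"{face_type}: {img_a.size} vs {size_b}"

with gr.Blocks() as demo:
    face_type = gr.Dropdown(["detect", "compare"], value="compare", label="type")
    img_a = gr.Image(type="pil", label="input")
    img_b = gr.Image(type="pil", label="input (compare)")
    result = gr.Textbox(label="result")
    gr.Button("run").click(compare_faces, [face_type, img_a, img_b], result)

if __name__ == "__main__":
    demo.launch()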
13 changes: 8 additions & 5 deletions detectron/demo/face.py
@@ -118,15 +118,18 @@ def recognition(self,a):
        #face recognition
        dfs = DeepFace.find(
            img_path = a,
-            db_path = "./test/",
+            db_path = "./examples/",
            detector_backend = self.backends[1],
            distance_metric = self.distance_metric[0],
        )
 
        print(dfs)
        json_list = [df.to_json(orient='records') for df in dfs]
-        top1_path = dfs[0].at[0, 'identity']
-        top1_pil = Image.open(top1_path)
-        return json_list,[top1_pil]
+        # top1_path = dfs[0].at[0, 'identity']
+        # top1_pil = Image.open(top1_path)
+
+        identities = [identity for df in dfs for identity in df['identity'].tolist()]
+        print(identities)
+        return json_list,identities
 
    def embeddings(self,a):
        #embeddings
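The recognition change stops returning only the top-1 match (now commented out) and instead flattens every 'identity' path from the list of DataFrames that DeepFace.find returns, one DataFrame per detected face. A toy sketch of that flattening, using stand-in DataFrames rather than a real DeepFace call:

import pandas as pd

# stand-ins for DeepFace.find's result: one DataFrame per detected face,
# each with an 'identity' column of matched file paths
dfs = [
    pd.DataFrame({"identity": ["examples/face1.jpeg"]}),
    pd.DataFrame({"identity": ["examples/face2.jpeg", "examples/face1.jpeg"]}),
]

json_list = [df.to_json(orient="records") for df in dfs]
identities = [identity for df in dfs for identity in df["identity"].tolist()]
print(identities)
# ['examples/face1.jpeg', 'examples/face2.jpeg', 'examples/face1.jpeg']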
21 changes: 14 additions & 7 deletions detectron/demo/sam_everything.py
@@ -265,15 +265,22 @@ def seg_with_promp(self, input_image=None, video_dir=None,point_coords=None, box
 
        if video_dir is not None:
            video_dir,frame_names,fps,frame_size = self.extract_frames(video_dir)
+            print(video_dir,frame_names[0],fps,frame_size)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # use the 'mp4v' encoder
-            out_video_dir = os.path.join(video_dir,"after_inference.mp4")
+            out_video_dir = os.path.join("./output","after_inference.mp4")
+            print("out_video_dir:",out_video_dir)
            video_writer = cv2.VideoWriter(out_video_dir, fourcc, fps, frame_size)
            with torch.inference_mode(), torch.autocast(self.device, dtype=torch.bfloat16):
                state = self.video_predictor.init_state(video_path=video_dir)
 
                ann_frame_idx = 0  # the frame index we interact with
                ann_obj_id = 1  # give a unique id to each object we interact with (it can be any integers)
 
+                if point_coords is None and box is None:
+                    point_coords = np.array([[frame_size[0]/2, frame_size[1]/2]], dtype=np.float32)
+                    # for labels, `1` means positive click and `0` means negative click
+                    point_labels = np.array([1], np.int32)
+
                # add new prompts and instantly get the output on the same frame
                frame_idx, object_ids, masks = self.video_predictor.add_new_points_or_box(
                    inference_state=state,
@@ -290,17 +297,16 @@
                        out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
                        for i, out_obj_id in enumerate(out_obj_ids)
                    }
-                    mask = (out_mask_logits > 0.0).cpu().numpy()
+                    mask = (out_mask_logits[0] > 0.0).cpu().numpy()
                    frame = cv2.imread(os.path.join(video_dir, frame_names[out_frame_idx]))
-                    print(out_frame_idx,out_obj_ids,out_mask_logits.shape,frame.shape)
+                    print(out_frame_idx,out_obj_ids,mask.shape,frame.shape)
                    np_image = self.draw_bitmask(frame, mask,pil_image=False)
                    video_writer.write(np_image)
-                    if out_frame_idx % 30 == 0:
+                    if out_frame_idx % fps == 0:
                        pil_image = Image.fromarray(np_image)
                        yield [pil_image],None
                yield None,out_video_dir
                video_writer.release()
-                print("seg_with_promp:", masks.shape)
 
 
    def seg_all(self, input_image):
@@ -333,15 +339,16 @@ def extract_frames(self,video_path):
                break
 
            # build the filename for the saved frame
-            filename = f"{output_dir}/{count:05d}.jpg"
+            filename = os.path.join(output_dir,f"{count:05d}.jpg")
+            print(filename)
            # save the frame as an image
            cv2.imwrite(filename, frame)
            count += 1
+            frame_names.append(filename)
 
        # release the video capture object
        cap.release()
-        return output_dir,filename,fps,frame_size
+        return output_dir,frame_names,fps,frame_size
 
    @staticmethod
    def draw_bitmask_split(np_image, masks):
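Two of the changes above are easy to verify in isolation: seg_with_promp now falls back to a single positive click at the frame center when neither point_coords nor box is given, and extract_frames now returns the full list of frame filenames instead of only the last one. A self-contained sketch of the fallback (frame_size values are stand-ins):

import numpy as np

def default_point_prompt(point_coords, box, frame_size):
    """Fall back to one positive click at the frame center when no prompt is given."""
    point_labels = None
    if point_coords is None and box is None:
        # frame_size is (width, height); coords are float32 pixel positions
        point_coords = np.array([[frame_size[0] / 2, frame_size[1] / 2]], dtype=np.float32)
        point_labels = np.array([1], np.int32)  # 1 = positive click, 0 = negative
    return point_coords, point_labels

coords, labels = default_point_prompt(None, None, (1280, 720))
print(coords, labels)  # [[640. 360.]] [1]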
33 changes: 18 additions & 15 deletions detectron/demo/ui.py
@@ -16,7 +16,9 @@
 
 # get the directory containing the current file
 current_directory = current_file_path.parent
-print(current_directory)
 
+width = 713
+height = int(width / 1.618)
+
 def create_ui():
     with gr.Blocks() as demo:
@@ -33,7 +35,7 @@ def create_ui():
         with gr.Column(scale=2):
             with gr.Row(elem_id='audio-container'):
                 with gr.Group():
-                    components["image_input"] = gr.Image(type="pil",elem_id='image-input',label='输入')
+                    components["image_input"] = gr.Image(type="pil",elem_id='image-input',label='输入',width=width,height=height)
                     components["base_examples"] = gr.Examples(
                         examples=[
                             [os.path.join(current_directory,"examples/horse.png")],
@@ -49,7 +51,7 @@
         with gr.Column(scale=2):
             with gr.Row():
                 with gr.Group():
-                    components["image_output"] = gr.Image(type="pil",elem_id='image-output',label='输出',interactive=False)
+                    components["image_output"] = gr.Image(type="pil",elem_id='image-output',label='输出',interactive=False,width=width,height=height)
 
             with gr.Row():
                 with gr.Group():
@@ -72,8 +74,8 @@
         with gr.Column(scale=2):
             components["yolo_dist_b"] = gr.Number(value=0,label="B",visible=False)
             with gr.Group():
-                components["yolo_image_input"] = gr.Image(type="pil",elem_id='image-input',label='输入')
-                components["yolo_video_input"] = gr.Video(label='输入',visible=False,interactive=True)
+                components["yolo_image_input"] = gr.Image(type="pil",elem_id='image-input',label='输入',width=width,height=height)
+                components["yolo_video_input"] = gr.Video(label='输入',visible=False,interactive=True,width=width,height=height)
                 components["yolo_image_examples"] = gr.Examples(
                     examples=[
                         [os.path.join(current_directory,"examples/horse.png")],
@@ -99,8 +101,8 @@
         with gr.Column(scale=2):
             with gr.Row():
                 with gr.Group():
-                    components["yolo_image_output"] = gr.Image(type="pil",elem_id='image-output',label='输出',interactive=False)
-                    components["yolo_video_output"] = gr.PlayableVideo(label='输出',visible=False)
+                    components["yolo_image_output"] = gr.Image(type="pil",elem_id='image-output',label='输出',interactive=False,width=width,height=height)
+                    components["yolo_video_output"] = gr.PlayableVideo(label='输出',visible=False,width=width,height=height)
                     components["yolo_crop_output"] = gr.Gallery(type="pil",elem_id='crop-output',label='裁剪',interactive=False,visible=True)
 
         with gr.Row():
@@ -120,11 +122,12 @@
         with gr.Column(scale=2):
             with gr.Row(elem_id=''):
                 with gr.Group():
-                    components["face_input"] = gr.Gallery(elem_id='face-input',label='输入',columns=2,type="pil")
+                    components["face_input"] = gr.Image(elem_id='face-input',label='输入',type="pil",width=width,height=height)
+                    components["face_input2"] = gr.Image(elem_id='face_input2',label='输入-比对',type="pil",width=width,height=height)
                     components["face_examples"] = gr.Examples(
                         examples=[
-                            [[("face1",os.path.join(current_directory,"examples/face1.jpeg"))]],
-                            [[(os.path.join(current_directory,"examples/face2.jpeg"),"face2")]],
+                            [os.path.join(current_directory,"examples/face1.jpeg")],
+                            [os.path.join(current_directory,"examples/face2.jpeg")],
                         ],
                         inputs=[components["face_input"]],
                         examples_per_page=6,
@@ -152,7 +155,7 @@
         with gr.Column(scale=2):
             with gr.Row(elem_id=''):
                 with gr.Group():
-                    components["ocr_input"] = gr.Image(elem_id='ocr-input',label='输入',type="pil")
+                    components["ocr_input"] = gr.Image(elem_id='ocr-input',label='输入',type="pil",width=width,height=height)
                     components["ocr_examples"] = gr.Examples(
                         examples=[
                             [os.path.join(current_directory,"examples/json.png")],
@@ -165,7 +168,7 @@
         with gr.Column(scale=2):
             with gr.Row():
                 with gr.Group():
-                    components["ocr_output"] = gr.Image(elem_id='ocr_output',label='输出',interactive=False,type="pil")
+                    components["ocr_output"] = gr.Image(elem_id='ocr_output',label='输出',interactive=False,type="pil",width=width,height=height)
             with gr.Row():
                 with gr.Group():
                     components["ocr_json_output"] = gr.JSON(label="推理结果")
@@ -181,8 +184,8 @@
         with gr.Row():
             with gr.Column(scale=2):
                 with gr.Group():
-                    components["sam_input"] = ImagePrompter(elem_id='sam-input',label='输入',type="pil")
-                    components["sam_video_input"] = gr.Video(label='视频输入',visible=False,interactive=True)
+                    components["sam_input"] = ImagePrompter(elem_id='sam-input',label='输入',type="pil",width=width,height=height)
+                    components["sam_video_input"] = gr.Video(label='视频输入',visible=False,interactive=True,width=width,height=height)
                     components["sam_image_examples"] = gr.Examples(
                         examples=[
                             [{'image': os.path.join(current_directory,"examples/horse.png"), 'points': []}],
@@ -206,7 +209,7 @@
         with gr.Column(scale=2):
             with gr.Group():
                 components["sam_output"] = gr.Gallery(elem_id='sam_output',label='输出',columns=1,interactive=False)
-                components["sam_video_output"] = gr.PlayableVideo(label='输出',visible=False)
+                components["sam_video_output"] = gr.PlayableVideo(label='输出',visible=False,width=width,height=height)
 
     with gr.Tab("知识库"):
         with gr.Row():
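Every image and video widget above now shares one display size, with the height derived from the width by the golden ratio. A minimal sketch of the pattern (assuming Gradio 4.x, where Image components accept width and height in pixels):

import gradio as gr

width = 713
height = int(width / 1.618)  # 440: golden-ratio aspect, as in the diff

with gr.Blocks() as demo:
    inp = gr.Image(type="pil", label="input", width=width, height=height)
    out = gr.Image(type="pil", label="output", interactive=False, width=width, height=height)

if __name__ == "__main__":
    demo.launch()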
3 changes: 3 additions & 0 deletions docker-compose.yml
@@ -10,6 +10,9 @@ services:
       - /data/.deepface:/root/.deepface
       - /data/.torch:/root/.torch
       - /data/gradio:/tmp/gradio
+      - /data/gradio/examples:/workspace/detectron/demo/examples
+      - /data/gradio/output:/workspace/detectron/demo/output
+      - /data/gradio/knowledge_bases:/workspace/detectron/demo/knowledge_bases
     deploy:
       resources:
         reservations:
2 changes: 1 addition & 1 deletion requirements-docker.txt
@@ -4,7 +4,7 @@ facenet_pytorch
 ultralytics
 scikit-image==0.23.2
 torch>=2.3.1
-gradio==4.36.0
+gradio==4.41.0
 fvcore==0.1.5.post20221221
 omegaconf==2.3.0
 pycocotools==2.0.7
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,7 +5,7 @@ torch>=2.3.1
 ultralytics
 segment_anything==1.0
 scikit-image==0.23.2
-gradio==4.36.0
+gradio==4.41.0
 fvcore==0.1.5.post20221221
 omegaconf==2.3.0
 pycocotools==2.0.7
