forked from Kedreamix/YoloGesture
Commit: yologesture main code

Showing 38 changed files with 5,517 additions and 0 deletions.
Pipfile
@@ -0,0 +1,19 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
streamlit = ">0.49.0"
opencv-python = "*"
numpy = "*"
torchvision = "0.9.1"
torch = "1.8.1"
Pillow = "8.2.0"
pyyaml = "6.0"
matplotlib = "*"
opencv-python-headless = "4.5.2.52"
av = "*"
streamlit-webrtc = "0.36.1"
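The pins above tie the app to torch 1.8.1 and torchvision 0.9.1. A minimal sanity check, assuming the environment was built from this Pipfile (e.g. with `pipenv install`), is to confirm the pinned versions at runtime:

# Quick environment check: confirm the versions pinned in the Pipfile.
import torch
import torchvision

assert torch.__version__.startswith("1.8.1"), torch.__version__
assert torchvision.__version__.startswith("0.9.1"), torchvision.__version__
print("torch", torch.__version__, "| torchvision", torchvision.__version__)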
@@ -0,0 +1,62 @@
from lxml import etree


class GEN_Annotations:
    """Build a Pascal VOC style XML annotation for a single image."""

    def __init__(self, filename):
        self.root = etree.Element("annotation")

        child1 = etree.SubElement(self.root, "folder")
        child1.text = "VOC2007"

        child2 = etree.SubElement(self.root, "filename")
        child2.text = filename

        child3 = etree.SubElement(self.root, "source")

        child4 = etree.SubElement(child3, "annotation")
        child4.text = "PASCAL VOC2007"
        child5 = etree.SubElement(child3, "database")
        child5.text = "Unknown"

        ## child6 = etree.SubElement(child3, "image")
        ## child6.text = "flickr"
        ## child7 = etree.SubElement(child3, "flickrid")
        ## child7.text = "35435"

    def set_size(self, width, height, channel):
        # Record the image dimensions (VOC uses "depth" for the channel count).
        size = etree.SubElement(self.root, "size")
        widthn = etree.SubElement(size, "width")
        widthn.text = str(width)
        heightn = etree.SubElement(size, "height")
        heightn.text = str(height)
        channeln = etree.SubElement(size, "depth")
        channeln.text = str(channel)

    def savefile(self, filename):
        tree = etree.ElementTree(self.root)
        tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8')

    def add_pic_attr(self, label, xmin, ymin, xmax, ymax):
        # Append one <object> entry with its class name and bounding box.
        obj = etree.SubElement(self.root, "object")
        namen = etree.SubElement(obj, "name")
        namen.text = label
        bndbox = etree.SubElement(obj, "bndbox")
        xminn = etree.SubElement(bndbox, "xmin")
        xminn.text = str(xmin)
        yminn = etree.SubElement(bndbox, "ymin")
        yminn.text = str(ymin)
        xmaxn = etree.SubElement(bndbox, "xmax")
        xmaxn.text = str(xmax)
        ymaxn = etree.SubElement(bndbox, "ymax")
        ymaxn.text = str(ymax)


if __name__ == '__main__':
    filename = "000001.jpg"
    anno = GEN_Annotations(filename)
    anno.set_size(1280, 720, 3)
    for i in range(3):
        xmin = i + 1
        ymin = i + 10
        xmax = i + 100
        ymax = i + 100
        anno.add_pic_attr("pikachu", xmin, ymin, xmax, ymax)
    anno.savefile("000001.xml")
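For reference, the annotation can be read back with the standard library to verify the structure the class emits; a minimal sketch, assuming the demo above has just written 000001.xml in the working directory:

# Read back the annotation written by the demo above and print each box.
import xml.etree.ElementTree as ET

root = ET.parse("000001.xml").getroot()
print(root.find("filename").text)  # 000001.jpg
size = root.find("size")
print(size.find("width").text, size.find("height").text, size.find("depth").text)
for obj in root.findall("object"):
    box = obj.find("bndbox")
    coords = [int(box.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
    print(obj.find("name").text, coords)  # e.g. pikachu [1, 10, 100, 100]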
gesture_streamlit.py
@@ -0,0 +1,179 @@
"""Create an Object Detection Web App using PyTorch and Streamlit.""" | ||
# import libraries | ||
from PIL import Image | ||
from torchvision import models, transforms | ||
import torch | ||
import streamlit as st | ||
from yolo import YOLO | ||
import os | ||
import urllib | ||
# 设置网页的icon | ||
st.set_page_config(page_title='Gesture Detector', page_icon='✌', | ||
layout='centered', initial_sidebar_state='expanded') | ||
|
||
def main(): | ||
# Render the readme as markdown using st.markdown. | ||
readme_text = st.markdown(open("instructions.md",encoding='utf-8').read()) | ||
|
||
# Download external dependencies. | ||
for filename in EXTERNAL_DEPENDENCIES.keys(): | ||
download_file(filename) | ||
|
||
# Once we have the dependencies, add a selector for the app mode on the sidebar. | ||
st.sidebar.title("What to do") | ||
app_mode = st.sidebar.selectbox("Choose the app mode", | ||
["Show instructions", "Run the app", "Show the source code"]) | ||
if app_mode == "Show instructions": | ||
st.sidebar.success('To continue select "Run the app".') | ||
elif app_mode == "Show the source code": | ||
readme_text.empty() | ||
st.code(open("gesture_streamlit.py",encoding='utf-8').read()) | ||
elif app_mode == "Run the app": | ||
readme_text.empty() | ||
run_the_app() | ||
|
||
# External files to download. | ||
EXTERNAL_DEPENDENCIES = { | ||
"yolotiny_ep100.pth": { | ||
"url": "https://github.com/Dreaming-future/my_weights/releases/download/v1.0/yolotiny_ep100.pth", | ||
"size": 23627989 | ||
}, | ||
"yolotiny_SE_ep100.pth": { | ||
"url": "https://github.com/Dreaming-future/my_weights/releases/download/v1.0/yolotiny_SE_ep100.pth", | ||
"size": 23802697 | ||
}, | ||
"yolotiny_CBAM_ep100.pth":{ | ||
"url": "https://github.com/Dreaming-future/my_weights/releases/download/v1.0/yolotiny_CBAM_ep100.pth", | ||
"size": 23978051 | ||
}, | ||
"yolotiny_ECA_ep100.pth":{ | ||
"url": "https://github.com/Dreaming-future/my_weights/releases/download/v1.0/yolotiny_ECA_ep100.pth", | ||
"size": 23629391 | ||
}, | ||
|
||
} | ||
|
||
|
||
# This file downloader demonstrates Streamlit animation. | ||
def download_file(file_path): | ||
# Don't download the file twice. (If possible, verify the download using the file length.) | ||
if os.path.exists(file_path): | ||
if "size" not in EXTERNAL_DEPENDENCIES[file_path]: | ||
return | ||
elif os.path.getsize(file_path) == EXTERNAL_DEPENDENCIES[file_path]["size"]: | ||
return | ||
# print(os.path.getsize(file_path)) | ||
# These are handles to two visual elements to animate. | ||
weights_warning, progress_bar = None, None | ||
try: | ||
weights_warning = st.warning("Downloading %s..." % file_path) | ||
progress_bar = st.progress(0) | ||
with open(file_path, "wb") as output_file: | ||
with urllib.request.urlopen(EXTERNAL_DEPENDENCIES[file_path]["url"]) as response: | ||
length = int(response.info()["Content-Length"]) | ||
counter = 0.0 | ||
MEGABYTES = 2.0 ** 20.0 | ||
while True: | ||
data = response.read(8192) | ||
if not data: | ||
break | ||
counter += len(data) | ||
output_file.write(data) | ||
|
||
# We perform animation by overwriting the elements. | ||
weights_warning.warning("Downloading %s... (%6.2f/%6.2f MB)" % | ||
(file_path, counter / MEGABYTES, length / MEGABYTES)) | ||
progress_bar.progress(min(counter / length, 1.0)) | ||
except Exception as e: | ||
print(e) | ||
# Finally, we remove these visual elements by calling .empty(). | ||
finally: | ||
if weights_warning is not None: | ||
weights_warning.empty() | ||
if progress_bar is not None: | ||
progress_bar.empty() | ||
|
||
# This is the main app app itself, which appears when the user selects "Run the app". | ||
def run_the_app(): | ||
class Config(): | ||
def __init__(self, weights = 'yolotiny_ep100.pth', tiny = True, phi = 0, shape = 416,nms_iou = 0.3, confidence = 0.5): | ||
self.weights = weights | ||
self.tiny = tiny | ||
self.phi = phi | ||
self.cuda = False | ||
self.shape = shape | ||
self.confidence = confidence | ||
self.nms_iou = nms_iou | ||
# set title of app | ||
st.markdown('<h1 align="center">✌ Gesture Detection</h1>', | ||
unsafe_allow_html=True) | ||
st.sidebar.markdown("# Gesture Detection on?") | ||
activities = ["Image", "Video"] | ||
choice = st.sidebar.selectbox("Choose among the given options:", activities) | ||
phi = st.sidebar.selectbox("yolov4-tiny 使用的自注意力模式:",('0tiny','1SE','2CABM','3ECA')) | ||
print("") | ||
conf,nms = object_detector_ui() | ||
@st.cache | ||
def get_yolo(phi,conf,nms): | ||
weights = 'yolotiny_ep100.pth' | ||
if phi == '0tiny': | ||
weights = 'yolotiny_ep100.pth' | ||
elif phi == '1SE': | ||
weights = 'yolotiny_SE_ep100.pth' | ||
elif phi == '2CABM': | ||
weights = 'yolotiny_CBAM_ep100.pth' | ||
elif phi == '3ECA': | ||
weights = 'yolotiny_ECA_ep100.pth' | ||
opt = Config(weights = weights, tiny = True, phi = int(phi[0]), shape = 416,nms_iou = nms, confidence = conf) | ||
yolo = YOLO(opt) | ||
return yolo | ||
yolo = get_yolo(phi,conf,nms) | ||
st.write("YOLOV4 tiny 模型加载完毕") | ||
gesture_detection(yolo) | ||
|
||
|
||
|
||
# This sidebar UI lets the user select parameters for the YOLO object detector. | ||
def object_detector_ui(): | ||
st.sidebar.markdown("# Model") | ||
confidence_threshold = st.sidebar.slider("Confidence threshold", 0.0, 1.0, 0.5, 0.01) | ||
overlap_threshold = st.sidebar.slider("Overlap threshold", 0.0, 1.0, 0.3, 0.01) | ||
return confidence_threshold, overlap_threshold | ||
|
||
def predict(image,yolo): | ||
"""Return predictions. | ||
Parameters | ||
---------- | ||
:param image: uploaded image | ||
:type image: jpg | ||
:rtype: list | ||
:return: none | ||
""" | ||
crop = False | ||
count = False | ||
try: | ||
image = Image.open(image) | ||
r_image = yolo.detect_image(image, crop = crop, count=count) | ||
transform = transforms.Compose([transforms.ToTensor()]) | ||
result = transform(r_image) | ||
st.image(result.permute(1,2,0).numpy(), caption = 'Processed Image.', use_column_width = True) | ||
except Exception as e: | ||
print(e) | ||
|
||
def gesture_detection(yolo): | ||
# enable users to upload images for the model to make predictions | ||
file_up = st.file_uploader("Upload an image", type = "jpg") | ||
classes = ["up","down","left","right","front","back","clockwise","anticlockwise"] | ||
class_to_idx = {cls: idx for (idx, cls) in enumerate(classes)} | ||
st.sidebar.markdown("See the model preformance and play with it") | ||
if file_up is not None: | ||
# display image that user uploaded | ||
image = Image.open(file_up) | ||
st.image(image, caption = 'Uploaded Image.', use_column_width = True) | ||
st.write("") | ||
st.write("Just a second ...") | ||
predict(file_up,yolo) | ||
|
||
if __name__ == "__main__": | ||
main() |
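The app touches only two points of the repo's YOLO wrapper: the YOLO(opt) constructor and detect_image, which takes and returns a PIL image. A hypothetical stand-in with the same surface (an assumption for illustration, not part of the repo) lets the Streamlit flow be exercised without downloading weights:

# Hypothetical stub matching the interface the app uses; it draws no real
# detections, it just annotates and echoes the input image.
from PIL import Image, ImageDraw

class StubYOLO:
    def __init__(self, opt):
        self.opt = opt  # Config carrying weights/phi/confidence/nms_iou

    def detect_image(self, image, crop=False, count=False):
        out = image.copy()
        ImageDraw.Draw(out).text((10, 10), "stub: no model loaded")
        return out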
@@ -0,0 +1,123 @@
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm
import yaml
from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from yolo import YOLO
from get_yaml import get_config
import argparse

if __name__ == "__main__":
    '''
    Unlike AP, which is the area under a curve, Recall and Precision are single values that change
    with the score threshold; the Recall and Precision reported by the mAP computation are the
    values at a confidence threshold of 0.5.
    The txt files produced in ./map_out/detection-results/ contain more boxes than a direct predict
    run, because a deliberately low threshold is used here: boxes at many score levels are needed
    to compute Recall and Precision under different thresholds, which the mAP calculation requires.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='model_data/yolotiny_SE_ep100.pth', help='initial weights path')
    parser.add_argument('--tiny', action='store_true', help='use the yolov4-tiny model')
    parser.add_argument('--phi', type=int, default=1, help='attention module type for yolov4-tiny')
    parser.add_argument('--mode', type=int, default=0, help='get map mode')
    parser.add_argument('--cuda', action='store_true', help='whether to use the GPU')
    parser.add_argument('--shape', type=int, default=416, help='input image shape')
    parser.add_argument('--confidence', type=float, default=0.5, help='only predicted boxes scoring above this confidence are kept')
    parser.add_argument('--nms_iou', type=float, default=0.3, help='IoU threshold used by non-maximum suppression')
    opt = parser.parse_args()
    print(opt)
    # Configuration file.
    config = get_config()

    #-----------------------------------------------------------------------------------------------#
    # map_mode selects what this script computes when it runs:
    #   0 - the full pipeline: get predictions, get ground truth, then compute the VOC mAP
    #   1 - get predictions only
    #   2 - get ground truth only
    #   3 - compute the VOC mAP only
    #   4 - compute the 0.50:0.95 mAP with the COCO toolbox; requires predictions and ground
    #       truth to exist already, and pycocotools to be installed
    #-----------------------------------------------------------------------------------------------#
    map_mode = opt.mode
    #-------------------------------------------------------#
    # MINOVERLAP selects which mAP0.x to compute,
    # e.g. set MINOVERLAP = 0.75 for mAP0.75.
    #-------------------------------------------------------#
    MINOVERLAP = 0.5
    #-------------------------------------------------------#
    # map_vis toggles visualisation of the VOC mAP computation.
    #-------------------------------------------------------#
    map_vis = False
    #-------------------------------------------------------#
    # Folder containing the VOC dataset,
    # by default the VOC dataset in the project root.
    #-------------------------------------------------------#
    VOCdevkit_path = 'VOCdevkit'
    #-------------------------------------------------------#
    # Output folder for the results, map_out by default.
    #-------------------------------------------------------#
    map_out_path = 'map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/val.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names = config['classes']

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        yolo = YOLO(opt, confidence=0.001, nms_iou=0.5)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg")
            image = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            yolo.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult') is not None:
                        difficult = obj.find('difficult').text
                        if int(difficult) == 1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, path=map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names=class_names, path=map_out_path)
        print("Get map done.")