yolo_0429

Cherrytien · Apr 29, 2020 · 741586e · 741586e
1 parent bf539c2
commit 741586e
Show file tree

Hide file tree

Showing 6 changed files with 1,278 additions and 3 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -111,6 +111,7 @@ src/Viewer.cc
 src/Conversion.cc
 src/MaskNet.cc
 src/Geometry.cc
+src/yolo.cc
 )
 
 target_link_libraries(${PROJECT_NAME}
@@ -131,6 +132,10 @@ add_executable(rgbd_tum
 Examples/RGB-D/rgbd_tum.cc)
 target_link_libraries(rgbd_tum ${PROJECT_NAME})
 
+add_executable(rgbd_tum_yolo
+Examples/RGB-D/rgbd_tum_yolo.cc)
+target_link_libraries(rgbd_tum_yolo ${PROJECT_NAME})
+
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/Examples/Stereo)
 
 add_executable(stereo_kitti
@@ -143,11 +148,15 @@ add_executable(mono_tum
 Examples/Monocular/mono_tum.cc)
 target_link_libraries(mono_tum ${PROJECT_NAME})
 
+add_executable(mono_tum_yolo
+Examples/Monocular/mono_tum_yolo.cc)
+target_link_libraries(mono_tum_yolo ${PROJECT_NAME})
+
 add_executable(mono_kitti
 Examples/Monocular/mono_kitti.cc)
 target_link_libraries(mono_kitti ${PROJECT_NAME})
 
-add_executable(mono_carla
-Examples/Monocular/mono_carla.cc)
-target_link_libraries(mono_carla ${PROJECT_NAME})
+#add_executable(mono_carla
+#Examples/Monocular/mono_carla.cc)
+#target_link_libraries(mono_carla ${PROJECT_NAME})
 
diff --git a/Examples/RGB-D/rgbd_tum_yolo.cc b/Examples/RGB-D/rgbd_tum_yolo.cc
@@ -0,0 +1,221 @@
+/**
+* This file is a modified version of ORB-SLAM2.<https://github.com/raulmur/ORB_SLAM2>
+*
+* This file is part of DynaSLAM.
+* Copyright (C) 2018 Berta Bescos <bbescos at unizar dot es> (University of Zaragoza)
+* For more information see <https://github.com/bertabescos/DynaSLAM>.
+*
+*/
+
+
+#include<iostream>
+#include<algorithm>
+#include<fstream>
+#include<chrono>
+#include <unistd.h>
+#include<opencv2/core/core.hpp>
+
+#include "Geometry.h"
+#include "MaskNet.h"
+#include <System.h>
+#include "yolo.h"
+
+using namespace std;
+
+void LoadImages(const string &strAssociationFilename, vector<string> &vstrImageFilenamesRGB,
+                vector<string> &vstrImageFilenamesD, vector<double> &vTimestamps);
+
+int main(int argc, char **argv)
+{
+    if(argc != 5 && argc != 6 && argc != 7)
+    {
+        cerr << endl << "Usage: ./rgbd_tum path_to_vocabulary path_to_settings path_to_sequence path_to_association (path_to_masks) (path_to_output)" << endl;
+        return 1;
+    }
+
+    // Retrieve paths to images
+    vector<string> vstrImageFilenamesRGB;
+    vector<string> vstrImageFilenamesD;
+    vector<double> vTimestamps;
+    string strAssociationFilename = string(argv[4]);
+    LoadImages(strAssociationFilename, vstrImageFilenamesRGB, vstrImageFilenamesD, vTimestamps);
+
+    // Check consistency in the number of images and depthmaps
+    int nImages = vstrImageFilenamesRGB.size();
+    std::cout << "nImages: " << nImages << std::endl;
+
+    nImages = 720;
+
+    if(vstrImageFilenamesRGB.empty())
+    {
+        cerr << endl << "No images found in provided path." << endl;
+        return 1;
+    }
+    else if(vstrImageFilenamesD.size()!=vstrImageFilenamesRGB.size())
+    {
+        cerr << endl << "Different number of images for rgb and depth." << endl;
+        return 1;
+    }
+
+    // Initialize Mask R-CNN
+    //DynaSLAM::SegmentDynObject *MaskNet;
+    yolov3::yolov3Segment* yolo;
+    if (argc==6 || argc==7)
+    {
+        //cout << "Loading Mask R-CNN. This could take a while..." << endl;
+        //MaskNet = new DynaSLAM::SegmentDynObject();
+        //cout << "Mask R-CNN loaded!" << endl;
+        cout << "Loading Yolov3 net. This could take a while..." << endl;
+        yolo = new yolov3::yolov3Segment();
+        cout << "Yolov3 net loaded!" << endl;
+    }
+
+    // Create SLAM system. It initializes all system threads and gets ready to process frames.
+    ORB_SLAM2::System SLAM(argv[1],argv[2],ORB_SLAM2::System::RGBD,true);
+
+    // Vector for tracking time statistics
+    vector<float> vTimesTrack;
+    vTimesTrack.resize(nImages);
+
+    cout << endl << "-------" << endl;
+    cout << "Start processing sequence ..." << endl;
+    cout << "Images in the sequence: " << nImages << endl << endl;
+
+    // Dilation settings
+    int dilation_size = 15;
+    cv::Mat kernel = getStructuringElement(cv::MORPH_ELLIPSE,
+                                           cv::Size( 2*dilation_size + 1, 2*dilation_size+1 ),
+                                           cv::Point( dilation_size, dilation_size ) );
+
+    if (argc==7)
+    {
+        std::string dir = string(argv[6]);
+        mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+        dir = string(argv[6]) + "/rgb/";
+        mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+        dir = string(argv[6]) + "/depth/";
+        mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+        dir = string(argv[6]) + "/mask/";
+        mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+    }
+
+    // Main loop
+        cv::Mat imRGB, imD;
+        cv::Mat imRGBOut, imDOut,maskOut;
+
+    for(int ni=0; ni<nImages; ni++)
+    {
+        cout << ni << endl;
+        // Read image and depthmap from file
+        imRGB = cv::imread(string(argv[3])+"/"+vstrImageFilenamesRGB[ni],CV_LOAD_IMAGE_UNCHANGED);
+        imD = cv::imread(string(argv[3])+"/"+vstrImageFilenamesD[ni],CV_LOAD_IMAGE_UNCHANGED);
+
+        double tframe = vTimestamps[ni];
+
+        if(imRGB.empty())
+        {
+            cerr << endl << "Failed to load image at: "
+                 << string(argv[3]) << "/" << vstrImageFilenamesRGB[ni] << endl;
+            return 1;
+        }
+
+#ifdef COMPILEDWITHC11
+        std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();
+#else
+        std::chrono::monotonic_clock::time_point t1 = std::chrono::monotonic_clock::now();
+#endif
+
+        // Segment out the images
+        cv::Mat mask = cv::Mat::ones(480,640,CV_8U);
+        /*
+        if (argc == 6 || argc == 7)
+        {
+            cv::Mat maskRCNN;
+            maskRCNN = MaskNet->GetSegmentation(imRGB,string(argv[5]),vstrImageFilenamesRGB[ni].replace(0,4,""));
+            cv::Mat maskRCNNdil = maskRCNN.clone();
+            cv::dilate(maskRCNN,maskRCNNdil, kernel);
+            mask = mask - maskRCNNdil;
+        }
+        */
+        if (argc == 6 || argc == 7)
+            mask = yolo->Segmentation(imRGB);
+        // Pass the image to the SLAM system
+        if (argc == 7){SLAM.TrackRGBD(imRGB,imD,mask,tframe,imRGBOut,imDOut,maskOut);}
+        else {SLAM.TrackRGBD(imRGB,imD,mask,tframe);}
+
+#ifdef COMPILEDWITHC11
+        std::chrono::steady_clock::time_point t2 = std::chrono::steady_clock::now();
+#else
+        std::chrono::monotonic_clock::time_point t2 = std::chrono::monotonic_clock::now();
+#endif
+
+        if (argc == 7)
+        {
+            cv::imwrite(string(argv[6]) + "/rgb/" + vstrImageFilenamesRGB[ni],imRGBOut);
+            vstrImageFilenamesD[ni].replace(0,6,"");
+            cv::imwrite(string(argv[6]) + "/depth/" + vstrImageFilenamesD[ni],imDOut);
+            cv::imwrite(string(argv[6]) + "/mask/" + vstrImageFilenamesRGB[ni],maskOut);
+        }
+
+        double ttrack= std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count();
+
+        vTimesTrack[ni]=ttrack;
+
+        // Wait to load the next frame
+        double T=0;
+        if(ni<nImages-1)
+            T = vTimestamps[ni+1]-tframe;
+        else if(ni>0)
+            T = tframe-vTimestamps[ni-1];
+
+        if(ttrack<T)
+            usleep((T-ttrack)*1e6);
+    }
+
+    // Stop all threads
+    SLAM.Shutdown();
+
+    // Tracking time statistics
+    sort(vTimesTrack.begin(),vTimesTrack.end());
+    float totaltime = 0;
+    for(int ni=0; ni<nImages; ni++)
+    {
+        totaltime+=vTimesTrack[ni];
+    }
+    cout << "-------" << endl << endl;
+    cout << "median tracking time: " << vTimesTrack[nImages/2] << endl;
+    cout << "mean tracking time: " << totaltime/nImages << endl;
+
+    // Save camera trajectory
+    SLAM.SaveTrajectoryTUM("CameraTrajectory.txt");
+    SLAM.SaveKeyFrameTrajectoryTUM("KeyFrameTrajectory.txt");
+
+    return 0;
+}
+
+void LoadImages(const string &strAssociationFilename, vector<string> &vstrImageFilenamesRGB,
+                vector<string> &vstrImageFilenamesD, vector<double> &vTimestamps)
+{
+    ifstream fAssociation;
+    fAssociation.open(strAssociationFilename.c_str());
+    while(!fAssociation.eof())
+    {
+        string s;
+        getline(fAssociation,s);
+        if(!s.empty())
+        {
+            stringstream ss;
+            ss << s;
+            double t;
+            string sRGB, sD;
+            ss >> t;
+            vTimestamps.push_back(t);
+            ss >> sRGB;
+            vstrImageFilenamesRGB.push_back(sRGB);
+            ss >> t;
+            ss >> sD;
+            vstrImageFilenamesD.push_back(sD);
+
+        }
+    }
+}
diff --git a/include/yolo.h b/include/yolo.h
@@ -0,0 +1,42 @@
+#include <iostream>
+#include <opencv2/dnn.hpp>
+#include <opencv2/core.hpp>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/videoio.hpp>
+#include <opencv2/video.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <fstream>
+#include <chrono>
+#include <thread>
+#include <unistd.h>
+#include <sstream>
+#include <string>
+#include <vector>
+using namespace std;
+using namespace cv;
+using namespace dnn;
+
+namespace yolov3 {
+    class yolov3Segment {
+        private:
+            float confThreshold = 0.5; // Confidence threshold
+            float nmsThreshold = 0.4;  // Non-maximum suppression threshold
+            int inpWidth = 640;  // Width of network's input image
+            int inpHeight = 480; // Height of network's input image
+            vector<string> classes;
+            Net net;
+
+        public:
+            yolov3Segment();
+            ~yolov3Segment();
+            cv::Mat Segmentation(cv::Mat &image);
+
+            // Remove the bounding boxes with low confidence using non-maxima suppression
+            cv::Mat postprocess(Mat& frame, const vector<Mat>& out);
+            // Get the names of the output layers
+            vector<String> getOutputsNames(const Net& net);
+
+    };
+}