Implementation

AndreyGermanov · May 10, 2023 · a6a210c · a6a210c
1 parent 5d7adad
commit a6a210c
Show file tree

Hide file tree

Showing 7 changed files with 291 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,34 @@
-# yolov8_onnx_go
-YOLOv8 Inference using Go
+# YOLOv8 inference using Go
+
+This is a web interface to [YOLOv8 object detection neural network](https://ultralytics.com/yolov8)
+implemented in [Go](https://go.dev).
+
+## Install
+
+* Clone this repository: `git clone git@github.com:AndreyGermanov/yolov8_onnx_go.git`
+* Go to the root of cloned repository
+* Install dependencies by running `go mod download`
+* Open `main.go`, find the line `ort.SetSharedLibraryPath(...)` and specify the path to the ONNX runtime library in it.*
+
+*If you do not have `ONNX runtime` installed, you can manually download it for
+your operating system from [this repository](https://github.com/microsoft/onnxruntime/releases),
+extract the archive to some folder and then specify the path to the main library file
+inside the extracted folder:
+
+* `lib/libonnxruntime.so` - for Linux
+* `lib/libonnxruntime.dylib` - for MacOS
+* `lib/onnxruntime.dll` - for Windows
+
+This repository contains the ONNX Runtime library for Linux only.
+
+## Run
+
+Execute:
+
+```
+go run main.go
+```
+
+It will start a webserver on http://localhost:8080. Use any web browser to open the web interface.
+
+Using the interface you can upload an image to the object detector and see the bounding boxes of all objects detected in it.
diff --git a/go.mod b/go.mod
@@ -0,0 +1,7 @@
+module object_detector
+
+go 1.18
+
+require github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
+
+require github.com/yalue/onnxruntime_go v0.0.0-20230331205425-1acf4f2a2e42 // indirect
diff --git a/index.html b/index.html
@@ -0,0 +1,55 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>YOLOv8 Object Detection</title>
+    <style>
+      canvas {
+          display:block;
+          border: 1px solid black;
+          margin-top:10px;
+      }
+    </style>
+    <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
+</head>
+<body>
+    <input id="uploadInput" type="file"/>
+    <canvas></canvas>
+    <script>
+       const input = document.getElementById("uploadInput");
+       input.addEventListener("change",async(event) => {
+           const data = new FormData();
+           data.append("image_file",event.target.files[0],"image_file");
+           const response = await fetch("/detect",{
+               method:"post",
+               body:data
+           });
+           const boxes = await response.json();
+           draw_image_and_boxes(event.target.files[0],boxes);
+       })
+
+      function draw_image_and_boxes(file,boxes) {
+          const img = new Image()
+          img.src = URL.createObjectURL(file);
+          img.onload = () => {
+              const canvas = document.querySelector("canvas");
+              canvas.width = img.width;
+              canvas.height = img.height;
+              const ctx = canvas.getContext("2d");
+              ctx.drawImage(img,0,0);
+              ctx.strokeStyle = "#00FF00";
+              ctx.lineWidth = 3;
+              ctx.font = "18px serif";
+              boxes.forEach(([x1,y1,x2,y2,label]) => {
+                  ctx.strokeRect(x1,y1,x2-x1,y2-y1);
+                  ctx.fillStyle = "#00ff00";
+                  const width = ctx.measureText(label).width;
+                  ctx.fillRect(x1,y1,width+10,25);
+                  ctx.fillStyle = "#000000";
+                  ctx.fillText(label, x1, y1+18);
+              });
+          }
+      }
+    </script>
+</body>
+</html>
diff --git a/libonnxruntime.so b/libonnxruntime.so
diff --git a/libonnxruntime.so.1.14.1 b/libonnxruntime.so.1.14.1
diff --git a/main.go b/main.go
@@ -0,0 +1,195 @@
+package main
+
+import (
+	"encoding/json"
+	"github.com/nfnt/resize"
+	ort "github.com/yalue/onnxruntime_go"
+	"image"
+	_ "image/gif"
+	_ "image/jpeg"
+	_ "image/png"
+	"io"
+	"math"
+	"net/http"
+	"os"
+	"sort"
+)
+
+// main registers the handlers of the web service endpoints
+// and starts the web service on port 8080.
+// It blocks while the server is running and terminates the process
+// with exit status 1 when the server cannot start or stops with an error.
+func main() {
+	http.HandleFunc("/", index)
+	http.HandleFunc("/detect", detect)
+	server := http.Server{
+		Addr: "0.0.0.0:8080",
+	}
+	// Report the reason the server stopped (for example, the port is
+	// already in use) instead of exiting silently.
+	if err := server.ListenAndServe(); err != nil {
+		os.Stderr.WriteString("web server stopped: " + err.Error() + "\n")
+		os.Exit(1)
+	}
+}
+
+// Site main page handler function.
+// Responds with the content of the index.html file located in the
+// working directory, or with HTTP 500 when the file cannot be read.
+func index(w http.ResponseWriter, _ *http.Request) {
+	// os.ReadFile opens, reads and closes the file, so no file
+	// descriptor is left open after each request.
+	buf, err := os.ReadFile("index.html")
+	if err != nil {
+		http.Error(w, "could not read index.html", http.StatusInternalServerError)
+		return
+	}
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	// A write error means the client is gone; there is nothing left to do.
+	_, _ = w.Write(buf)
+}
+
+// Handler of /detect POST endpoint
+// Receives an uploaded file with the name "image_file", passes it
+// through the YOLOv8 object detection network and writes the result
+// as a JSON array of bounding boxes in format
+// [[x1,y1,x2,y2,object_type,probability],..]
+// Responds with HTTP 400 when the request is not a valid multipart form
+// or has no "image_file" part, and with HTTP 500 when the result
+// cannot be encoded.
+func detect(w http.ResponseWriter, r *http.Request) {
+	if err := r.ParseMultipartForm(0); err != nil {
+		http.Error(w, "invalid multipart form: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+	file, _, err := r.FormFile("image_file")
+	if err != nil {
+		// Without this check a nil reader would be handed to the decoder.
+		http.Error(w, "image_file is required: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+	defer file.Close()
+
+	boxes := detect_objects_on_image(file)
+	buf, err := json.Marshal(&boxes)
+	if err != nil {
+		http.Error(w, "could not encode detection result", http.StatusInternalServerError)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	// A write error means the client is gone; there is nothing left to do.
+	_, _ = w.Write(buf)
+}
+
+// detect_objects_on_image runs the whole detection pipeline for one image:
+// the image read from buf is converted to an input tensor, the tensor is
+// passed through the YOLOv8 neural network and the raw network output is
+// converted to bounding boxes.
+// Returns an array of bounding boxes in format [[x1,y1,x2,y2,object_type,probability],..]
+func detect_objects_on_image(buf io.Reader) [][]interface{} {
+	tensor, width, height := prepare_input(buf)
+	return process_output(run_model(tensor), width, height)
+}
+
+// Function used to convert input image to tensor,
+// required as an input to YOLOv8 object detection
+// network.
+// Returns the input tensor, original image width and height
+func prepare_input(buf io.Reader) ([]float32, int64, int64) {
+	img, _, _ := image.Decode(buf)
+	size := img.Bounds().Size()
+	img_width, img_height := int64(size.X), int64(size.Y)
+	img = resize.Resize(640, 640, img, resize.Lanczos3)
+	red := []float32{}
+	green := []float32{}
+	blue := []float32{}
+	for y := 0; y < 640; y++ {
+		for x := 0; x < 640; x++ {
+			r, g, b, _ := img.At(x, y).RGBA()
+			red = append(red, float32(r/257)/255.0)
+			green = append(green, float32(g/257)/255.0)
+			blue = append(blue, float32(b/257)/255.0)
+		}
+	}
+	input := append(red, green...)
+	input = append(input, blue...)
+	return input, img_width, img_height
+}
+
+// Function used to pass the provided input tensor to the
+// YOLOv8 neural network and return the result.
+// input is the flat tensor of shape (1, 3, 640, 640).
+// Returns a copy of the raw output of the network, of shape (1, 84, 8400),
+// as a single dimension array.
+// Panics with a descriptive message when a tensor or the session cannot be
+// created or the model fails to run, because the signature has no way to
+// report an error.
+func run_model(input []float32) []float32 {
+	ort.SetSharedLibraryPath("./libonnxruntime.so")
+	// NOTE(review): this function runs for every request and the library is
+	// expected to report an error when the environment is already initialized,
+	// so the result is deliberately ignored here; a real initialization
+	// failure is expected to surface in the calls below - confirm.
+	_ = ort.InitializeEnvironment()
+
+	inputShape := ort.NewShape(1, 3, 640, 640)
+	inputTensor, err := ort.NewTensor(inputShape, input)
+	if err != nil {
+		panic("run_model: could not create input tensor: " + err.Error())
+	}
+	// Release the resources held by the tensor once the request is done.
+	defer inputTensor.Destroy()
+
+	outputShape := ort.NewShape(1, 84, 8400)
+	outputTensor, err := ort.NewEmptyTensor[float32](outputShape)
+	if err != nil {
+		panic("run_model: could not create output tensor: " + err.Error())
+	}
+	defer outputTensor.Destroy()
+
+	session, err := ort.NewSession[float32]("./yolov8m.onnx",
+		[]string{"images"}, []string{"output0"},
+		[]*ort.Tensor[float32]{inputTensor}, []*ort.Tensor[float32]{outputTensor})
+	if err != nil {
+		panic("run_model: could not create session: " + err.Error())
+	}
+	defer session.Destroy()
+
+	if err := session.Run(); err != nil {
+		panic("run_model: could not run the model: " + err.Error())
+	}
+
+	// Copy the data out so the result does not depend on the tensor
+	// that is destroyed when this function returns.
+	data := outputTensor.GetData()
+	result := make([]float32, len(data))
+	copy(result, data)
+	return result
+}
+
+// Function used to convert RAW output from YOLOv8 to an array
+// of detected objects. Each object contain the bounding box of
+// this object, the type of object and the probability
+// Returns array of detected objects in a format [[x1,y1,x2,y2,object_type,probability],..]
+func process_output(output []float32, img_width, img_height int64) [][]interface{} {
+	boxes := [][]interface{}{}
+	for index := 0; index < 8400; index++ {
+		class_id, prob := 0, float32(0.0)
+		for col := 0; col < 80; col++ {
+			if output[8400*(col+4)+index] > prob {
+				prob = output[8400*(col+4)+index]
+				class_id = col
+			}
+		}
+		if prob < 0.5 {
+			continue
+		}
+		label := yolo_classes[class_id]
+		xc := output[index]
+		yc := output[8400+index]
+		w := output[2*8400+index]
+		h := output[3*8400+index]
+		x1 := (xc - w/2) / 640 * float32(img_width)
+		y1 := (yc - h/2) / 640 * float32(img_height)
+		x2 := (xc + w/2) / 640 * float32(img_width)
+		y2 := (yc + h/2) / 640 * float32(img_height)
+		boxes = append(boxes, []interface{}{float64(x1), float64(y1), float64(x2), float64(y2), label, prob})
+	}
+
+	sort.Slice(boxes, func(i, j int) bool {
+		return boxes[i][5].(float32) < boxes[j][5].(float32)
+	})
+	result := [][]interface{}{}
+	for len(boxes) > 0 {
+		result = append(result, boxes[0])
+		tmp := [][]interface{}{}
+		for _, box := range boxes {
+			if iou(boxes[0], box) < 0.7 {
+				tmp = append(tmp, box)
+			}
+		}
+		boxes = tmp
+	}
+	return result
+}
+
+// iou computes the "Intersection-over-union" coefficient of two boxes,
+// each given as a slice whose first four items are x1, y1, x2, y2.
+// https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/.
+// Returns the intersection area divided by the union area as a float number
+func iou(box1, box2 []interface{}) float64 {
+	overlap := intersect(box1, box2)
+	total := union(box1, box2)
+	return overlap / total
+}
+
+// union computes the area covered by two boxes together: the sum of
+// both box areas minus the area they share.
+// Each box is a slice whose first four items are x1, y1, x2, y2 as float64.
+// Returns Area of the boxes union as a float number
+func union(box1, box2 []interface{}) float64 {
+	// area returns width*height of a single box.
+	area := func(box []interface{}) float64 {
+		left, top := box[0].(float64), box[1].(float64)
+		right, bottom := box[2].(float64), box[3].(float64)
+		return (right - left) * (bottom - top)
+	}
+	first := area(box1)
+	second := area(box2)
+	return first + second - intersect(box1, box2)
+}
+
+// Function calculates intersection area of two boxes.
+// Each box is a slice whose first four items are x1, y1, x2, y2 as float64.
+// Returns Area of intersection of the boxes as a float number,
+// or 0 when the boxes do not overlap.
+func intersect(box1, box2 []interface{}) float64 {
+	box1_x1, box1_y1, box1_x2, box1_y2 := box1[0].(float64), box1[1].(float64), box1[2].(float64), box1[3].(float64)
+	box2_x1, box2_y1, box2_x2, box2_y2 := box2[0].(float64), box2[1].(float64), box2[2].(float64), box2[3].(float64)
+	width := math.Min(box1_x2, box2_x2) - math.Max(box1_x1, box2_x1)
+	height := math.Min(box1_y2, box2_y2) - math.Max(box1_y1, box2_y1)
+	// For boxes that do not overlap at least one extent is not positive.
+	// Multiplying them anyway would give a negative area, or a positive
+	// one when both extents are negative.
+	if width <= 0 || height <= 0 {
+		return 0
+	}
+	return width * height
+}
+
+// yolo_classes lists the 80 YOLOv8 class labels.
+// The position of a label in the array is the class index used
+// to look the label up; each line below holds ten labels.
+var yolo_classes = []string{
+	// 0-9
+	"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+	// 10-19
+	"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+	// 20-29
+	"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+	// 30-39
+	"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+	// 40-49
+	"wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
+	// 50-59
+	"broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
+	// 60-69
+	"dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
+	// 70-79
+	"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
+}
diff --git a/yolov8m.onnx b/yolov8m.onnx