From ecd8231090383fd2868c40fae11fa2057d8d386d Mon Sep 17 00:00:00 2001 From: RomStriker Date: Thu, 23 Jul 2020 15:18:10 +0200 Subject: [PATCH] Update for yolov4-full --- .../detection/tflite/YoloV4Classifier.java | 258 +++++++++++++----- 1 file changed, 183 insertions(+), 75 deletions(-) diff --git a/android/app/src/main/java/org/tensorflow/lite/examples/detection/tflite/YoloV4Classifier.java b/android/app/src/main/java/org/tensorflow/lite/examples/detection/tflite/YoloV4Classifier.java index f8493090..ce3488fd 100755 --- a/android/app/src/main/java/org/tensorflow/lite/examples/detection/tflite/YoloV4Classifier.java +++ b/android/app/src/main/java/org/tensorflow/lite/examples/detection/tflite/YoloV4Classifier.java @@ -1,11 +1,8 @@ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -181,10 +178,11 @@ public float getObjThresh() { private static boolean isGPU = true; // tiny or not - private static boolean isTiny = true; + private static boolean isTiny = false; // config yolov4 tiny private static final int[] OUTPUT_WIDTH_TINY = new int[]{2535, 2535}; + private static final int[] OUTPUT_WIDTH_FULL = new int[]{10647, 10647}; private static final int[][] MASKS_TINY = new int[][]{{3, 4, 5}, {1, 2, 3}}; private static final int[] ANCHORS_TINY = new int[]{ 23, 27, 37, 58, 81, 82, 81, 82, 135, 169, 344, 319}; @@ -304,84 +302,127 @@ protected ByteBuffer convertBitmapToByteBuffer(Bitmap bitmap) { return byteBuffer; } - private ArrayList getDetections(ByteBuffer byteBuffer, Bitmap bitmap) { - ArrayList detections = new ArrayList(); - Map outputMap = new HashMap<>(); - for (int i = 0; i < OUTPUT_WIDTH.length; i++) { - float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()]; - outputMap.put(i, out); - } +// private ArrayList getDetections(ByteBuffer byteBuffer, Bitmap bitmap) { +// ArrayList detections = new ArrayList(); +// Map outputMap = new HashMap<>(); +// for (int i = 0; i < OUTPUT_WIDTH.length; i++) { +// float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()]; +// outputMap.put(i, out); +// } +// +// Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh()); +// +// Object[] inputArray = {byteBuffer}; +// tfLite.runForMultipleInputsOutputs(inputArray, outputMap); +// +// for (int i = 0; i < OUTPUT_WIDTH.length; i++) { +// int gridWidth = OUTPUT_WIDTH[i]; +// float[][][][][] out = (float[][][][][]) outputMap.get(i); +// +// Log.d("YoloV4Classifier", "out[" + i + "] detect start"); +// for (int y = 0; y < gridWidth; ++y) { +// for (int x = 0; x < gridWidth; ++x) { +// for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) { +// final int offset = +// (gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y +// + (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x +// + (labels.size() 
+ 5) * b; +// +// final float confidence = expit(out[0][y][x][b][4]); +// int detectedClass = -1; +// float maxClass = 0; +// +// final float[] classes = new float[labels.size()]; +// for (int c = 0; c < labels.size(); ++c) { +// classes[c] = out[0][y][x][b][5 + c]; +// } +// +// for (int c = 0; c < labels.size(); ++c) { +// if (classes[c] > maxClass) { +// detectedClass = c; +// maxClass = classes[c]; +// } +// } +// +// final float confidenceInClass = maxClass * confidence; +// if (confidenceInClass > getObjThresh()) { +//// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); +//// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); +// +// final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth); +// final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth); +// +// final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]); +// final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]); +// +// final RectF rect = +// new RectF( +// Math.max(0, xPos - w / 2), +// Math.max(0, yPos - h / 2), +// Math.min(bitmap.getWidth() - 1, xPos + w / 2), +// Math.min(bitmap.getHeight() - 1, yPos + h / 2)); +// detections.add(new Recognition("" + offset, labels.get(detectedClass), +// confidenceInClass, rect, detectedClass)); +// } +// } +// } +// } +// Log.d("YoloV4Classifier", "out[" + i + "] detect end"); +// } +// return detections; +// } - Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh()); + /** + * For yolov4-tiny, the situation would be a little different from the yolov4, it only has two + * output. Both has three dimenstion. The first one is a tensor with dimension [1, 2535,4], containing all the bounding boxes. + * The second one is a tensor with dimension [1, 2535, class_num], containing all the classes score. 
+ * @param byteBuffer input ByteBuffer, which contains the image information + * @param bitmap pixel disenty used to resize the output images + * @return an array list containing the recognitions + */ + private ArrayList getDetectionsForFull(ByteBuffer byteBuffer, Bitmap bitmap) { + ArrayList detections = new ArrayList(); + Map outputMap = new HashMap<>(); + outputMap.put(0, new float[1][OUTPUT_WIDTH_FULL[0]][4]); + outputMap.put(1, new float[1][OUTPUT_WIDTH_FULL[1]][labels.size()]); Object[] inputArray = {byteBuffer}; tfLite.runForMultipleInputsOutputs(inputArray, outputMap); - for (int i = 0; i < OUTPUT_WIDTH.length; i++) { - int gridWidth = OUTPUT_WIDTH[i]; - float[][][][][] out = (float[][][][][]) outputMap.get(i); - - Log.d("YoloV4Classifier", "out[" + i + "] detect start"); - for (int y = 0; y < gridWidth; ++y) { - for (int x = 0; x < gridWidth; ++x) { - for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) { - final int offset = - (gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y - + (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x - + (labels.size() + 5) * b; - - final float confidence = expit(out[0][y][x][b][4]); - int detectedClass = -1; - float maxClass = 0; - - final float[] classes = new float[labels.size()]; - for (int c = 0; c < labels.size(); ++c) { - classes[c] = out[0][y][x][b][5 + c]; - } - - for (int c = 0; c < labels.size(); ++c) { - if (classes[c] > maxClass) { - detectedClass = c; - maxClass = classes[c]; - } - } - - final float confidenceInClass = maxClass * confidence; - if (confidenceInClass > getObjThresh()) { -// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); -// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); - - final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth); - final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth); - - 
final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]); - final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]); - - final RectF rect = - new RectF( - Math.max(0, xPos - w / 2), - Math.max(0, yPos - h / 2), - Math.min(bitmap.getWidth() - 1, xPos + w / 2), - Math.min(bitmap.getHeight() - 1, yPos + h / 2)); - detections.add(new Recognition("" + offset, labels.get(detectedClass), - confidenceInClass, rect, detectedClass)); - } - } + int gridWidth = OUTPUT_WIDTH_FULL[0]; + float[][][] bboxes = (float [][][]) outputMap.get(0); + float[][][] out_score = (float[][][]) outputMap.get(1); + + for (int i = 0; i < gridWidth;i++){ + float maxClass = 0; + int detectedClass = -1; + final float[] classes = new float[labels.size()]; + for (int c = 0;c< labels.size();c++){ + classes [c] = out_score[0][i][c]; + } + for (int c = 0;c<labels.size();++c){ + if (classes[c] > maxClass){ + detectedClass = c; + maxClass = classes[c]; } } - Log.d("YoloV4Classifier", "out[" + i + "] detect end"); + final float score = maxClass; + if (score > getObjThresh()){ + final float xPos = bboxes[0][i][0]; + final float yPos = bboxes[0][i][1]; + final float w = bboxes[0][i][2]; + final float h = bboxes[0][i][3]; + final RectF rectF = new RectF( + Math.max(0, xPos - w / 2), + Math.max(0, yPos - h / 2), + Math.min(bitmap.getWidth() - 1, xPos + w / 2), + Math.min(bitmap.getHeight() - 1, yPos + h / 2)); + detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass )); + } } return detections; } - /** - * For yolov4-tiny, the situation would be a little different from the yolov4, it only has two - * output. Both has three dimenstion. The first one is a tensor with dimension [1, 2535,4], containing all the bounding boxes. - * The second one is a tensor with dimension [1, 2535, class_num], containing all the classes score. 
- * @param byteBuffer input ByteBuffer, which contains the image information - * @param bitmap pixel disenty used to resize the output images - * @return an array list containing the recognitions - */ private ArrayList getDetectionsForTiny(ByteBuffer byteBuffer, Bitmap bitmap) { ArrayList detections = new ArrayList(); Map outputMap = new HashMap<>(); @@ -418,7 +459,7 @@ private ArrayList getDetectionsForTiny(ByteBuffer byteBuffer, Bitma Math.max(0, yPos - h / 2), Math.min(bitmap.getWidth() - 1, xPos + w / 2), Math.min(bitmap.getHeight() - 1, yPos + h / 2)); - detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass )); + detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass )); } } return detections; @@ -426,12 +467,79 @@ private ArrayList getDetectionsForTiny(ByteBuffer byteBuffer, Bitma public ArrayList recognizeImage(Bitmap bitmap) { ByteBuffer byteBuffer = convertBitmapToByteBuffer(bitmap); + +// Map outputMap = new HashMap<>(); +// for (int i = 0; i < OUTPUT_WIDTH.length; i++) { +// float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()]; +// outputMap.put(i, out); +// } +// +// Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh()); +// +// Object[] inputArray = {byteBuffer}; +// tfLite.runForMultipleInputsOutputs(inputArray, outputMap); +// +// ArrayList detections = new ArrayList(); +// +// for (int i = 0; i < OUTPUT_WIDTH.length; i++) { +// int gridWidth = OUTPUT_WIDTH[i]; +// float[][][][][] out = (float[][][][][]) outputMap.get(i); +// +// Log.d("YoloV4Classifier", "out[" + i + "] detect start"); +// for (int y = 0; y < gridWidth; ++y) { +// for (int x = 0; x < gridWidth; ++x) { +// for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) { +// final int offset = +// (gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y +// + (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x +// + (labels.size() + 5) * b; +// +// final float confidence = 
expit(out[0][y][x][b][4]); +// int detectedClass = -1; +// float maxClass = 0; +// +// final float[] classes = new float[labels.size()]; +// for (int c = 0; c < labels.size(); ++c) { +// classes[c] = out[0][y][x][b][5 + c]; +// } +// +// for (int c = 0; c < labels.size(); ++c) { +// if (classes[c] > maxClass) { +// detectedClass = c; +// maxClass = classes[c]; +// } +// } +// +// final float confidenceInClass = maxClass * confidence; +// if (confidenceInClass > getObjThresh()) { +//// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); +//// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth); +// +// final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth); +// final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth); +// +// final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]); +// final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]); +// +// final RectF rect = +// new RectF( +// Math.max(0, xPos - w / 2), +// Math.max(0, yPos - h / 2), +// Math.min(bitmap.getWidth() - 1, xPos + w / 2), +// Math.min(bitmap.getHeight() - 1, yPos + h / 2)); +// detections.add(new Recognition("" + offset, labels.get(detectedClass), +// confidenceInClass, rect, detectedClass)); +// } +// } +// } +// } +// Log.d("YoloV4Classifier", "out[" + i + "] detect end"); +// } ArrayList detections; - //check whether the tiny version is specified if (isTiny) { detections = getDetectionsForTiny(byteBuffer, bitmap); } else { - detections = getDetections(byteBuffer, bitmap); + detections = getDetectionsForFull(byteBuffer, bitmap); } final ArrayList recognitions = nms(detections); return recognitions; @@ -488,4 +596,4 @@ public boolean checkInvalidateBox(float x, float y, float width, float height, f return true; } -} +} \ No newline at end of file