Skip to content

Commit

Permalink
Update for yolov4-full
Browse files Browse the repository at this point in the history
  • Loading branch information
RomStriker committed Jul 23, 2020
1 parent 087052f commit ecd8231
Showing 1 changed file with 183 additions and 75 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down Expand Up @@ -181,10 +178,11 @@ public float getObjThresh() {
private static boolean isGPU = true;

// tiny or not
private static boolean isTiny = true;
private static boolean isTiny = false;

// config yolov4 tiny
private static final int[] OUTPUT_WIDTH_TINY = new int[]{2535, 2535};
private static final int[] OUTPUT_WIDTH_FULL = new int[]{10647, 10647};
private static final int[][] MASKS_TINY = new int[][]{{3, 4, 5}, {1, 2, 3}};
private static final int[] ANCHORS_TINY = new int[]{
23, 27, 37, 58, 81, 82, 81, 82, 135, 169, 344, 319};
Expand Down Expand Up @@ -304,84 +302,127 @@ protected ByteBuffer convertBitmapToByteBuffer(Bitmap bitmap) {
return byteBuffer;
}

private ArrayList<Recognition> getDetections(ByteBuffer byteBuffer, Bitmap bitmap) {
ArrayList<Recognition> detections = new ArrayList<Recognition>();
Map<Integer, Object> outputMap = new HashMap<>();
for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()];
outputMap.put(i, out);
}
// private ArrayList<Recognition> getDetections(ByteBuffer byteBuffer, Bitmap bitmap) {
// ArrayList<Recognition> detections = new ArrayList<Recognition>();
// Map<Integer, Object> outputMap = new HashMap<>();
// for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
// float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()];
// outputMap.put(i, out);
// }
//
// Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh());
//
// Object[] inputArray = {byteBuffer};
// tfLite.runForMultipleInputsOutputs(inputArray, outputMap);
//
// for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
// int gridWidth = OUTPUT_WIDTH[i];
// float[][][][][] out = (float[][][][][]) outputMap.get(i);
//
// Log.d("YoloV4Classifier", "out[" + i + "] detect start");
// for (int y = 0; y < gridWidth; ++y) {
// for (int x = 0; x < gridWidth; ++x) {
// for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) {
// final int offset =
// (gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y
// + (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x
// + (labels.size() + 5) * b;
//
// final float confidence = expit(out[0][y][x][b][4]);
// int detectedClass = -1;
// float maxClass = 0;
//
// final float[] classes = new float[labels.size()];
// for (int c = 0; c < labels.size(); ++c) {
// classes[c] = out[0][y][x][b][5 + c];
// }
//
// for (int c = 0; c < labels.size(); ++c) {
// if (classes[c] > maxClass) {
// detectedClass = c;
// maxClass = classes[c];
// }
// }
//
// final float confidenceInClass = maxClass * confidence;
// if (confidenceInClass > getObjThresh()) {
//// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);
//// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);
//
// final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth);
// final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth);
//
// final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]);
// final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]);
//
// final RectF rect =
// new RectF(
// Math.max(0, xPos - w / 2),
// Math.max(0, yPos - h / 2),
// Math.min(bitmap.getWidth() - 1, xPos + w / 2),
// Math.min(bitmap.getHeight() - 1, yPos + h / 2));
// detections.add(new Recognition("" + offset, labels.get(detectedClass),
// confidenceInClass, rect, detectedClass));
// }
// }
// }
// }
// Log.d("YoloV4Classifier", "out[" + i + "] detect end");
// }
// return detections;
// }

Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh());
/**
* For yolov4-tiny, the situation would be a little different from the yolov4, it only has two
* output. Both has three dimenstion. The first one is a tensor with dimension [1, 2535,4], containing all the bounding boxes.
* The second one is a tensor with dimension [1, 2535, class_num], containing all the classes score.
* @param byteBuffer input ByteBuffer, which contains the image information
* @param bitmap pixel disenty used to resize the output images
* @return an array list containing the recognitions
*/

private ArrayList<Recognition> getDetectionsForFull(ByteBuffer byteBuffer, Bitmap bitmap) {
ArrayList<Recognition> detections = new ArrayList<Recognition>();
Map<Integer, Object> outputMap = new HashMap<>();
outputMap.put(0, new float[1][OUTPUT_WIDTH_FULL[0]][4]);
outputMap.put(1, new float[1][OUTPUT_WIDTH_FULL[1]][labels.size()]);
Object[] inputArray = {byteBuffer};
tfLite.runForMultipleInputsOutputs(inputArray, outputMap);

for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
int gridWidth = OUTPUT_WIDTH[i];
float[][][][][] out = (float[][][][][]) outputMap.get(i);

Log.d("YoloV4Classifier", "out[" + i + "] detect start");
for (int y = 0; y < gridWidth; ++y) {
for (int x = 0; x < gridWidth; ++x) {
for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) {
final int offset =
(gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y
+ (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x
+ (labels.size() + 5) * b;

final float confidence = expit(out[0][y][x][b][4]);
int detectedClass = -1;
float maxClass = 0;

final float[] classes = new float[labels.size()];
for (int c = 0; c < labels.size(); ++c) {
classes[c] = out[0][y][x][b][5 + c];
}

for (int c = 0; c < labels.size(); ++c) {
if (classes[c] > maxClass) {
detectedClass = c;
maxClass = classes[c];
}
}

final float confidenceInClass = maxClass * confidence;
if (confidenceInClass > getObjThresh()) {
// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);
// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);

final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth);
final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth);

final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]);
final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]);

final RectF rect =
new RectF(
Math.max(0, xPos - w / 2),
Math.max(0, yPos - h / 2),
Math.min(bitmap.getWidth() - 1, xPos + w / 2),
Math.min(bitmap.getHeight() - 1, yPos + h / 2));
detections.add(new Recognition("" + offset, labels.get(detectedClass),
confidenceInClass, rect, detectedClass));
}
}
int gridWidth = OUTPUT_WIDTH_FULL[0];
float[][][] bboxes = (float [][][]) outputMap.get(0);
float[][][] out_score = (float[][][]) outputMap.get(1);

for (int i = 0; i < gridWidth;i++){
float maxClass = 0;
int detectedClass = -1;
final float[] classes = new float[labels.size()];
for (int c = 0;c< labels.size();c++){
classes [c] = out_score[0][i][c];
}
for (int c = 0;c<labels.size();++c){
if (classes[c] > maxClass){
detectedClass = c;
maxClass = classes[c];
}
}
Log.d("YoloV4Classifier", "out[" + i + "] detect end");
final float score = maxClass;
if (score > getObjThresh()){
final float xPos = bboxes[0][i][0];
final float yPos = bboxes[0][i][1];
final float w = bboxes[0][i][2];
final float h = bboxes[0][i][3];
final RectF rectF = new RectF(
Math.max(0, xPos - w / 2),
Math.max(0, yPos - h / 2),
Math.min(bitmap.getWidth() - 1, xPos + w / 2),
Math.min(bitmap.getHeight() - 1, yPos + h / 2));
detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass ));
}
}
return detections;
}

/**
* For yolov4-tiny, the situation is a little different from the full yolov4: the model has only
* two outputs, and both have three dimensions. The first one is a tensor with dimension
* [1, 2535, 4], containing all the bounding boxes. The second one is a tensor with dimension
* [1, 2535, class_num], containing all the class scores.
* @param byteBuffer input ByteBuffer, which contains the image information
* @param bitmap source bitmap; its width and height are used to clamp the bounding boxes
* @return an array list containing the recognitions
*/
private ArrayList<Recognition> getDetectionsForTiny(ByteBuffer byteBuffer, Bitmap bitmap) {
ArrayList<Recognition> detections = new ArrayList<Recognition>();
Map<Integer, Object> outputMap = new HashMap<>();
Expand Down Expand Up @@ -418,20 +459,87 @@ private ArrayList<Recognition> getDetectionsForTiny(ByteBuffer byteBuffer, Bitma
Math.max(0, yPos - h / 2),
Math.min(bitmap.getWidth() - 1, xPos + w / 2),
Math.min(bitmap.getHeight() - 1, yPos + h / 2));
detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass ));
detections.add(new Recognition("" + i, labels.get(detectedClass),score,rectF,detectedClass ));
}
}
return detections;
}

public ArrayList<Recognition> recognizeImage(Bitmap bitmap) {
ByteBuffer byteBuffer = convertBitmapToByteBuffer(bitmap);

// Map<Integer, Object> outputMap = new HashMap<>();
// for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
// float[][][][][] out = new float[1][OUTPUT_WIDTH[i]][OUTPUT_WIDTH[i]][3][5 + labels.size()];
// outputMap.put(i, out);
// }
//
// Log.d("YoloV4Classifier", "mObjThresh: " + getObjThresh());
//
// Object[] inputArray = {byteBuffer};
// tfLite.runForMultipleInputsOutputs(inputArray, outputMap);
//
// ArrayList<Recognition> detections = new ArrayList<Recognition>();
//
// for (int i = 0; i < OUTPUT_WIDTH.length; i++) {
// int gridWidth = OUTPUT_WIDTH[i];
// float[][][][][] out = (float[][][][][]) outputMap.get(i);
//
// Log.d("YoloV4Classifier", "out[" + i + "] detect start");
// for (int y = 0; y < gridWidth; ++y) {
// for (int x = 0; x < gridWidth; ++x) {
// for (int b = 0; b < NUM_BOXES_PER_BLOCK; ++b) {
// final int offset =
// (gridWidth * (NUM_BOXES_PER_BLOCK * (labels.size() + 5))) * y
// + (NUM_BOXES_PER_BLOCK * (labels.size() + 5)) * x
// + (labels.size() + 5) * b;
//
// final float confidence = expit(out[0][y][x][b][4]);
// int detectedClass = -1;
// float maxClass = 0;
//
// final float[] classes = new float[labels.size()];
// for (int c = 0; c < labels.size(); ++c) {
// classes[c] = out[0][y][x][b][5 + c];
// }
//
// for (int c = 0; c < labels.size(); ++c) {
// if (classes[c] > maxClass) {
// detectedClass = c;
// maxClass = classes[c];
// }
// }
//
// final float confidenceInClass = maxClass * confidence;
// if (confidenceInClass > getObjThresh()) {
//// final float xPos = (x + (expit(out[0][y][x][b][0]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);
//// final float yPos = (y + (expit(out[0][y][x][b][1]) * XYSCALE[i]) - (0.5f * (XYSCALE[i] - 1))) * (INPUT_SIZE / gridWidth);
//
// final float xPos = (x + expit(out[0][y][x][b][0])) * (1.0f * INPUT_SIZE / gridWidth);
// final float yPos = (y + expit(out[0][y][x][b][1])) * (1.0f * INPUT_SIZE / gridWidth);
//
// final float w = (float) (Math.exp(out[0][y][x][b][2]) * ANCHORS[2 * MASKS[i][b]]);
// final float h = (float) (Math.exp(out[0][y][x][b][3]) * ANCHORS[2 * MASKS[i][b] + 1]);
//
// final RectF rect =
// new RectF(
// Math.max(0, xPos - w / 2),
// Math.max(0, yPos - h / 2),
// Math.min(bitmap.getWidth() - 1, xPos + w / 2),
// Math.min(bitmap.getHeight() - 1, yPos + h / 2));
// detections.add(new Recognition("" + offset, labels.get(detectedClass),
// confidenceInClass, rect, detectedClass));
// }
// }
// }
// }
// Log.d("YoloV4Classifier", "out[" + i + "] detect end");
// }
ArrayList<Recognition> detections;
//check whether the tiny version is specified
if (isTiny) {
detections = getDetectionsForTiny(byteBuffer, bitmap);
} else {
detections = getDetections(byteBuffer, bitmap);
detections = getDetectionsForFull(byteBuffer, bitmap);
}
final ArrayList<Recognition> recognitions = nms(detections);
return recognitions;
Expand Down Expand Up @@ -488,4 +596,4 @@ public boolean checkInvalidateBox(float x, float y, float width, float height, f

return true;
}
}
}

0 comments on commit ecd8231

Please sign in to comment.