Skip to content

Commit

Permalink
libavfilter/dnn: add layer maximum for native mode.
Browse files Browse the repository at this point in the history
The reason to add this layer is that it is used by srcnn in vf_sr.
This layer is currently ignored in native mode. After this patch,
we can add multiple outputs support for native mode.

Signed-off-by: Guo, Yejun <[email protected]>
Signed-off-by: Pedro Arthur <[email protected]>
  • Loading branch information
guoyejun authored and grandao committed Sep 20, 2019
1 parent ea673a0 commit b2683c6
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 7 deletions.
1 change: 1 addition & 0 deletions libavfilter/dnn/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_pad.o
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_conv2d.o
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_depth2space.o
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_maximum.o

DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o

Expand Down
36 changes: 34 additions & 2 deletions libavfilter/dnn/dnn_backend_native.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "dnn_backend_native_layer_pad.h"
#include "dnn_backend_native_layer_conv2d.h"
#include "dnn_backend_native_layer_depth2space.h"
#include "dnn_backend_native_layer_maximum.h"

static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
{
Expand Down Expand Up @@ -78,6 +79,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
LayerPadParams *pad_params;
DnnLayerMaximumParams *maximum_params;

model = av_malloc(sizeof(DNNModel));
if (!model){
Expand Down Expand Up @@ -237,6 +239,21 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
network->layers[layer].type = MIRROR_PAD;
network->layers[layer].params = pad_params;
break;
case MAXIMUM:
maximum_params = av_malloc(sizeof(*maximum_params));
if (!maximum_params){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
maximum_params->val.u32 = avio_rl32(model_file_context);
dnn_size += 4;
network->layers[layer].type = MAXIMUM;
network->layers[layer].params = maximum_params;
network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
dnn_size += 8;
break;
default:
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
Expand Down Expand Up @@ -290,6 +307,7 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
LayerPadParams *pad_params;
DnnLayerMaximumParams *maximum_params;

if (network->layers_num <= 0 || network->operands_num <= 0)
return DNN_ERROR;
Expand All @@ -313,6 +331,11 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
network->layers[layer].output_operand_index, pad_params);
break;
case MAXIMUM:
maximum_params = (DnnLayerMaximumParams *)network->layers[layer].params;
dnn_execute_layer_maximum(network->operands, network->layers[layer].input_operand_indexes,
network->layers[layer].output_operand_index, maximum_params);
break;
case INPUT:
return DNN_ERROR;
}
Expand All @@ -333,10 +356,19 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
return DNN_SUCCESS;
}

int32_t calculate_operand_data_length(DnnOperand* operand)
/**
 * Multiply together the four entries of the operand's dims array.
 *
 * @param oprd operand to inspect
 * @return the product dims[0] * dims[1] * dims[2] * dims[3]
 */
int32_t calculate_operand_dims_count(const DnnOperand *oprd)
{
    int32_t count = 1;
    int idx = 0;

    while (idx < 4) {
        count *= oprd->dims[idx];
        idx++;
    }

    return count;
}

/**
 * Compute the size in bytes of an operand's data.
 *
 * Only DNN_FLOAT is supported for now, so every element is counted as
 * sizeof(float) bytes.
 *
 * @param oprd operand whose four dims entries are multiplied together
 * @return the byte length, or 0 if a dimension is not positive or the
 *         total does not fit into int32_t
 */
int32_t calculate_operand_data_length(const DnnOperand* oprd)
{
    // currently, we just support DNN_FLOAT
    uint64_t len = sizeof(float);

    for (int i = 0; i < 4; ++i) {
        if (oprd->dims[i] <= 0)
            return 0;
        // len is at most INT32_MAX here, so the 64-bit product cannot wrap
        len *= (uint64_t)oprd->dims[i];
        if (len > INT32_MAX)
            return 0;
    }

    return (int32_t)len;
}

void ff_dnn_free_model_native(DNNModel **model)
Expand Down
6 changes: 3 additions & 3 deletions libavfilter/dnn/dnn_backend_native.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "../dnn_interface.h"
#include "libavformat/avio.h"

typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD} DNNLayerType;
/*
 * Layer type codes. The values are spelled out because the non-input codes
 * have to match the op2code table of tools/python/convert_from_tensorflow.py.
 */
typedef enum {
    INPUT          = 0,
    CONV           = 1,
    DEPTH_TO_SPACE = 2,
    MIRROR_PAD     = 3,
    MAXIMUM        = 4
} DNNLayerType;

/*
 * Operand type flags. DOT_INTERMEDIATE combines the input and the output bit,
 * which keeps it distinct from plain DOT_INPUT.
 */
typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;

Expand Down Expand Up @@ -104,6 +104,6 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output

void ff_dnn_free_model_native(DNNModel **model);

int32_t calculate_operand_data_length(DnnOperand *operand);

/**
 * Size in bytes of the operand's data: the product of its four dims entries
 * times sizeof(float) (only DNN_FLOAT is supported currently).
 */
int32_t calculate_operand_data_length(const DnnOperand *oprd);
/**
 * Product of the operand's four dims entries.
 */
int32_t calculate_operand_dims_count(const DnnOperand *oprd);
#endif
54 changes: 54 additions & 0 deletions libavfilter/dnn/dnn_backend_native_layer_maximum.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2019 Guo Yejun
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

/**
* @file
* DNN native backend implementation.
*/

#include "dnn_backend_native.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_maximum.h"

/**
 * Execute a maximum layer: each output element becomes the larger of the
 * corresponding input element and the constant params->val.y.
 *
 * The output operand receives the input's dims and data type, and its data
 * buffer is (re)allocated to the matching size.
 *
 * @param operands              operand array shared by the layers
 * @param input_operand_indexes indexes into operands; only entry 0 is read
 * @param output_operand_index  index into operands of the operand to fill
 * @param params                layer parameters holding the constant
 * @return 0 on success, DNN_ERROR if the output size is not positive or the
 *         buffer cannot be allocated
 */
int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params)
{
    const DnnOperand *input = &operands[input_operand_indexes[0]];
    DnnOperand *output = &operands[output_operand_index];
    int32_t length;
    int dims_count;
    void *data;
    const float *src;
    float *dst;

    for (int i = 0; i < 4; ++i)
        output->dims[i] = input->dims[i];

    output->data_type = input->data_type;
    length = calculate_operand_data_length(output);
    if (length <= 0)
        return DNN_ERROR;

    // Go through a temporary: assigning the result directly would replace the
    // only reference to the old buffer with NULL when the reallocation fails.
    data = av_realloc(output->data, length);
    if (!data)
        return DNN_ERROR;
    output->data = data;
    output->length = length;

    dims_count = calculate_operand_dims_count(output);
    // fetched only after the reallocation, as input and output are both
    // entries of the same operands array
    src = input->data;
    dst = output->data;
    for (int i = 0; i < dims_count; ++i)
        dst[i] = FFMAX(src[i], params->val.y);

    return 0;
}
42 changes: 42 additions & 0 deletions libavfilter/dnn/dnn_backend_native_layer_maximum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2019 Guo Yejun
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

/**
* @file
* DNN inference functions interface for native backend.
*/


#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H

#include "libavformat/avio.h"
#include "dnn_backend_native.h"

/**
 * Parameters of the maximum layer: the single constant that every input
 * element is compared against.
 */
typedef struct DnnLayerMaximumParams{
// One 32-bit value with two views: the native model loader stores the raw
// word read by avio_rl32() into u32, the layer code reads it back as float y.
union {
uint32_t u32;
float y;
}val;
} DnnLayerMaximumParams;

/**
 * Execute a maximum layer: write max(input element, params->val.y) for every
 * element of operands[input_operand_indexes[0]] into
 * operands[output_operand_index], whose dims, data type, length and data
 * buffer are set up by this call.
 *
 * @return 0 on success, DNN_ERROR if the output buffer cannot be allocated
 */
int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params);

#endif
47 changes: 47 additions & 0 deletions libavfilter/dnn/dnn_backend_tf.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "libavformat/avio.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_pad.h"
#include "dnn_backend_native_layer_maximum.h"

#include <tensorflow/c/c_api.h>

Expand Down Expand Up @@ -401,6 +402,48 @@ static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
return DNN_SUCCESS;
}

/**
 * Append a maximum layer to the TensorFlow graph.
 *
 * Two operations are created: a scalar float "Const" named "maximum/y<layer>"
 * holding params->val.y, and a "Maximum" named "maximum<layer>" whose inputs
 * are output 0 of *cur_op and output 0 of that constant.
 *
 * @param tf_model model providing the graph and the status object
 * @param cur_op   in: operation feeding the layer; out: the new Maximum operation
 * @param params   layer parameters holding the constant
 * @param layer    layer number, used to build the operation names
 * @return DNN_SUCCESS on success, DNN_ERROR otherwise
 */
static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
                                       DnnLayerMaximumParams *params, const int layer)
{
    TF_Operation *op;
    TF_Tensor *tensor;
    TF_OperationDescription *op_desc;
    TF_Output input;
    float *y;

    char name_buffer[NAME_BUFFER_SIZE];

    // Allocate the scalar before opening the operation description, so that
    // an allocation failure does not leave an unfinished description behind.
    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
    if (!tensor){
        return DNN_ERROR;
    }
    y = (float *)TF_TensorData(tensor);
    *y = params->val.y;

    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        TF_DeleteTensor(tensor);
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    // The attribute value has been handed to the graph at this point; the
    // caller-side tensor handle is still ours and has to be released.
    TF_DeleteTensor(tensor);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    // second input: output 0 of the constant created above
    input.oper = op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
{
int32_t layer;
Expand Down Expand Up @@ -471,6 +514,10 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file
layer_add_res = add_pad_layer(tf_model, &op,
(LayerPadParams *)conv_network->layers[layer].params, layer);
break;
case MAXIMUM:
layer_add_res = add_maximum_layer(tf_model, &op,
(DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);
break;
default:
CLEANUP_ON_ERROR(tf_model);
}
Expand Down
17 changes: 16 additions & 1 deletion tools/python/convert_from_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __init__(self, graph_def, nodes, outfile, dump4tb):
self.converted_nodes = set()
self.conv2d_scope_names = set()
self.conv2d_scopename_inputname_dict = {}
self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
self.name_operand_dict = {}

Expand Down Expand Up @@ -200,6 +200,19 @@ def dump_mirrorpad_to_file(self, node, f):
np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)


def dump_maximum_to_file(self, node, f):
    # Serialize one 'Maximum' node to f as: uint32 op code, float32 constant,
    # then uint32 input and output operand indexes.
    assert node.op == 'Maximum'
    self.layer_number += 1

    # the second input of the node is looked up by name and its first
    # float value is taken as the constant
    const_node = self.name_node_dict[node.input[1]]
    constant = const_node.attr['value'].tensor.float_val[0]

    op_code = np.array([self.op2code[node.op]], dtype=np.uint32)
    op_code.tofile(f)
    constant_field = np.array([constant], dtype=np.float32)
    constant_field.tofile(f)

    self.converted_nodes.add(node.name)

    src_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
    dst_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
    operand_indexes = np.array([src_index, dst_index], dtype=np.uint32)
    operand_indexes.tofile(f)


def dump_layers_to_file(self, f):
for node in self.nodes:
if node.name in self.converted_nodes:
Expand All @@ -216,6 +229,8 @@ def dump_layers_to_file(self, f):
self.dump_depth2space_to_file(node, f)
elif node.op == 'MirrorPad':
self.dump_mirrorpad_to_file(node, f)
elif node.op == 'Maximum':
self.dump_maximum_to_file(node, f)


def dump_operands_to_file(self, f):
Expand Down
2 changes: 1 addition & 1 deletion tools/python/convert_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
major = 0

# increase minor when we don't have to re-convert the model file
minor = 1
minor = 2

0 comments on commit b2683c6

Please sign in to comment.