forked from NVIDIA-Merlin/NVTabular
Commit: Add ability to export to a triton python backend (NVIDIA-Merlin#545)

This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 10 changed files with 1,890 additions and 4 deletions.
@@ -1,3 +1,4 @@
nvtabular/inference/triton/model_config_pb2.py
*/.ipynb_checkpoints/*
/.*_checkpoints/
.ipynb_checkpoints/*
Empty file.
@@ -0,0 +1,94 @@
import os
import subprocess
from shutil import copyfile

import cudf
import tritonclient.http as httpclient
from google.protobuf import text_format
from tritonclient.utils import np_to_triton_dtype
# read in the triton ModelConfig proto object - generating it if it doesn't exist
try:
    import nvtabular.inference.triton.model_config_pb2 as model_config
except ImportError:
    # compile model_config.proto into a python module with protoc, then retry the
    # import; the generated file is the one added to .gitignore in this commit
    pwd = os.path.dirname(__file__)
    try:
        subprocess.check_output(
            ["protoc", f"--python_out={pwd}", f"--proto_path={pwd}", "model_config.proto"]
        )
    except Exception as e:
        raise ImportError("Failed to compile model_config.proto - is protobuf installed?") from e
    import nvtabular.inference.triton.model_config_pb2 as model_config

def generate_triton_model(workflow, name, output_path, version=1):
    """ converts a workflow to a triton model """
    workflow.save(os.path.join(output_path, str(version), "workflow"))
    _generate_model_config(workflow, name, output_path)
    copyfile(
        os.path.join(os.path.dirname(__file__), "model.py"),
        os.path.join(output_path, str(version), "model.py"),
    )
def convert_df_to_triton_input(column_names, batch, input_class=httpclient.InferInput):
    columns = [(col, batch[col]) for col in column_names]
    inputs = [input_class(name, col.shape, np_to_triton_dtype(col.dtype)) for name, col in columns]
    for i, (name, col) in enumerate(columns):
        inputs[i].set_data_from_numpy(col.values_host)
    return inputs

def convert_triton_output_to_df(columns, response):
    return cudf.DataFrame({col: response.as_numpy(col) for col in columns})
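These two helpers pair naturally on the client side; a hedged round-trip sketch, where the server URL, model name, and column names are illustrative assumptions:

```python
import cudf
import tritonclient.http as httpclient

df = cudf.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

# build triton InferInput objects from the cudf DataFrame and send them
client = httpclient.InferenceServerClient("localhost:8000")
inputs = convert_df_to_triton_input(["a", "b"], df)
response = client.infer("nvt_workflow", inputs)

# pull named outputs back out of the response into a cudf DataFrame
output_df = convert_triton_output_to_df(["a", "b"], response)
```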
def _generate_model_config(workflow, name, output_path):
    """given a workflow generates the triton ModelConfig proto object describing the inputs
    and outputs to that workflow"""
    config = model_config.ModelConfig(name=name, backend="python")

    for column in workflow.column_group.input_column_names:
        dtype = workflow.input_dtypes[column]
        config.input.append(
            model_config.ModelInput(name=column, data_type=_convert_dtype(dtype), dims=[-1])
        )

    for column, dtype in workflow.output_dtypes.items():
        config.output.append(
            model_config.ModelOutput(name=column, data_type=_convert_dtype(dtype), dims=[-1])
        )

    with open(os.path.join(output_path, "config.pbtxt"), "w") as o:
        text_format.PrintMessage(config, o)
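For illustration, a workflow with a single int64 input column "a" and a float32 output column "a" would produce a config.pbtxt roughly like the following (the model name, column name, and dtypes are made up):

```
name: "nvt_workflow"
backend: "python"
input {
  name: "a"
  data_type: TYPE_INT64
  dims: -1
}
output {
  name: "a"
  data_type: TYPE_FP32
  dims: -1
}
```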
def _convert_dtype(dtype):
    """ converts a dtype to the appropriate triton proto type """
    if dtype == "float64":
        return model_config.TYPE_FP64
    if dtype == "float32":
        return model_config.TYPE_FP32
    if dtype == "float16":
        return model_config.TYPE_FP16
    if dtype == "int64":
        return model_config.TYPE_INT64
    if dtype == "int32":
        return model_config.TYPE_INT32
    if dtype == "int16":
        return model_config.TYPE_INT16
    if dtype == "int8":
        return model_config.TYPE_INT8
    if dtype == "uint64":
        return model_config.TYPE_UINT64
    if dtype == "uint32":
        return model_config.TYPE_UINT32
    if dtype == "uint16":
        return model_config.TYPE_UINT16
    if dtype == "uint8":
        return model_config.TYPE_UINT8
    if dtype == "bool":
        return model_config.TYPE_BOOL
    if cudf.utils.dtypes.is_string_dtype(dtype):
        return model_config.TYPE_STRING
    raise ValueError(f"Can't convert dtype {dtype}")
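A quick sanity check of the mapping above; the string comparisons also match numpy dtype objects, since np.dtype("float32") == "float32" holds:

```python
import numpy as np

assert _convert_dtype(np.dtype("float32")) == model_config.TYPE_FP32
assert _convert_dtype(np.dtype("uint8")) == model_config.TYPE_UINT8
```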
@@ -0,0 +1,83 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
from typing import List

import cudf
from triton_python_backend_utils import (
    InferenceRequest,
    InferenceResponse,
    Tensor,
    get_input_tensor_by_name,
)

import nvtabular
class TritonPythonModel:
    """ Generic TritonPythonModel for nvtabular workflows """

    def initialize(self, args):
        workflow_path = os.path.join(
            args["model_repository"], str(args["model_version"]), "workflow"
        )
        self.workflow = nvtabular.Workflow.load(workflow_path)
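initialize relies on the repository layout that generate_triton_model writes out; assuming a model named nvt_workflow at version 1, that layout is roughly:

```
model_repository/
└── nvt_workflow/           # model name (illustrative)
    ├── config.pbtxt        # written by _generate_model_config
    └── 1/                  # version directory
        ├── model.py        # this file, copied by generate_triton_model
        └── workflow/       # serialized workflow from workflow.save(...)
```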
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running them through the NVTabular
        workflow.transform function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame(
                {
                    name: _convert_tensor(get_input_tensor_by_name(request, name))
                    for name in self.workflow.column_group.input_column_names
                }
            )

            # use our NVTabular workflow to transform the dataframe
            output_df = self.workflow.transform(nvtabular.Dataset(input_df)).to_ddf().compute()

            # convert back to a triton response
            response = InferenceResponse(
                output_tensors=[
                    Tensor(col, output_df[col].values_host) for col in output_df.columns
                ]
            )
            responses.append(response)

        return responses
def _convert_tensor(t):
    out = t.as_numpy()
    # cudf doesn't seem to handle dtypes like |S15
    if out.dtype.kind == "S" and out.dtype.str.startswith("|S"):
        out = out.astype("str")
    return out
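A quick illustration of the dtype conversion this helper performs (the values are made up):

```python
import numpy as np

arr = np.array([b"apple", b"banana"])
print(arr.dtype)                # |S6 - fixed-width bytes, which cudf chokes on
print(arr.astype("str").dtype)  # <U6 - unicode strings, which cudf accepts
```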