Accel init shutdown (onnx#1357)

* support for scheme with locks for updating init status, but no lock to test if init. * remove inner mutex as strong consistency on Z does not require memory fences Signed-off-by: Alexandre Eichenberger <[email protected]> Co-authored-by: Tung D. Le <[email protected]>
whitneywhtsang · Apr 28, 2022 · d6c8d80 · d6c8d80
1 parent 15b93c0
commit d6c8d80
Show file tree

Hide file tree

Showing 6 changed files with 186 additions and 2 deletions.
diff --git a/src/Accelerators/InitAccelerators.cpp b/src/Accelerators/InitAccelerators.cpp
@@ -8,7 +8,7 @@
 //
 // =============================================================================
 //
-// Initialization of accelerators.
+// Initialization of accelerators' compile time data structures.
 //
 //===----------------------------------------------------------------------===//
 

diff --git a/src/Accelerators/NNPA/CMakeLists.txt b/src/Accelerators/NNPA/CMakeLists.txt
@@ -32,6 +32,7 @@ if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "s390x")
   add_subdirectory(Support)
   add_subdirectory(Transform)
   add_subdirectory(Compiler)
+  add_subdirectory(Runtime)
 
   add_onnx_mlir_library(NNPAAccel
     NNPAAccelerator.cpp

diff --git a/src/Accelerators/NNPA/NNPAAccelerator.cpp b/src/Accelerators/NNPA/NNPAAccelerator.cpp
@@ -48,7 +48,7 @@ NNPAAccelerator *NNPAAccelerator::getInstance() {
 NNPAAccelerator::NNPAAccelerator() : Accelerator(Accelerator::Kind::NNPA) {
   LLVM_DEBUG(llvm::dbgs() << "Creating an NNPA accelerator\n");
   acceleratorTargets.push_back(this);
-  addCompilerConfig(CCM_SHARED_LIB_DEPS, {"zdnn"});
+  addCompilerConfig(CCM_SHARED_LIB_DEPS, {"zdnn", "RuntimeNNPA"});
 };
 
 NNPAAccelerator::~NNPAAccelerator() { delete instance; }

diff --git a/src/Accelerators/NNPA/Runtime/CMakeLists.txt b/src/Accelerators/NNPA/Runtime/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# TODO: statically link to libcruntime.a
+
+add_onnx_mlir_library(RuntimeNNPA STATIC
+  OMRuntimeNNPA.c
+
+  EXCLUDE_FROM_OM_LIBS
+
+  DEPENDS
+  libzdnn
+
+  INCLUDE_DIRS PRIVATE
+  ${NNPA_INCLUDE_PATH}
+  )
+set_target_properties(RuntimeNNPA
+  PROPERTIES
+  LANGUAGE C
+  POSITION_INDEPENDENT_CODE TRUE
+  )
+
diff --git a/src/Accelerators/NNPA/Runtime/OMRuntimeNNPA.c b/src/Accelerators/NNPA/Runtime/OMRuntimeNNPA.c
@@ -0,0 +1,153 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+//===-------------------------- OMRuntimeNNPA.c ---------------------------===//
+//
+// Copyright 2022 The IBM Research Authors.
+//
+// =============================================================================
+//
+// Onnx MLIR NNPA Accelerator Runtime
+//
+//===----------------------------------------------------------------------===//
+
+// Include pthreads (need special treatment on Zos).
+#ifdef __MVS__
+#define _OPEN_THREADS
+#endif
+#include <pthread.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "zdnn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Interface for device init and shutdown.
+ *
+ * For devices that require initialization before execution, we suggest the
+ * following interface, assuming a device named X.
+ *
+ * 1. Define a variable OMIsInitAccelX initialized to zero. It should be safe
+ *    to read this variable outside of a lock. Setting this value to one is done
+ *    within OMInitAccelX and setting this value to zero is done within the
+ *    OMShutdownAccelX.
+ * 2. Define a function OMInitAccelX that initializes the device only once and,
+ *    once it is initialized, sets the OMIsInitAccelX value to 1. This function
+ *    must be thread safe.
+ * 3. Optionally define a function OMShutdownAccelX that shuts down the device
+ *    only once. This function is thread safe. Additional restrictions exist
+ *    on this function, namely that it can only be called when provably no
+ *    threads are using the accelerator. Failure to do so may result in
+ *    incorrect results and/or execution failure.
+ * 4. For models that use accelerator X, the compiler must insert a test of the
+ *    type below before any use of accelerator X's functionality.
+ *
+ *    if (!OMIsInitAccelX) OMInitAccelX().
+ *
+ *    Calling OMInitAccelX() unconditionally is also appropriate.
+ *
+ * 5. For accelerators that require a given level of support (e.g. the graph
+ *    was compiled with code that requires level V), one may define an
+ *    additional init function OMInitCompatibleAccelNNPA that takes the minimum
+ *    level V as parameter. After initializing the device, it is tested to
+ *    see if it supports level V. If not, an error is generated and the
+ *    program aborts.
+ */
+
+/* Init and shutdown for NNPA device.
+ *
+ * This test can be performed in the run_main_graph() without grabbing a lock,
+ * as follows:
+ *
+ * if (!OMIsInitAccelNNPA) OMInitAccelNNPA();
+ *
+ * OMInitAccelNNPA() is thread safe, and is guaranteed to set
+ * OMIsInitAccelNNPA=1 only once all other threads are guaranteed to see the
+ * full effects of zdnn_init(). Because Z has a strongly consistent memory
+ * subsystem (not a release consistency one), we don't need a hard memory
+ * fence between zdnn_init() and OMIsInitAccelNNPA=1.
+ *
+ * For the OMShutdownAccelNNPA(), we simply set the OMIsInitAccelNNPA flag to
+ * zero as there is currently no zdnn shutdown call. If one were added, then we
+ * would follow the same code pattern as in the init function.
+ */
+
+// Flag that tracks whether the NNPA accelerator is initialized: nonzero when
+// initialized, zero otherwise. It starts at zero, i.e. not initialized.
+// Name must be OMIsInitAccelX where X=NNPA.
+long OMIsInitAccelNNPA = 0;
+
+// Mutex that serializes the init and shutdown functions for NNPA.
+pthread_mutex_t OMMutexForInitShutdownNNPA = PTHREAD_MUTEX_INITIALIZER;
+
+// Initialize the NNPA accelerator: calls zdnn_init() under the init/shutdown
+// mutex if OMIsInitAccelNNPA is still zero, then sets OMIsInitAccelNNPA to 1.
+// Name must be OMInitAccelX where X=NNPA.
+void OMInitAccelNNPA() {
+  if (!OMIsInitAccelNNPA) {
+    /* Flag read without the lock says not initialized; grab mutex. */
+    pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
+    /* Test again in the mutex, as another thread may have initialized. */
+    if (!OMIsInitAccelNNPA) {
+      /* Still uninitialized, actual init. */
+      zdnn_init();
+      /* No need for a fence due to strong consistency. */
+      OMIsInitAccelNNPA = 1;
+    } /* Release mutex. */
+    pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);
+  }
+}
+
+// Same initialization as OMInitAccelNNPA, plus a check that the NNPA version
+// the program was compiled for (versionNum) can run on the actual hardware.
+void OMInitCompatibleAccelNNPA(uint64_t versionNum) {
+  if (!OMIsInitAccelNNPA) {
+    int isCompatible = 1;
+    /* Grab mutex. */
+    pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
+    /* Test again in the mutex to see if accelerator is not initialized. */
+    if (!OMIsInitAccelNNPA) {
+      /* Still uninitialized, actual init. */
+      zdnn_init();
+      /* Version check runs only here, i.e. on the call that does the init. */
+      isCompatible = zdnn_is_version_runnable((uint32_t)versionNum);
+      /* No need for a fence due to strong consistency. */
+      OMIsInitAccelNNPA = 1;
+    }
+    /* Release mutex. */
+    pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);
+    /* If not compatible, report it outside the mutex and exit(1). */
+    if (!isCompatible) {
+      fprintf(stderr,
+          "Attempting to initialize zdnn with version num %llu, which is "
+          "not compatible with current NNPA hardware\n",
+          (unsigned long long)versionNum);
+      exit(1);
+    }
+  }
+}
+
+// Shut down the NNPA accelerator: under the init/shutdown mutex, resets
+// OMIsInitAccelNNPA to 0. This function can only be called when all
+// evaluations on the NNPA are known to have completed. Name must be
+// OMShutdownAccelX where X=NNPA.
+void OMShutdownAccelNNPA() {
+  if (OMIsInitAccelNNPA) {
+    /* Grab mutex. */
+    pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
+    /* Nothing to uninitialize in zdnn; only clear the flag. */
+    OMIsInitAccelNNPA = 0;
+    /* Release mutex. */
+    pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);
+  }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Runtime/CMakeLists.txt b/src/Runtime/CMakeLists.txt
@@ -2,6 +2,9 @@
 
 add_subdirectory(jni)
 
+# TODO: should add for each accelerator its subdirectory that implements InitAccel##name
+# and ShutdownAccel##name.
+
 # Create static libcruntime.a to be embedded in model.so to make model.so self contained.
 # However, by default object code for static library is not compiled with -fPIC. Embedding
 # such static library in a shared library can cause runtime failure on some architectures,
@@ -14,6 +17,9 @@ add_onnx_mlir_library(cruntime STATIC
   OMTensorList.c
   OnnxDataType.c
 
+  DEPENDS
+  AcceleratorsInc
+
   EXCLUDE_FROM_OM_LIBS
 
   INCLUDE_DIRS PRIVATE
@@ -33,6 +39,9 @@ add_onnx_mlir_library(OMTensorUtils
   OMTensorList.cpp
   OnnxDataType.cpp
 
+  DEPENDS 
+  AcceleratorsInc
+
   EXCLUDE_FROM_OM_LIBS
 
   INCLUDE_DIRS PUBLIC