Skip to content

Commit

Permalink
Accel init shutdown (onnx#1357)
Browse files Browse the repository at this point in the history
* support for scheme with locks for updating init status, but no lock to test if init.
* remove inner mutex as strong consistency on Z does not require memory fences

Signed-off-by: Alexandre Eichenberger <[email protected]>
Co-authored-by: Tung D. Le <[email protected]>
  • Loading branch information
AlexandreEichenberger and tungld authored Apr 28, 2022
1 parent 15b93c0 commit d6c8d80
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/Accelerators/InitAccelerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//
// =============================================================================
//
// Initialization of accelerators.
// Initialization of accelerators' compile time data structures.
//
//===----------------------------------------------------------------------===//

Expand Down
1 change: 1 addition & 0 deletions src/Accelerators/NNPA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "s390x")
add_subdirectory(Support)
add_subdirectory(Transform)
add_subdirectory(Compiler)
add_subdirectory(Runtime)

add_onnx_mlir_library(NNPAAccel
NNPAAccelerator.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/NNPAAccelerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ NNPAAccelerator *NNPAAccelerator::getInstance() {
NNPAAccelerator::NNPAAccelerator() : Accelerator(Accelerator::Kind::NNPA) {
LLVM_DEBUG(llvm::dbgs() << "Creating an NNPA accelerator\n");
acceleratorTargets.push_back(this);
addCompilerConfig(CCM_SHARED_LIB_DEPS, {"zdnn"});
addCompilerConfig(CCM_SHARED_LIB_DEPS, {"zdnn", "RuntimeNNPA"});
};

NNPAAccelerator::~NNPAAccelerator() { delete instance; }
Expand Down
21 changes: 21 additions & 0 deletions src/Accelerators/NNPA/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SPDX-License-Identifier: Apache-2.0

# TODO: statically link to libcruntime.a

# Build the NNPA accelerator runtime support (device init/shutdown entry
# points in OMRuntimeNNPA.c) as a static library named RuntimeNNPA.
# The target depends on libzdnn and compiles with NNPA_INCLUDE_PATH as a
# private include directory (OMRuntimeNNPA.c includes "zdnn.h").
# NOTE(review): EXCLUDE_FROM_OM_LIBS is handled by the project's
# add_onnx_mlir_library macro; presumably it keeps this runtime-only library
# out of the list of compiler libraries - confirm against the macro definition.
add_onnx_mlir_library(RuntimeNNPA STATIC
OMRuntimeNNPA.c

EXCLUDE_FROM_OM_LIBS

DEPENDS
libzdnn

INCLUDE_DIRS PRIVATE
${NNPA_INCLUDE_PATH}
)
# The source is plain C, and the objects are built as position independent
# code.
# NOTE(review): PIC is presumably required because this static archive gets
# linked into the generated model shared library - confirm.
set_target_properties(RuntimeNNPA
PROPERTIES
LANGUAGE C
POSITION_INDEPENDENT_CODE TRUE
)

153 changes: 153 additions & 0 deletions src/Accelerators/NNPA/Runtime/OMRuntimeNNPA.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* SPDX-License-Identifier: Apache-2.0
*/

//===-------------------------- OMRuntimeNNPA.c ---------------------------===//
//
// Copyright 2022 The IBM Research Authors.
//
// =============================================================================
//
// Onnx MLIR NNPA Accelerator Runtime
//
//===----------------------------------------------------------------------===//

// Include pthreads (need special treatment on Zos).
#ifdef __MVS__
#define _OPEN_THREADS
#endif
#include <pthread.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "zdnn.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Interface for device init and shutdown.
*
* For devices that require initialization before execution, we suggest the
* following interface, assuming a device named X.
*
* 1. Define a variable OMIsInitAccelX initialized to zero. It should be safe
* to read this variable outside of a lock. Setting this value to one is done
* within OMInitAccelX and setting this value to zero is done within the
* OMShutdownAccelX.
* 2. Define a function OMInitAccelX that initializes the device only once and,
* once it is initialized, sets the OMIsInitAccelX value to 1. This function
* must be thread safe.
* 3. Optionally define a function OMShutdownAccelX that shuts down the device
* only once. This function is thread safe. Additional restrictions exist
* on this function, namely that it can only be called when provably no
* threads are using the accelerator. Failure to do so may result in
* incorrect results and/or execution failure.
* 4. For models that use accelerator X, the compiler must insert a test of the
* type below before any use of accelerator X's functionality.
*
* if (!OMIsInitAccelX) OMInitAccelX().
*
* Calling OMInitAccelX() unconditionally is also appropriate.
*
* 5. For accelerators that require a given level of support (e.g. the graph
* was compiled with code that requires level V), one may define an additional
* init function OMInitCompatibleAccelNNPA which takes the minimum level
* V as a parameter. After initializing the device, the device is tested
* to see if it supports level V. If not, an error is generated and the
* program aborts.
*/

/* Init and shutdown for NNPA device.
*
* This test can be performed in the run_main_graph() without grabbing a lock,
* as follows:
*
* if (!OMIsInitAccelNNPA) OMInitAccelNNPA();
*
* OMInitAccelNNPA() is thread safe, and is guaranteed to set
* OMIsInitAccelNNPA=1 only once all other threads are guaranteed to see the
* full effects of zdnn_init(). Because Z does not have a release consistency
* memory subsystem, we don't need a hard memory fence between zdnn_init() and
* OMIsInitAccelNNPA=1.
*
* For the OMShutdownAccelNNPA(), we simply set the OMIsInitAccelNNPA flag to
* zero as there is currently no zdnn shutdown call. If one were added, then we
* would follow the same code pattern as in the init function.
*/

// Flag tracking whether the NNPA accelerator has been initialized.
// 0 means uninitialized (the initial value); the init functions in this file
// set it to 1 and the shutdown function resets it to 0, always while holding
// OMMutexForInitShutdownNNPA. The flag is also read outside of the mutex as a
// fast-path test before any locking is attempted.
// Name must be OMIsInitAccelX where X=NNPA.
long OMIsInitAccelNNPA = 0;

// Mutex serializing the init and shutdown functions of this file; every write
// to OMIsInitAccelNNPA happens while this mutex is held.
pthread_mutex_t OMMutexForInitShutdownNNPA = PTHREAD_MUTEX_INITIALIZER;

// Initialize the NNPA device at most once and record the fact by setting
// OMIsInitAccelNNPA to 1. Concurrent callers are serialized through
// OMMutexForInitShutdownNNPA; callers that find the flag already set return
// without taking the mutex.
// Name must be OMInitAccelX where X=NNPA.
void OMInitAccelNNPA() {
  /* Fast path: already initialized, nothing to do and no lock needed. */
  if (OMIsInitAccelNNPA)
    return;

  pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
  /* Look at the flag again now that the mutex is held: another thread may
   * have completed the initialization while this one waited for the lock. */
  if (OMIsInitAccelNNPA == 0) {
    /* Still uninitialized, so this thread performs the actual init. */
    zdnn_init();
    /* No need for a fence due to strong consistency. */
    OMIsInitAccelNNPA = 1;
  }
  pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);
}

// Perform the same one-time initialization as OMInitAccelNNPA and, in
// addition, check that the NNPA version the program was compiled for is
// runnable on the actual NNPA hardware.
//
// versionNum: version number the program was compiled for; it is truncated to
//   32 bits before being handed to zdnn_is_version_runnable().
//
// The compatibility check is performed only by the caller that actually
// carries out the initialization; callers that find OMIsInitAccelNNPA already
// set return immediately without checking. When the check fails, a message is
// printed to stderr and the process terminates with exit(1).
void OMInitCompatibleAccelNNPA(uint64_t versionNum) {
  /* Fast path: already initialized, nothing to do and no lock needed. */
  if (OMIsInitAccelNNPA)
    return;

  int isCompatible = 1;
  /* Grab mutex. */
  pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
  /* Test again in the mutex to see if accelerator is not initialized. */
  if (!OMIsInitAccelNNPA) {
    /* Still uninitialized, actual init. */
    zdnn_init();
    /* Check if version is compatible. */
    isCompatible = zdnn_is_version_runnable((uint32_t)versionNum);
    /* No need for a fence due to strong consistency. */
    OMIsInitAccelNNPA = 1;
  }
  /* Release mutex before reporting, so the error path never holds the lock. */
  pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);

  /* If not compatible, generate an error here. */
  if (!isCompatible) {
    /* uint64_t is not necessarily unsigned long long (it is unsigned long on
     * LP64 targets), so cast explicitly to match the %llu conversion. */
    fprintf(stderr,
        "Attempting to initialize zdnn with version num %llu, which is "
        "not compatible with current NNPA hardware\n",
        (unsigned long long)versionNum);
    exit(1);
  }
}

// Shut down the NNPA device: under OMMutexForInitShutdownNNPA, reset
// OMIsInitAccelNNPA to 0. This function can only be called when all
// evaluations on the NNPA are known to have completed.
// Name must be OMShutdownAccelX where X=NNPA.
void OMShutdownAccelNNPA() {
  /* Not initialized: nothing to shut down, so skip the lock entirely. */
  if (OMIsInitAccelNNPA == 0)
    return;

  pthread_mutex_lock(&OMMutexForInitShutdownNNPA);
  /* Nothing to uninitialize on the device itself; only clear the flag. */
  OMIsInitAccelNNPA = 0;
  pthread_mutex_unlock(&OMMutexForInitShutdownNNPA);
}

#ifdef __cplusplus
}
#endif
9 changes: 9 additions & 0 deletions src/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

add_subdirectory(jni)

# TODO: should add for each accelerator its subdirectory that implements InitAccel##name
# and ShutdownAccel##name.

# Create static libcruntime.a to be embedded in model.so to make model.so self contained.
# However, by default object code for static library is not compiled with -fPIC. Embedding
# such static library in a shared library can cause runtime failure on some architectures,
Expand All @@ -14,6 +17,9 @@ add_onnx_mlir_library(cruntime STATIC
OMTensorList.c
OnnxDataType.c

DEPENDS
AcceleratorsInc

EXCLUDE_FROM_OM_LIBS

INCLUDE_DIRS PRIVATE
Expand All @@ -33,6 +39,9 @@ add_onnx_mlir_library(OMTensorUtils
OMTensorList.cpp
OnnxDataType.cpp

DEPENDS
AcceleratorsInc

EXCLUDE_FROM_OM_LIBS

INCLUDE_DIRS PUBLIC
Expand Down

0 comments on commit d6c8d80

Please sign in to comment.