forked from zhihu/ZhiLight
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
390 changed files
with
79,482 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Top-level build script: declares the umbrella project and adds the bmengine
# library and the tests directory to the build.
cmake_minimum_required(VERSION 3.10)

# C, C++ and CUDA are all enabled here, before any subdirectory is processed.
project(main VERSION 0.1 LANGUAGES C CXX CUDA)

# Request C++14 and make it a hard requirement.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(bmengine)
add_subdirectory(tests)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# bmengine: project declaration, language setup and module search path.
cmake_minimum_required(VERSION 3.10)
project(bmengine VERSION 0.1)

enable_language(C)
enable_language(CXX)

# Default GPU architectures; a value supplied by the caller
# (-DCMAKE_CUDA_ARCHITECTURES=...) takes precedence. Set before CUDA is enabled.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "80;89")
endif()
if(NOT APPLE)
  enable_language(CUDA)
endif()

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Put this project's cmake/ directory first on the module path, but keep any
# entries provided by a parent project or the command line instead of
# overwriting them.
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/" ${CMAKE_MODULE_PATH})
|
||
|
||
# Locate the CUDA libraries linked into bmengine. Each lookup passes the CUDA
# compiler's implicit link directories as extra search paths and stores the
# result in the named *_LIBRARY variable.
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) | ||
find_library(CUBLAS_LIBRARY cublas ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) | ||
find_library(CUBLASLT_LIBRARY cublasLt ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) | ||
# NOTE(review): the variable is spelled CUBRAND_LIBRARY although it holds curand_static.
find_library(CUBRAND_LIBRARY curand_static ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) | ||
find_library(CULIBOS_LIBRARY culibos ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) | ||
# NCCL is requested as REQUIRED, so configuration fails if the find module cannot locate it.
find_package(NCCL REQUIRED) | ||
|
||
|
||
# Collect every C++ and CUDA source below this directory.
file(GLOB_RECURSE FILES_BMENGINE "*.cpp")
file(GLOB_RECURSE FILES_BMENGINE_CUDA "*.cu")
# NOTE(review): FILES_BMENGINE_HEADER is not referenced again in this file.
file(GLOB_RECURSE FILES_BMENGINE_HEADER RELATIVE "include/bmengine" "*.h")

# If a build tree lives underneath this directory, the recursive globs also
# match CMake's own compiler-probe sources (CMakeFiles/.../CMakeCXXCompilerId.cpp
# and friends). Drop them so they are never compiled into the library.
foreach(src_list FILES_BMENGINE FILES_BMENGINE_CUDA)
  if(${src_list})
    list(FILTER ${src_list} EXCLUDE REGEX "/CMakeFiles/")
  endif()
endforeach()

add_library(bmengine STATIC
  ${FILES_BMENGINE}
  ${FILES_BMENGINE_CUDA}
)
# Add the NCCL headers as SYSTEM include directories when NCCL was found;
# otherwise warn and switch USE_NCCL off.
# NOTE(review): find_package(NCCL REQUIRED) earlier in this file stops configuration
# when NCCL is missing, so the else() branch looks unreachable — confirm.
if(NCCL_FOUND) | ||
include_directories(SYSTEM ${NCCL_INCLUDE_DIRS}) | ||
else() | ||
message(WARNING "Not compiling with NCCL support. Suppress this warning with -DUSE_NCCL=OFF.") | ||
set(USE_NCCL OFF) | ||
endif() | ||
set_property(TARGET bmengine PROPERTY POSITION_INDEPENDENT_CODE ON) | ||
# NOTE(review): the CMake target properties are named CXX_VISIBILITY_PRESET and
# CUDA_VISIBILITY_PRESET; CMAKE_<LANG>_VISIBILITY_PRESET is the variable that seeds
# them. As written these two lines probably do not hide any symbols — confirm the
# intent before renaming, since hiding symbols changes what consumers can link to.
set_property(TARGET bmengine PROPERTY CMAKE_CXX_VISIBILITY_PRESET hidden) | ||
set_property(TARGET bmengine PROPERTY CMAKE_CUDA_VISIBILITY_PRESET hidden) | ||
|
||
# Public headers, bundled third-party headers and the CUDA toolkit headers are
# exported to everything that links against bmengine.
target_include_directories(bmengine
  PUBLIC "include"
  PUBLIC "include/private/3rd/"
  PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
)

# Link the CUDA libraries located above, NCCL and pthread.
# culibos is listed once; the original repeated it on two consecutive lines.
# -Bsymbolic / -Bsymbolic-functions are passed through to the linker unchanged.
target_link_libraries(
  bmengine
  ${CUDART_LIBRARY}
  ${CUBLAS_LIBRARY}
  ${CUBLASLT_LIBRARY}
  ${CUBRAND_LIBRARY}
  ${CULIBOS_LIBRARY}
  ${NCCL_LIBRARIES}
  "-Wl,-Bsymbolic -Wl,-Bsymbolic-functions"
  "pthread"
)
|
||
# Install the library and its public headers into the GNU standard locations.
include(GNUInstallDirs)
install(
  TARGETS bmengine
  # bmengine is a STATIC library, i.e. an ARCHIVE artifact. CMake older than 3.14
  # (this file allows 3.10) rejects a static target without an ARCHIVE destination.
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(
  DIRECTORY "include/bmengine"
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  FILES_MATCHING
  PATTERN "*.h"
  PATTERN "*.cuh"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
|
||
#include "bmengine/c10d/c10d.h" | ||
#include "bmengine/core/tensor.h" | ||
#include "bmengine/core/exception.h" | ||
|
||
namespace bmengine { | ||
namespace c10d { | ||
|
||
// Translate a bmengine element type into the corresponding NCCL data type.
// Types without a mapping trigger BM_ASSERT; ncclNumTypes is the value
// returned if that assertion does not leave the function.
ncclDataType_t dtype2nccl(core::DataType dtype) {
    switch (dtype) {
        case core::DataType::kHalf:     return ncclHalf;
        case core::DataType::kBFloat16: return ncclBfloat16;
        case core::DataType::kFloat:    return ncclFloat;
        case core::DataType::kDouble:   return ncclDouble;
        case core::DataType::kInt8:     return ncclInt8;
        case core::DataType::kInt32:    return ncclInt32;
        default:                        break;
    }
    BM_ASSERT(false, "Unsupport dtype " + std::string(get_data_type_name(dtype)));
    return ncclNumTypes;
}
|
||
// Issues ncclAllGather on ctx's current communicator and stream.
// The element count and data type are taken from sendbuff; the NCCL return
// code is checked by BM_NCCL_ASSERT.
// NOTE(review): recvbuff's size is not validated here — presumably the caller
// sizes it for the gathered result; confirm.
void NCCLAllGather(const core::Context& ctx, const core::Tensor& sendbuff, core::Tensor& recvbuff) { | ||
BM_NCCL_ASSERT(ncclAllGather( | ||
sendbuff.data<void*>(), | ||
recvbuff.mutable_data<void*>(), | ||
sendbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Polls the communicator's asynchronous error state until it is no longer
// ncclInProgress, then asserts on the final state.
//
// The return code of ncclCommGetAsyncError itself is checked as well: if the
// query fails, `state` is never updated, and ignoring that failure would
// leave this loop spinning forever on the initial ncclInProgress value.
void NCCLCheckAsync(ncclComm_t comm) {
    ncclResult_t state = ncclInProgress;
    do {
        BM_NCCL_ASSERT(ncclCommGetAsyncError(comm, &state));
    } while (state == ncclInProgress);
    BM_NCCL_ASSERT(state);
}
|
||
// Issues ncclAllReduce with reduction `op` on ctx's current communicator and
// stream. The element count and data type are taken from sendbuff; the NCCL
// return code is checked by BM_NCCL_ASSERT.
void NCCLAllReduce( | ||
const core::Context& ctx, | ||
const core::Tensor& sendbuff, | ||
core::Tensor& recvbuff, | ||
ncclRedOp_t op) { | ||
BM_NCCL_ASSERT(ncclAllReduce( | ||
sendbuff.data<void*>(), | ||
recvbuff.mutable_data<void*>(), | ||
sendbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
op, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
// Asynchronous error polling is currently disabled:
// NCCLCheckAsync(ctx.current_comm()); | ||
} | ||
|
||
// Issues ncclBroadcast from rank `root` on ctx's current communicator and
// stream. The element count and data type are taken from sendbuff; the NCCL
// return code is checked by BM_NCCL_ASSERT.
void NCCLBroadcast( | ||
const core::Context& ctx, const core::Tensor& sendbuff, core::Tensor& recvbuff, int root) { | ||
BM_NCCL_ASSERT(ncclBroadcast( | ||
sendbuff.data<void*>(), | ||
recvbuff.mutable_data<void*>(), | ||
sendbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
root, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Issues ncclReduce with reduction `op` towards rank `root` on ctx's current
// communicator and stream. The element count and data type are taken from
// sendbuff; the NCCL return code is checked by BM_NCCL_ASSERT.
void NCCLReduce( | ||
const core::Context& ctx, | ||
const core::Tensor& sendbuff, | ||
core::Tensor& recvbuff, | ||
ncclRedOp_t op, | ||
int root) { | ||
BM_NCCL_ASSERT(ncclReduce( | ||
sendbuff.data<void*>(), | ||
recvbuff.mutable_data<void*>(), | ||
sendbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
op, | ||
root, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Issues ncclReduceScatter with reduction `op` on ctx's current communicator
// and stream. Unlike the other collectives in this file, the count passed to
// NCCL is recvbuff.numel(); the data type still comes from sendbuff. The NCCL
// return code is checked by BM_NCCL_ASSERT.
void NCCLReduceScatter( | ||
const core::Context& ctx, | ||
const core::Tensor& sendbuff, | ||
core::Tensor& recvbuff, | ||
ncclRedOp_t op) { | ||
BM_NCCL_ASSERT(ncclReduceScatter( | ||
sendbuff.data<void*>(), | ||
recvbuff.mutable_data<void*>(), | ||
recvbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
op, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Issues ncclSend of all sendbuff.numel() elements to rank `peer` on ctx's
// current communicator and stream; the NCCL return code is checked by
// BM_NCCL_ASSERT.
void NCCLSend(const core::Context& ctx, const core::Tensor& sendbuff, int peer) { | ||
BM_NCCL_ASSERT(ncclSend( | ||
sendbuff.data<void*>(), | ||
sendbuff.numel(), | ||
dtype2nccl(sendbuff.dtype()), | ||
peer, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Issues ncclRecv of recvbuff.numel() elements from rank `peer` into recvbuff
// on ctx's current communicator and stream; the NCCL return code is checked by
// BM_NCCL_ASSERT.
void NCCLRecv(const core::Context& ctx, core::Tensor& recvbuff, int peer) { | ||
BM_NCCL_ASSERT(ncclRecv( | ||
recvbuff.mutable_data<void*>(), | ||
recvbuff.numel(), | ||
dtype2nccl(recvbuff.dtype()), | ||
peer, | ||
ctx.current_comm(), | ||
ctx.current_stream()->ptr)); | ||
} | ||
|
||
// Calls ncclGroupStart and checks its return code with BM_NCCL_ASSERT.
void NCCLGroupStart() { | ||
BM_NCCL_ASSERT(ncclGroupStart()); | ||
} | ||
|
||
// Calls ncclGroupEnd and checks its return code with BM_NCCL_ASSERT.
void NCCLGroupEnd() { | ||
BM_NCCL_ASSERT(ncclGroupEnd()); | ||
} | ||
// Ends the current NCCL group. A result other than ncclInProgress is asserted
// immediately; ncclInProgress is handed to NCCLCheckAsync, which polls `comm`.
void NCCLGroupEndCheck(ncclComm_t comm) {
    const ncclResult_t ret = ncclGroupEnd();
    if (ret != ncclInProgress) {
        BM_NCCL_ASSERT(ret);
        return;
    }
    NCCLCheckAsync(comm);
}
// Returns the value ncclCommCount reports for ctx's current communicator;
// the NCCL return code is checked by BM_NCCL_ASSERT.
int NCCLCommCount(const core::Context& ctx) { | ||
int res; | ||
BM_NCCL_ASSERT(ncclCommCount(ctx.current_comm(), &res)); | ||
return res; | ||
} | ||
// Returns the value ncclCommUserRank reports for ctx's current communicator;
// the NCCL return code is checked by BM_NCCL_ASSERT.
int NCCLCommUserRank(const core::Context& ctx) { | ||
int rank; | ||
BM_NCCL_ASSERT(ncclCommUserRank(ctx.current_comm(), &rank)); | ||
return rank; | ||
} | ||
} // namespace c10d | ||
} // namespace bmengine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Find the nccl libraries | ||
# | ||
# The following variables are optionally searched for defaults | ||
# NCCL_ROOT: Base directory where all NCCL components are found | ||
# NCCL_INCLUDE_DIR: Directory where NCCL header is found | ||
# NCCL_LIB_DIR: Directory where NCCL library is found | ||
# | ||
# The following are set after configuration is done: | ||
# NCCL_FOUND | ||
# NCCL_INCLUDE_DIRS | ||
# NCCL_LIBRARIES | ||
# | ||
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks | ||
# install NCCL in the same location as the CUDA toolkit. | ||
# See https://github.com/caffe2/caffe2/issues/1601 | ||
|
||
# User-tunable inputs; each cache entry is seeded from the environment variable
# of the same name.
set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers")
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries")
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with")

# Warn whenever the legacy environment variable is set. The previous test,
# if($ENV{NCCL_ROOT_DIR}), evaluated the path text as a variable name or boolean
# constant, so the warning never fired for an ordinary directory path.
if(NOT "$ENV{NCCL_ROOT_DIR}" STREQUAL "")
  message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.")
endif()
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatibility layer for CMake <3.12. CMake >=3.12 takes NCCL_ROOT into account
# by itself when searching for paths and libraries.
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT})

#set(NCCL_INCLUDE_DIR /usr/local/cuda/include)
#set(NCCL_LIB_DIR /usr/local/cuda/lib64)
|
||
# Locate nccl.h, preferring the user-supplied include directory.
find_path(NCCL_INCLUDE_DIRS
  NAMES nccl.h
  HINTS ${NCCL_INCLUDE_DIR})

# The versioned-suffix preference below must only affect the NCCL lookup, so
# remember the current suffix list and restore it afterwards. Otherwise every
# later find_library() call in the project would inherit the NCCL-specific suffix.
set(nccl_saved_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})

if(USE_STATIC_NCCL)
  message(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.")
  set(NCCL_LIBNAME "nccl_static")
  if(NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
else()
  set(NCCL_LIBNAME "nccl")
  if(NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
endif()

find_library(NCCL_LIBRARIES
  NAMES ${NCCL_LIBNAME}
  HINTS ${NCCL_LIB_DIR})

set(CMAKE_FIND_LIBRARY_SUFFIXES ${nccl_saved_find_library_suffixes})
unset(nccl_saved_find_library_suffixes)
|
||
# Sets NCCL_FOUND once both the header directory and the library were located.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)

if(NCCL_FOUND)  # obtaining NCCL version and some sanity checks
  set(NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h")
  message(STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...")
  set(OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
  list(APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
  include(CheckCXXSymbolExists)
  check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)

  if(NCCL_VERSION_DEFINED)
    # Build and run a tiny probe that prints the header version and exits with
    # 1 when the library reports the same version code as the header, else 0.
    # The exit code therefore doubles as the boolean NCCL_VERSION_MATCHED.
    set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cu")
    file(WRITE ${file} "
      #include <iostream>
      #include <nccl.h>
      int main()
      {
        std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
        int x;
        ncclGetVersion(&x);
        return x == NCCL_VERSION_CODE;
      }
")
    try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
      COMPILE_OUTPUT_VARIABLE nccl_probe_compile_output
      RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
      LINK_LIBRARIES ${NCCL_LIBRARIES})
    # A probe that does not even build is a toolchain/path problem, not a
    # version mismatch; report it as such together with the compiler output.
    if(NOT compile_result)
      message(FATAL_ERROR "Failed to compile the NCCL version probe ${file} \
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}):\n${nccl_probe_compile_output}")
    endif()
    if(NOT NCCL_VERSION_MATCHED)
      message(FATAL_ERROR "Found NCCL header version and library version do not match! \
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
    endif()
    # The probe terminates its output with a newline; trim it for the log line.
    string(STRIP "${NCCL_VERSION_FROM_HEADER}" NCCL_VERSION_FROM_HEADER)
    message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
  else()
    message(STATUS "NCCL version < 2.3.5-5")
  endif()
  set(CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})

  message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
  mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()
Oops, something went wrong.