Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
spetrel committed Dec 6, 2024
1 parent a750c49 commit f55c7ce
Show file tree
Hide file tree
Showing 390 changed files with 79,482 additions and 0 deletions.
12 changes: 12 additions & 0 deletions 3rd/bmengine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Top-level build script: enables the C, C++ and CUDA toolchains, pins the
# C++ standard, then descends into the library and its tests.
cmake_minimum_required(VERSION 3.10)
project(main VERSION 0.1 LANGUAGES C CXX CUDA)

# Build with C++14 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(bmengine)
add_subdirectory(tests)
70 changes: 70 additions & 0 deletions 3rd/bmengine/bmengine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Build script for the bmengine static library: C++/CUDA sources linked
# against the CUDA runtime libraries and NCCL.
cmake_minimum_required(VERSION 3.10)
project(bmengine VERSION 0.1)

enable_language(C)
enable_language(CXX)
# Default GPU architectures, applied only when the caller has not chosen any.
# Set ahead of enable_language(CUDA) below.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "80;89")
endif()
if(NOT APPLE)
  enable_language(CUDA)
endif()

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)
# Put this project's cmake/ directory (home of FindNCCL.cmake) first on the
# module path, keeping any entries an enclosing project already added.
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

# CUDA libraries are looked up in the directories the CUDA compiler links
# against implicitly.
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUBLAS_LIBRARY cublas ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUBLASLT_LIBRARY cublasLt ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUBRAND_LIBRARY curand_static ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CULIBOS_LIBRARY culibos ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
# REQUIRED: configuration stops here when NCCL cannot be located, so no
# "NCCL not found" fallback is needed further down.
find_package(NCCL REQUIRED)

# Sources are collected recursively, relative to this directory.
file(GLOB_RECURSE FILES_BMENGINE "*.cpp")
file(GLOB_RECURSE FILES_BMENGINE_CUDA "*.cu")

add_library(bmengine STATIC
  ${FILES_BMENGINE}
  ${FILES_BMENGINE_CUDA}
)

# Attach the NCCL headers to the target (and its consumers) instead of to
# the whole directory.
target_include_directories(bmengine SYSTEM PUBLIC ${NCCL_INCLUDE_DIRS})

set_property(TARGET bmengine PROPERTY POSITION_INDEPENDENT_CODE ON)
# NOTE(review): the two properties below have no effect. The target properties
# CMake honours are CXX_VISIBILITY_PRESET / CUDA_VISIBILITY_PRESET; the
# CMAKE_-prefixed names are the variables that initialise them. They are left
# unchanged because switching to hidden visibility changes which symbols
# consumers can see -- confirm the intent before renaming.
set_property(TARGET bmengine PROPERTY CMAKE_CXX_VISIBILITY_PRESET hidden)
set_property(TARGET bmengine PROPERTY CMAKE_CUDA_VISIBILITY_PRESET hidden)

target_include_directories(bmengine
  PUBLIC "include"
  PUBLIC "include/private/3rd/"
  PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
)

# PUBLIC matches the transitive behaviour of the keyword-less signature for a
# static library. culibos is listed after curand_static, which is a static
# archive.
target_link_libraries(
  bmengine
  PUBLIC
  ${CUDART_LIBRARY}
  ${CUBLAS_LIBRARY}
  ${CUBLASLT_LIBRARY}
  ${CUBRAND_LIBRARY}
  ${CULIBOS_LIBRARY}
  ${NCCL_LIBRARIES}
  "-Wl,-Bsymbolic -Wl,-Bsymbolic-functions"
  "pthread"
)

include(GNUInstallDirs)
# bmengine is a STATIC library, so it is installed through the ARCHIVE rule;
# CMake older than 3.14 rejects a static target without ARCHIVE DESTINATION.
# The LIBRARY rule is kept for compatibility.
install(
  TARGETS bmengine
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(
  DIRECTORY "include/bmengine"
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  FILES_MATCHING
  PATTERN "*.h"
  PATTERN "*.cuh"
)
147 changes: 147 additions & 0 deletions 3rd/bmengine/bmengine/c10d/c10d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@

#include "bmengine/c10d/c10d.h"
#include "bmengine/core/tensor.h"
#include "bmengine/core/exception.h"

namespace bmengine {
namespace c10d {

// Translates a bmengine DataType into the NCCL datatype enum used by the
// collective wrappers in this file. A dtype without a listed NCCL equivalent
// is reported through BM_ASSERT.
ncclDataType_t dtype2nccl(core::DataType dtype) {
    switch (dtype) {
        // Floating-point element types.
        case core::DataType::kHalf:
            return ncclHalf;
        case core::DataType::kBFloat16:
            return ncclBfloat16;
        case core::DataType::kFloat:
            return ncclFloat;
        case core::DataType::kDouble:
            return ncclDouble;
        // Integer element types.
        case core::DataType::kInt8:
            return ncclInt8;
        case core::DataType::kInt32:
            return ncclInt32;
        default:
            break;
    }
    // No mapping exists for this dtype.
    BM_ASSERT(false, "Unsupport dtype " + std::string(get_data_type_name(dtype)));
    return ncclNumTypes;
}

// Wraps ncclAllGather: gathers `sendbuff` from the ranks of the context's
// current communicator into `recvbuff`.
// The send count and dtype are taken from `sendbuff`; the call is issued on
// the context's current stream and its result is checked with BM_NCCL_ASSERT.
// NOTE(review): recvbuff's size is not validated here -- presumably callers
// size it to hold every rank's contribution.
void NCCLAllGather(const core::Context& ctx, const core::Tensor& sendbuff, core::Tensor& recvbuff) {
BM_NCCL_ASSERT(ncclAllGather(
sendbuff.data<void*>(),
recvbuff.mutable_data<void*>(),
sendbuff.numel(),
dtype2nccl(sendbuff.dtype()),
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Polls `comm` until its asynchronous state is no longer ncclInProgress, then
// checks that final state with BM_NCCL_ASSERT.
// The polling loop spins without sleeping.
void NCCLCheckAsync(ncclComm_t comm) {
    ncclResult_t state = ncclInProgress;
    do {
        // Check the query itself: if it fails, `state` may never be updated
        // and the loop would otherwise spin forever.
        BM_NCCL_ASSERT(ncclCommGetAsyncError(comm, &state));
    } while (state == ncclInProgress);
    BM_NCCL_ASSERT(state);
}

// Wraps ncclAllReduce: reduces `sendbuff` with `op` over the context's
// current communicator and writes the result into `recvbuff`.
// Element count and dtype come from `sendbuff`; the call is issued on the
// context's current stream and its result is checked with BM_NCCL_ASSERT.
void NCCLAllReduce(
const core::Context& ctx,
const core::Tensor& sendbuff,
core::Tensor& recvbuff,
ncclRedOp_t op) {
BM_NCCL_ASSERT(ncclAllReduce(
sendbuff.data<void*>(),
recvbuff.mutable_data<void*>(),
sendbuff.numel(),
dtype2nccl(sendbuff.dtype()),
op,
ctx.current_comm(),
ctx.current_stream()->ptr));
// Async error polling is currently disabled:
// NCCLCheckAsync(ctx.current_comm());
}

// Wraps ncclBroadcast: copies `sendbuff` on rank `root` into `recvbuff` on
// the ranks of the context's current communicator.
// Element count and dtype come from `sendbuff` (on every rank, not only the
// root); the call is issued on the context's current stream and its result is
// checked with BM_NCCL_ASSERT.
void NCCLBroadcast(
const core::Context& ctx, const core::Tensor& sendbuff, core::Tensor& recvbuff, int root) {
BM_NCCL_ASSERT(ncclBroadcast(
sendbuff.data<void*>(),
recvbuff.mutable_data<void*>(),
sendbuff.numel(),
dtype2nccl(sendbuff.dtype()),
root,
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Wraps ncclReduce: reduces `sendbuff` with `op` over the context's current
// communicator, delivering the result to `recvbuff` on rank `root`.
// Element count and dtype come from `sendbuff`; the call is issued on the
// context's current stream and its result is checked with BM_NCCL_ASSERT.
void NCCLReduce(
const core::Context& ctx,
const core::Tensor& sendbuff,
core::Tensor& recvbuff,
ncclRedOp_t op,
int root) {
BM_NCCL_ASSERT(ncclReduce(
sendbuff.data<void*>(),
recvbuff.mutable_data<void*>(),
sendbuff.numel(),
dtype2nccl(sendbuff.dtype()),
op,
root,
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Wraps ncclReduceScatter: reduces `sendbuff` with `op` over the context's
// current communicator and scatters the result so each rank receives one
// chunk in `recvbuff`.
// Unlike the other collectives here, the count passed to NCCL is
// recvbuff.numel() (the per-rank receive count); the dtype still comes from
// `sendbuff`. The call is issued on the context's current stream and its
// result is checked with BM_NCCL_ASSERT.
void NCCLReduceScatter(
const core::Context& ctx,
const core::Tensor& sendbuff,
core::Tensor& recvbuff,
ncclRedOp_t op) {
BM_NCCL_ASSERT(ncclReduceScatter(
sendbuff.data<void*>(),
recvbuff.mutable_data<void*>(),
recvbuff.numel(),
dtype2nccl(sendbuff.dtype()),
op,
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Wraps ncclSend: sends all elements of `sendbuff` to rank `peer` over the
// context's current communicator, on the context's current stream.
// The result is checked with BM_NCCL_ASSERT.
void NCCLSend(const core::Context& ctx, const core::Tensor& sendbuff, int peer) {
BM_NCCL_ASSERT(ncclSend(
sendbuff.data<void*>(),
sendbuff.numel(),
dtype2nccl(sendbuff.dtype()),
peer,
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Wraps ncclRecv: receives recvbuff.numel() elements of recvbuff's dtype from
// rank `peer` into `recvbuff`, over the context's current communicator and on
// the context's current stream. The result is checked with BM_NCCL_ASSERT.
void NCCLRecv(const core::Context& ctx, core::Tensor& recvbuff, int peer) {
BM_NCCL_ASSERT(ncclRecv(
recvbuff.mutable_data<void*>(),
recvbuff.numel(),
dtype2nccl(recvbuff.dtype()),
peer,
ctx.current_comm(),
ctx.current_stream()->ptr));
}

// Wraps ncclGroupStart; the result is checked with BM_NCCL_ASSERT.
void NCCLGroupStart() {
BM_NCCL_ASSERT(ncclGroupStart());
}

// Wraps ncclGroupEnd; the result is checked with BM_NCCL_ASSERT.
// NOTE(review): an ncclInProgress result is passed to BM_NCCL_ASSERT like any
// other value here; NCCLGroupEndCheck is the variant that polls in that case.
void NCCLGroupEnd() {
BM_NCCL_ASSERT(ncclGroupEnd());
}
// Ends the current NCCL group. If ncclGroupEnd reports ncclInProgress, waits
// on `comm` through NCCLCheckAsync; any other result is checked directly with
// BM_NCCL_ASSERT.
void NCCLGroupEndCheck(ncclComm_t comm) {
    ncclResult_t ret = ncclGroupEnd();
    if (ret != ncclInProgress) {
        // Completed (or failed) synchronously: validate the result right away.
        BM_NCCL_ASSERT(ret);
        return;
    }
    // Still running asynchronously: poll the communicator until it settles.
    NCCLCheckAsync(comm);
}
// Returns the rank count that ncclCommCount reports for the context's current
// communicator. The NCCL result is checked with BM_NCCL_ASSERT.
int NCCLCommCount(const core::Context& ctx) {
int res;
BM_NCCL_ASSERT(ncclCommCount(ctx.current_comm(), &res));
return res;
}
// Returns the rank that ncclCommUserRank reports for the context's current
// communicator. The NCCL result is checked with BM_NCCL_ASSERT.
int NCCLCommUserRank(const core::Context& ctx) {
int rank;
BM_NCCL_ASSERT(ncclCommUserRank(ctx.current_comm(), &rank));
return rank;
}
} // namespace c10d
} // namespace bmengine
93 changes: 93 additions & 0 deletions 3rd/bmengine/bmengine/cmake/FindNCCL.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Find the nccl libraries
#
# The following variables are optionally searched for defaults
#  NCCL_ROOT: Base directory where all NCCL components are found
#  NCCL_INCLUDE_DIR: Directory where NCCL header is found
#  NCCL_LIB_DIR: Directory where NCCL library is found
#  NCCL_VERSION: When set, a library file carrying this version suffix
#                (e.g. libnccl.so.<version>) is preferred
#  USE_STATIC_NCCL: When true, search for nccl_static instead of nccl
#
# NCCL_INCLUDE_DIR, NCCL_LIB_DIR and NCCL_VERSION default to the environment
# variables of the same name.
#
# The following are set after configuration is done:
#  NCCL_FOUND
#  NCCL_INCLUDE_DIRS
#  NCCL_LIBRARIES
#
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks
# install NCCL in the same location as the CUDA toolkit.
# See https://github.com/caffe2/caffe2/issues/1601

set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers")
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries")
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with")

# Compare the environment value as a string. An unquoted `if($ENV{...})`
# would evaluate the path text itself as a condition, which is false for an
# ordinary path, so the deprecation warning would not be shown.
if(NOT "$ENV{NCCL_ROOT_DIR}" STREQUAL "")
  message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.")
endif()
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12.
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT})

find_path(NCCL_INCLUDE_DIRS
  NAMES nccl.h
  HINTS ${NCCL_INCLUDE_DIR})

# The suffix list is adjusted only for the NCCL lookup; remember the caller's
# value so it can be restored afterwards.
set(nccl_saved_find_library_suffixes "${CMAKE_FIND_LIBRARY_SUFFIXES}")
if(USE_STATIC_NCCL)
  message(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.")
  set(NCCL_LIBNAME "nccl_static")
  if(NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
else()
  set(NCCL_LIBNAME "nccl")
  if(NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
endif()

find_library(NCCL_LIBRARIES
  NAMES ${NCCL_LIBNAME}
  HINTS ${NCCL_LIB_DIR})

# Find modules run in the caller's scope: undo the suffix change so later
# find_library() calls in the project are not affected.
set(CMAKE_FIND_LIBRARY_SUFFIXES "${nccl_saved_find_library_suffixes}")
unset(nccl_saved_find_library_suffixes)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)

if(NCCL_FOUND)  # obtaining NCCL version and some sanity checks
  set(NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h")
  message(STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...")
  set(OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
  list(APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
  include(CheckCXXSymbolExists)
  check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)

  if(NCCL_VERSION_DEFINED)
    # Build and run a small probe. It prints the header's version and exits
    # with 1 when the linked library reports the same version code, 0 otherwise.
    set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cu")
    file(WRITE ${file} "
#include <iostream>
#include <nccl.h>
int main()
{
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
int x;
ncclGetVersion(&x);
return x == NCCL_VERSION_CODE;
}
")
    try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
      RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
      LINK_LIBRARIES ${NCCL_LIBRARIES})
    # A probe that does not build was already fatal (the run result stays
    # unset); report it as a build failure rather than a version mismatch.
    if(NOT compile_result)
      message(FATAL_ERROR "Failed to compile or link the NCCL version probe ${file} (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
    endif()
    if(NOT NCCL_VERSION_MATCHED)
      message(FATAL_ERROR "Found NCCL header version and library version do not match! (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
    endif()
    # The probe ends its output with a newline; drop it before printing.
    string(STRIP "${NCCL_VERSION_FROM_HEADER}" NCCL_VERSION_FROM_HEADER)
    message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
  else()
    message(STATUS "NCCL version < 2.3.5-5")
  endif()
  set(CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})

  message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
  mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()
Loading

0 comments on commit f55c7ce

Please sign in to comment.