Skip to content

Commit

Permalink
Update For Version 2.0: add support for CUDA and VLM (#43)
Browse files Browse the repository at this point in the history
* release dashinfer 2.0 version

thirdparty: add cutlass.

python: spanattention build from source.

benchmark: add stop model in the end.
  • Loading branch information
kzjeef authored and laiwenzh committed Dec 16, 2024
1 parent 2e7ea7b commit a8b9f8e
Show file tree
Hide file tree
Showing 7,828 changed files with 2,035,079 additions and 9,804 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "third_party/from_source/cutlass"]
path = third_party/from_source/cutlass
url = https://github.com/NVIDIA/cutlass.git
13 changes: 13 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Read the Docs build configuration.
# Nesting restored: YAML structure is indentation-based, so the flattened form
# would declare "python" twice at top level and lose the build/tools hierarchy.
version: "2"

# Build environment: OS image and tool versions.
build:
  os: "ubuntu-22.04"
  tools:
    python: "3.10"

# Python dependencies installed before building the docs.
python:
  install:
    - requirements: docs/requirements.txt

# Sphinx entry point.
sphinx:
  configuration: docs/sphinx/conf.py
192 changes: 118 additions & 74 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set(project_version_in_env $ENV{AS_RELEASE_VERSION})
# Resolve the project version and remove any release-candidate suffix
# (e.g. "-rc1") from it.

# Fall back to a built-in default when no version was obtained from the
# AS_RELEASE_VERSION environment variable.
# NOTE(review): two default assignments appear back to back here (they look
# like the before/after lines of a diff); as written, the second one wins.
if ((NOT DEFINED project_version_in_env))
set(project_version_in_env "1.0.0")
set(project_version_in_env "2.0.0")
endif()

# Delete every "-rc<digits>" occurrence and store the result in
# STRIPED_VERSION_STRING.
string(REGEX REPLACE "-rc[0-9]+" "" STRIPED_VERSION_STRING ${project_version_in_env})
Expand All @@ -13,6 +13,10 @@ message("Build AllSpark with version:${project_version_in_env}")

project(DashInfer LANGUAGES C CXX VERSION ${project_version_in_env})

# Standard install directory variables (CMAKE_INSTALL_LIBDIR, ...).
include(GNUInstallDirs)
# Pin the install prefix to <build dir>/<project name>-<project version>.
# FORCE overwrites whatever value is already in the cache, so a prefix
# supplied by the user is replaced as well.
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-${PROJECT_VERSION} CACHE STRING "Force modify install dir" FORCE)
# NOTE(review): CPACK_PACKAGE_DEVICE_NAME is only assigned in the packaging
# section further down this file, so it presumably expands to an empty string
# here unless it is provided from outside - confirm.
message(STATUS "CMAKE_INSTALL_PREFIX:${CMAKE_INSTALL_PREFIX} CPACK_PACKAGE_DEVICE_NAME:${CPACK_PACKAGE_DEVICE_NAME}")

if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build,
options are: Debug/Release/RelWithDebInfo/MinSizeRel." FORCE)
Expand All @@ -25,10 +29,24 @@ message(STATUS "CMAKE_BUILD_TYPE:${CMAKE_BUILD_TYPE}")
set(ALLSPARK_CBLAS "MKL"
CACHE STRING "Blas library choice, MKL/BLIS/NONE")

set(RUNTIME_THREAD "OMP" CACHE STRING "cpu threading runtime")
set(RUNTIME_THREAD "OMP" CACHE STRING "cpu threading runtime") # VALUE : OMP/TBB
# Top-level build options.
# ENABLE_CUDA gates the CUDA section later in this file (include(cuda) and the
# CUDA-only compile definitions).
option(ENABLE_CUDA "build with cuda support" ON)

# Cache strings describing the target hardware. CONFIG_HOST_CPU_TYPE is
# compared against "X86" and "ARM" later in this file.
set(CONFIG_ACCELERATOR_TYPE "CUDA" CACHE STRING "compute accelerator type(like CUDA), NONE means cpu")
set(CONFIG_HOST_CPU_TYPE "X86" CACHE STRING "host cpu type, like X86, ARMV9, etc.")

## CUDA related options.
##
option(ENABLE_NV_STATIC_LIB "build with static lib of nvidia" OFF)
option(USE_SYSTEM_NV_LIB "use system nccl lib instead download binary." OFF)
option(ENABLE_CUDA_PINNED_WEIGHT_LOAD "enable cuda pinned memory for load weight" OFF)
option(ENABLE_SPAN_ATTENTION "enable build with span attention" ON)
option(ENABLE_MULTINUMA "enable multinuma, if on cpu multinuma service will be compiled" OFF)
# Copy the two option values into normal (non-cache) variables so that the
# included CMake modules can read them.
# NOTE(review): option() values are already readable through ${...}; confirm
# whether these copies are still required.
set(USE_SYSTEM_NV_LIB ${USE_SYSTEM_NV_LIB})
set(ENABLE_NV_STATIC_LIB ${ENABLE_NV_STATIC_LIB})

## x86 related option.
option(ENABLE_AVX2 "enable avx2" ON)
option(ENABLE_AVX512 "enable avx512" ON)
Expand All @@ -39,12 +57,21 @@ option(ENABLE_ARM_V84_V9 "enable v8.4-a or higher version of Arm instructions, s

# Feature and build-target switches.
# NOTE(review): BUILD_PYTHON and ENABLE_GLIBCXX11_ABI are each declared twice
# below with different defaults (this looks like old and new diff lines shown
# together). option() does nothing when the variable is already set, so the
# first declaration's default is the one that takes effect.

# Data-type / kernel feature switches; each one that is ON is turned into a
# -D<NAME> entry of ALLSPARK_DEFINITION later in this file.
option(ENABLE_FP16 "build with fp16 mode" ON)
option(ENABLE_BF16 "build with bf16 mode" ON)
option(ENABLE_FP8 "build with fp8 mode" ON)
option(ENABLE_SPARSE "build with sparse mode" ON)
option(BUILD_PYTHON "build with python api" OFF)
# BUILD_PACKAGE guards a CPack packaging section later in this file.
option(BUILD_PACKAGE "build rpm or deb package" OFF)
option(ENABLE_CUSPARSELT "build with CUSPARSELT lib" OFF)
# BUILD_UTEST adds tests/cpp and enables CTest; BUILD_PYTHON adds python/.
option(BUILD_UTEST "build with unit test" ON)
option(BUILD_EXAMPLE "build with examples" ON)
option(BUILD_PYTHON "build with python api" ON)
option(PACKAGE_RPM "package with rpm " ON)
# MEM_CHECK adds -fsanitize=address and -fno-omit-frame-pointer to C++
# compilation further down.
option(MEM_CHECK "check memory" OFF)
option(LOCK_CHECK "check deadlock" OFF)
# Selects -D_GLIBCXX_USE_CXX11_ABI=1 (ON) or =0 (OFF).
option(ENABLE_GLIBCXX11_ABI "build with cxx11 abi" ON)
option(ALWAYS_READ_LOAD_MODEL "load and parse model via every read" OFF)
option(ENABLE_GLIBCXX11_ABI "build with cxx11 abi" OFF)

# Enable JSON mode by using the LMFE library (pulls in the lmfe CMake module).
option(ENABLE_JSON_MODE "enable json mode support" ON)

#setting compiler flags
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_STANDARD ${CXX_STD})
Expand Down Expand Up @@ -74,12 +101,21 @@ if(ENABLE_GLIBCXX11_ABI)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
message("ENABLE_GLIBCXX11_ABI ${ENABLE_GLIBCXX11_ABI}")
else()
message("cxx11 abi is off")
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
endif()

# Build-type specific definitions.
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, in
# which case the else() branch is taken - confirm that only single-config
# generators are used.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# Debug builds define ALLSPARK_DEBUG_MODE for every target.
add_definitions(-DALLSPARK_DEBUG_MODE)
# With GNU C++, also instrument for coverage: --coverage is passed when
# compiling C++ sources and when linking.
if(CMAKE_COMPILER_IS_GNUCXX)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:--coverage>)
add_link_options(--coverage)
endif()
# Optional extra debug logging; uncomment a line to enable it.
# add_definitions(-DENABLE_SPAN_DEBUG) # will print span attention log.
# add_definitions(-DCONFIG_MEM_DEBUG) # will print memory allocation log.
# add_definitions(-DCONFIG_OP_DEBUG) # will print op forward, init, also sync after op forward.
# add_definitions(-DCONFIG_LOCK_DEBUG) # will print mutex lock unlock
else()
# Every non-Debug build defines NVTX_DISABLE (presumably compiles out NVTX
# profiling markers - confirm).
add_definitions(-DNVTX_DISABLE)
endif()

include(cmake/CheckGit.cmake)
Expand All @@ -95,12 +131,13 @@ conan_basic_setup(TARGETS)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
find_package(Protobuf REQUIRED)

# thirdparth
# thirdparty
set(INSTALL_LOCATION
${CMAKE_CURRENT_BINARY_DIR}/third_party/install/${CMAKE_BUILD_TYPE})

include(threading)
include(cpp-ipc)
include(smhasher)

if (ENABLE_ARMCL)
include(armcl)
Expand All @@ -110,6 +147,16 @@ include(AutoCCache)
include(dnnl)
include(cblas)

# JSON mode: record the ENABLE_JSON_MODE definition in ALLSPARK_DEFINITION and
# load the lmfe CMake module (found through CMAKE_MODULE_PATH).
if (ENABLE_JSON_MODE)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_JSON_MODE")
include(lmfe)
endif()

# The intelgemm module is only included when the host CPU type is exactly "X86".
if(CONFIG_HOST_CPU_TYPE STREQUAL "X86")
include(intelgemm)
endif()


if(MEM_CHECK)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fsanitize=address>)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fno-omit-frame-pointer>)
Expand All @@ -121,89 +168,86 @@ if(LOCK_CHECK)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fPIE>)
endif()

# Translate the enabled build options into -D flags collected in the
# ALLSPARK_DEFINITION list (presumably consumed by the targets under csrc -
# confirm).

# CUDA build: load the cuda CMake module and add the CUDA-only definitions.
# CUSPARSELT, SPARSE and FP8 are only considered when ENABLE_CUDA is ON.
if (ENABLE_CUDA)
include(cuda)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_CUDA")

if (ENABLE_CUSPARSELT)
# NOTE(review): this re-assigns a variable that is already true in this
# branch; it creates a normal variable with the value ON.
set(ENABLE_CUSPARSELT ON)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_CUSPARSELT")
endif()

if (ENABLE_SPARSE)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_SPARSE")
endif()

if (ENABLE_FP8)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_FP8")
endif()
endif()

# The remaining definitions do not depend on ENABLE_CUDA.
if (ENABLE_MULTINUMA)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_MULTINUMA")
endif()

if (ENABLE_FP16)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_FP16")
endif()
if (ENABLE_BF16)
list(APPEND ALLSPARK_DEFINITION "-DENABLE_BF16")
endif()

# Absolute path of the model protobuf schema inside the source tree.
set(AS_MODEL_PROTO_FILE ${CMAKE_CURRENT_SOURCE_DIR}/csrc/proto/allspark.proto)
if (ALWAYS_READ_LOAD_MODEL)
list(APPEND ALLSPARK_DEFINITION "-DALWAYS_READ_LOAD_MODEL")
endif()

# Sub-projects. csrc is always built.
add_subdirectory(csrc)

# C++ unit tests: enable CTest for this directory and add tests/cpp.
if (BUILD_UTEST)
enable_testing()
add_subdirectory(tests/cpp)
endif()

# Python API build, only when BUILD_PYTHON is ON.
if (BUILD_PYTHON)
add_subdirectory(python)
endif()

set(FILE_PATH "/etc/os-release")
file(STRINGS ${FILE_PATH} ID_LINE REGEX "^ID=.*$")

if(ID_LINE)
string(REGEX REPLACE "^ID=(.*)$" "\\1" OS_NAME ${ID_LINE})
# Report the detected OS ID.
message(STATUS "The OS ID is: ${OS_NAME}")

if (PACKAGE_RPM)
set(CPACK_SYSTEM_NAME "alios7")
if(CONFIG_HOST_CPU_TYPE STREQUAL "ARM")
set(CPACK_SYSTEM_ARCHITECTURE "aarch64")
else()
# Abort configuration when /etc/os-release has no ID= line. FATAL_ERROR is the
# message() mode that stops processing; a bare FATAL is not a mode keyword and
# would only be printed as part of the message text.
message(FATAL_ERROR "No ID line found in file.")
endif()

string (REGEX MATCH "ubuntu" IS_UBUNTU ${OS_NAME})


if (BUILD_PACKAGE)
# config system arch
if(CONFIG_HOST_CPU_TYPE STREQUAL "ARM")
set(CPACK_SYSTEM_ARCHITECTURE "aarch64")
else()
set(CPACK_SYSTEM_ARCHITECTURE "x86_64")
endif()

set(CPACK_PACKAGE_DEVICE_NAME "cpu")
set(CPACK_PACKAGE_VENDOR "Alibaba")
set(CPACK_PACKAGE_NAME "DashInfer")
set(CPACK_PACKAGE_VERSION ${project_version_in_env})
set(CPACK_PACKAGE_VENDOR "Alibaba Tongyi")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "DashInfer AllSpark is a LLM inference engine.")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
set(CPACK_PACKAGE_DEVICE_NAME "cpu")

if(IS_UBUNTU)
message("build deb package.")
SET(CPACK_GENERATOR "DEB")
set(CPACK_SYSTEM_NAME "ubuntu")
SET(CPACK_THREADS 16)
set(CPACK_SOURCE_GENERATOR "TGZ")
set(CPACK_SOURCE_IGNORE_FILES
/.git
/dist
/.*build.*
/build
/\\\\.DS_Store
)
SET(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
set(CPACK_COMPONENTS_ALL libraries headers)
set(CPACK_PACKAGING_INSTALL_PREFIX "")
SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "Alibaba Tongyi") #required
INCLUDE(CPack)
else()
# only support centos like rpm system.
# rpm related settings.
set(CPACK_GENERATOR "RPM")
set(CPACK_SYSTEM_NAME "centos")
set(CPACK_RPM_PACKAGE_LICENSE "Apache2")
set(CPACK_RPM_PACKAGE_GROUP "DashInfer")
set(CPACK_RPM_COMPRESSION_TYPE "gzip")
set(CPACK_RPM_PACKAGE_SUMMARY "DashInfer")
set(CPACK_PACKAGING_INSTALL_PREFIX "")
set(CPACK_RPM_PACKAGE_RELOCATABLE ON)
INCLUDE(CPack)
endif()

set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}.${CPACK_PACKAGE_DEVICE_NAME}.${CPACK_SYSTEM_NAME}.${CPACK_SYSTEM_ARCHITECTURE}")
set(CPACK_SYSTEM_ARCHITECTURE "x86_64")
endif()

# Device tag used in the package file name: "cuda-<CUDA_VERSION>-static" or
# "cuda-<CUDA_VERSION>-shared" for CUDA builds (depending on
# ENABLE_NV_STATIC_LIB), otherwise "cpu".
# NOTE(review): CUDA_VERSION is not set in this file; presumably it comes from
# the cuda module included earlier - confirm.
if (ENABLE_CUDA)
if(ENABLE_NV_STATIC_LIB)
set(CPACK_PACKAGE_DEVICE_NAME "cuda-${CUDA_VERSION}-static")
else()
set(CPACK_PACKAGE_DEVICE_NAME "cuda-${CUDA_VERSION}-shared")
endif()
else()
set(CPACK_PACKAGE_DEVICE_NAME "cpu")
endif()

# Package metadata. All CPACK_* variables are assigned before include(CPack)
# because the CPack module reads them at include time.
set(CPACK_PACKAGE_NAME "DashInfer")
# The package version keeps the raw version string (any "-rc<N>" suffix is not
# stripped here).
set(CPACK_PACKAGE_VERSION ${project_version_in_env})
set(CPACK_PACKAGE_VENDOR "Alibaba Tongyi")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "DashInfer AllSpark is a LLM inference engine.")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md")


# Empty packaging prefix plus a relocatable RPM.
set(CPACK_PACKAGING_INSTALL_PREFIX "")
set(CPACK_RPM_PACKAGE_RELOCATABLE ON)

# File name layout: <name>-<version>.<device>.<system>.<architecture>
set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}.${CPACK_PACKAGE_DEVICE_NAME}.${CPACK_SYSTEM_NAME}.${CPACK_SYSTEM_ARCHITECTURE}")
include(CPack)
endif()

#install
Expand Down
3 changes: 3 additions & 0 deletions HIE-DNN/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.swp
build
.vscode
Loading

0 comments on commit a8b9f8e

Please sign in to comment.