Skip to content

Commit

Permalink
添加python方式打包
Browse files Browse the repository at this point in the history
  • Loading branch information
wildkid1024 committed Jun 20, 2023
1 parent 5880519 commit b2bc3cd
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ token
/build-android/
/build-py/
/build/
/pyfastllm/build/
/pyfastllm/dist/
/.idea/
/.vscode/
/example/Win32Demo/bin/*.*
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ if (USE_CUDA)
add_compile_definitions(USE_CUDA)
set(FASTLLM_CUDA_SOURCES src/fastllm-cuda.cu src/devices/cuda/cudadevice.cpp)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cublas)
set(CMAKE_CUDA_ARCHITECTURES "70")
# set(CMAKE_CUDA_ARCHITECTURES "70")
endif()

if (PY_API)
set(PYBIND third_party/pybind11)
add_subdirectory(${PYBIND})
add_compile_definitions(PY_API)

set(Python3_ROOT_DIR "/usr/local/python3.10.6/bin/")
# set(Python3_ROOT_DIR "/usr/local/python3.10.6/bin/")
find_package(Python3 REQUIRED)

include_directories(include third_party/pybind11/include)
Expand Down
8 changes: 0 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_
make -j4
```

### python-binding
```
mkdir build-py
cd build-py
cmake .. -DUSE_CUDA=ON -DPY_API=ON
make -j4
python cli.py -p chatglm-6b-int8.bin -t 8 # 与cpp编译的运行结果保持一致
```

编译后会在build目录下生成:

Expand Down
4 changes: 2 additions & 2 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
import platform
import logging
import argparse
import pyfastllm
import pyfastllm # 或fastllm

logging.info(f"python gcc version:{platform.python_compiler()}")

sys.path.append('./build-py')

def args_parser():
parser = argparse.ArgumentParser(description='pyfastllm')
parser.add_argument('-m', '--model', type=int, required=False, default=0, help='模型类型,默认为0, 可以设置为0(chatglm),1(moss),2(vicuna)')
parser.add_argument('-m', '--model', type=int, required=False, default=0, help='模型类型,默认为0, 可以设置为0(chatglm),1(moss),2(vicuna),3(baichuan)')
parser.add_argument('-p', '--path', type=str, required=True, default='', help='模型文件的路径')
parser.add_argument('-t', '--threads', type=int, default=4, help='使用的线程数量')
parser.add_argument('-l', '--low', action='store_true', help='使用低内存模式')
Expand Down
File renamed without changes.
33 changes: 33 additions & 0 deletions pyfastllm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# pyfastllm

本地编译安装fastllm的python接口,以两种方式编译运行:
1. cpp方式:编译为动态库,需放在python运行加载目录下
2. python方式:编译为wheel包,但暂不支持cuda

### cpp方式

手动编译:
```
mkdir build-py
cd build-py
cmake .. -DUSE_CUDA=ON -DPY_API=ON
make -j4
python cli.py -p chatglm-6b-int8.bin -t 8 # 与cpp编译的运行结果保持一致
```

脚本编译:

```
cd pyfastllm
python build_libs.py --cuda
python cli.py -p chatglm-6b-int8.bin -t 8
```

### python方式

```
cd pyfastllm
python setup.py build
python setup.py install
python cli.py -p chatglm-6b-int8.bin -t 8
```
42 changes: 42 additions & 0 deletions pyfastllm/build_libs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import shutil
import platform
import sys
import argparse

# Command-line interface: a single opt-in switch that requests a CUDA build.
parser = argparse.ArgumentParser(description='build fastllm libs')
parser.add_argument(
    '--cuda',
    action='store_true',
    help='build with cuda support',
)

# Host platform flags, all derived from one platform.system() lookup.
_HOST_OS = platform.system()
IS_WINDOWS = _HOST_OS == 'Windows'
IS_DARWIN = _HOST_OS == 'Darwin'
IS_LINUX = _HOST_OS == 'Linux'

# Name of the CMake build directory.
BUILD_DIR = 'build-py'

def build_libs():
    """Configure and build the ``pyfastllm`` CMake target in a fresh build dir.

    The build directory ``<repo root>/build-py`` is deleted and recreated,
    the process changes into it, and then CMake plus the platform's build
    tool (``ninja`` on Windows, ``make`` elsewhere) are run through the shell.

    Command-line flags are read from ``sys.argv`` via the module-level
    ``parser``; ``--cuda`` adds ``-DUSE_CUDA=ON`` on Linux.

    Raises:
        RuntimeError: if the configure/build command exits with a non-zero
            status.
    """
    # Parse first, so `--help` or a bad flag exits before anything is deleted.
    args = parser.parse_args()

    # Anchor on this script's own location (<repo>/pyfastllm/build_libs.py)
    # instead of the current working directory; otherwise running the script
    # from another directory would wipe an unrelated `build-py` folder.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(script_dir)
    cmake_build_dir = os.path.join(root_dir, BUILD_DIR)
    if os.path.exists(cmake_build_dir):
        shutil.rmtree(cmake_build_dir)
    os.makedirs(cmake_build_dir)
    os.chdir(cmake_build_dir)

    cmake_opts = ['-DPY_API=ON']
    if args.cuda:
        if IS_LINUX:
            # The project's CMakeLists.txt gates CUDA support on USE_CUDA.
            cmake_opts.append('-DUSE_CUDA=ON')
        else:
            # CUDA has only ever been wired up for the Linux branch; say so
            # instead of silently dropping the flag.
            print('warning: --cuda is only supported on Linux builds; ignoring it')

    opts = ' '.join(cmake_opts)
    if IS_WINDOWS:
        build_cmd = 'cmake -G "Ninja" ' + opts + ' .. && ninja pyfastllm'
    else:
        build_cmd = 'cmake ' + opts + ' .. && make pyfastllm -j4'

    print(build_cmd)
    # The command is assembled from constants only, so running it through the
    # shell (needed for `&&`) does not expose any untrusted input.
    status = os.system(build_cmd)
    if status != 0:
        raise RuntimeError(
            f'build command failed with status {status}: {build_cmd}'
        )


if __name__ == '__main__':
    build_libs()
1 change: 1 addition & 0 deletions pyfastllm/fastllm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from pyfastllm import *
83 changes: 83 additions & 0 deletions pyfastllm/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import glob
import os.path
from setuptools import setup, Extension
from setuptools import find_packages

import sys
import argparse
# Pull our own `--cuda` switch off the command line before setuptools sees
# it; every argument argparse does not recognise is handed back untouched.
parser = argparse.ArgumentParser(description='build pyfastllm wheel')
parser.add_argument(
    '--cuda',
    action='store_true',
    help='build with cuda support',
)
args, unknown = parser.parse_known_args()
sys.argv = [sys.argv[0], *unknown]

__VERSION__ = "0.0.1"

# Repository root: the parent of the directory that holds this setup.py.
# abspath() keeps this correct when __file__ is a relative path (e.g.
# `python setup.py build` on Python < 3.9), where the bare dirname() chain
# would collapse to '' and the source glob below would match nothing.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

ext_modules = []
try:
    from pybind11.setup_helpers import Pybind11Extension, ParallelCompile, naive_recompile

    # The NPY_NUM_BUILD_JOBS environment variable sets the number of parallel
    # compile jobs (4 when unset).
    # `naive_recompile` recompiles only if the source file changes. It does
    # not check header files!
    ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile, default=4).install()

    # Only source files (.cpp, .c, .cc) are needed.
    # NOTE(review): these paths are absolute. If the package is ever shipped
    # as an sdist via MANIFEST.in, sources may need to be relative to
    # setup.py instead -- confirm before distributing.
    source_files = glob.glob(os.path.join(BASE_DIR, "src/*.cpp"), recursive=False)
    if not source_files:
        # Fail here with a clear message rather than later inside the compiler.
        raise FileNotFoundError(
            "no C++ sources found under " + os.path.join(BASE_DIR, "src")
        )
    source_files.append(os.path.join(BASE_DIR, "src/devices/cpu/cpudevice.cpp"))
    print(source_files)

    extra_compile_args = ["-w", "-DPY_API"]
    include_dirs = [os.path.join(BASE_DIR, "include/")]
    # Directories / names of any extra libraries to link against, e.g. libabc.so
    library_dirs = []
    # (optional) if a library is not in a default dir like `/usr/lib/`,
    # either add its dir to `runtime_library_dirs` or to the env variable
    # "LD_LIBRARY_PATH". MUST be an absolute path.
    runtime_library_dirs = []
    libraries = []

    if args.cuda:
        # The previous `assert "Not Implement Yet!"` was always true and so
        # never stopped the build. Abort explicitly instead.
        # TODO: a CUDA wheel would also need the CUDA sources compiled, the
        # CUDA lib dir (e.g. /usr/local/cuda/lib64/) on the runtime library
        # path, and `cublas` linked.
        raise NotImplementedError(
            "CUDA is not supported by the setup.py build yet; "
            "use the CMake build with -DUSE_CUDA=ON instead"
        )

    ext_modules = [
        Pybind11Extension(
            "pyfastllm",  # depends on the structure of your package
            source_files,
            # pybinding.cpp assigns VERSION_INFO directly to `__version__`,
            # so the macro has to expand to a quoted C++ string literal.
            define_macros=[('VERSION_INFO', f'"{__VERSION__}"')],
            include_dirs=include_dirs,
            library_dirs=library_dirs,
            runtime_library_dirs=runtime_library_dirs,
            libraries=libraries,
            extra_compile_args=extra_compile_args,
            cxx_std=17,
            language='c++'
        ),
    ]
except Exception as e:
    # Top-level boundary of the build script: report the problem and stop.
    print("some errors happened: ")
    print(e)
    sys.exit(1)

# No custom setuptools commands are registered.
cmdclass = {}

setup(
    name='fastllm',  # used by `pip install`
    # Single source of truth: the same constant that is compiled into the
    # extension as VERSION_INFO.
    version=__VERSION__,
    description='python api for fastllm',
    long_description='',
    ext_modules=ext_modules,
    packages=find_packages(),  # the directory would be installed to site-packages
    cmdclass=cmdclass,
    # NOTE(review): this script imports pybind11 before setup() runs, so
    # pybind11 must already be installed; setup_requires cannot supply it.
    setup_requires=["pybind11"],
    # No runtime dependencies (previously a list holding one empty string).
    install_requires=[],
    python_requires='>=3.6',
    include_package_data=False,
    zip_safe=False,
)
8 changes: 8 additions & 0 deletions src/pybinding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ PYBIND11_MODULE(pyfastllm, m) {
.def("response", &fastllm::VicunaModel::Response)
.def("warmup", &fastllm::VicunaModel::WarmUp)
.def("save_lowbit_model", &fastllm::VicunaModel::SaveLowBitModel);

py::class_<fastllm::BaichuanModel>(m, "BaichuanModel")
.def(py::init<>())
.def("load_weights", &fastllm::BaichuanModel::LoadFromFile)
.def("response", &fastllm::BaichuanModel::Response)
.def("warmup", &fastllm::BaichuanModel::WarmUp)
.def("save_lowbit_model", &fastllm::BaichuanModel::SaveLowBitModel);


#ifdef VERSION_INFO
m.attr("__version__") = VERSION_INFO;
Expand Down

0 comments on commit b2bc3cd

Please sign in to comment.