添加python方式打包

felix-fei-fei · Jun 20, 2023 · b2bc3cd · b2bc3cd
1 parent 5880519
commit b2bc3cd
Show file tree

Hide file tree

Showing 10 changed files with 173 additions and 12 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,8 @@ token
 /build-android/
 /build-py/
 /build/
+/pyfastllm/build/
+/pyfastllm/dist/
 /.idea/
 /.vscode/
 /example/Win32Demo/bin/*.*

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -30,15 +30,15 @@ if (USE_CUDA)
     add_compile_definitions(USE_CUDA)
     set(FASTLLM_CUDA_SOURCES src/fastllm-cuda.cu src/devices/cuda/cudadevice.cpp)
     set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cublas)
-    set(CMAKE_CUDA_ARCHITECTURES "70")
+    # set(CMAKE_CUDA_ARCHITECTURES "70")
 endif()
 
 if (PY_API)
     set(PYBIND third_party/pybind11)
     add_subdirectory(${PYBIND})
     add_compile_definitions(PY_API)
 
-    set(Python3_ROOT_DIR "/usr/local/python3.10.6/bin/")
+    # set(Python3_ROOT_DIR "/usr/local/python3.10.6/bin/")
     find_package(Python3 REQUIRED)
 
     include_directories(include third_party/pybind11/include)

diff --git a/README.md b/README.md
@@ -68,14 +68,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_
 make -j4
 ```
 
-### python-binding
-```
-mddir build-py
-cd build-py
-cmake .. -DUSE_CUDA=ON -DPY_API=ON
-make -j4
-python cli.py -p chatglm-6b-int8.bin -t 8  # 与cpp编译的运行结果保持一致
-```
 
 编译后会在build目录下生成：
 

diff --git a/cli.py b/cli.py
@@ -3,15 +3,15 @@
 import platform
 import logging
 import argparse
-import pyfastllm
+import pyfastllm # 或fastllm
 
 logging.info(f"python gcc version:{platform.python_compiler()}")
 
 sys.path.append('./build-py')
 
 def args_parser():
     parser = argparse.ArgumentParser(description='pyfastllm')
-    parser.add_argument('-m', '--model', type=int, required=False, default=0, help='模型类型，默认为0, 可以设置为0(chatglm),1(moss),2(vicuna)')
+    parser.add_argument('-m', '--model', type=int, required=False, default=0, help='模型类型，默认为0, 可以设置为0(chatglm),1(moss),2(vicuna),3(baichuan)')
     parser.add_argument('-p', '--path', type=str, required=True, default='', help='模型文件的路径')
     parser.add_argument('-t', '--threads', type=int, default=4,  help='使用的线程数量')
     parser.add_argument('-l', '--low', action='store_true', help='使用低内存模式')

diff --git a/include/fastllm-cuda.h → include/fastllm-cuda.cuh b/include/fastllm-cuda.h → include/fastllm-cuda.cuh
diff --git a/pyfastllm/README.md b/pyfastllm/README.md
@@ -0,0 +1,33 @@
+# pyfastllm 
+
+本地编译安装fastllm的python接口，以两种方式编译运行：
+1. cpp方式：编译为动态库，需放在python运行加载目录下
+2. python方式：编译为wheel包，但暂不支持cuda
+
+### cpp方式
+
+手动编译：
+```
+mkdir build-py
+cd build-py
+cmake .. -DUSE_CUDA=ON -DPY_API=ON
+make -j4
+python cli.py -p chatglm-6b-int8.bin -t 8  # 与cpp编译的运行结果保持一致
+```
+
+脚本编译：
+
+```
+cd pyfastllm
+python build_libs.py --cuda
+python cli.py -p chatglm-6b-int8.bin -t 8 
+```
+
+### python方式
+
+```
+cd pyfastllm
+python setup.py build
+python setup.py install 
+python cli.py -p chatglm-6b-int8.bin -t 8 
+```
diff --git a/pyfastllm/build_libs.py b/pyfastllm/build_libs.py
@@ -0,0 +1,42 @@
+import os
+import shutil
+import platform
+import sys
+import argparse
+
+parser = argparse.ArgumentParser(description='build fastllm libs')
+parser.add_argument('--cuda', dest='cuda', action='store_true', default=False,
+                    help='build with cuda support')
+
+IS_WINDOWS = (platform.system() == 'Windows')
+IS_DARWIN = (platform.system() == 'Darwin')
+IS_LINUX = (platform.system() == 'Linux')
+
+BUILD_DIR = 'build-py' # build path
+
+def build_libs():
+    # create build dir
+    root_dir = os.path.dirname(os.getcwd())
+    cmake_build_dir = os.path.join(root_dir, BUILD_DIR)
+    if os.path.exists(cmake_build_dir):
+        shutil.rmtree(cmake_build_dir)
+    os.makedirs(cmake_build_dir)
+    os.chdir(cmake_build_dir)
+
+    # build it 
+    args = parser.parse_args()
+    if IS_WINDOWS:
+        os.system('cmake -G "Ninja" -DPY_API=ON .. && ninja pyfastllm')
+    elif IS_LINUX:
+        extra_opts = ' -DPY_API=ON '
+        extra_opts += ' -DMNN_CUDA=ON ' if args.cuda else ' '
+        build_cmd = 'cmake ' + extra_opts + ' .. && make pyfastllm -j4'
+        print(build_cmd)
+        os.system('cmake ' + extra_opts + ' .. && make pyfastllm -j4')
+    else:
+        extra_opts = '-DPY_API=ON'
+        os.system('cmake ' + extra_opts + '.. && make pyfastllm -j4')
+
+
+if __name__ == '__main__':
+    build_libs()
diff --git a/pyfastllm/fastllm/__init__.py b/pyfastllm/fastllm/__init__.py
@@ -0,0 +1 @@
+from pyfastllm import *
diff --git a/pyfastllm/setup.py b/pyfastllm/setup.py
@@ -0,0 +1,83 @@
+import glob
+import os.path
+from setuptools import setup, Extension
+from setuptools import find_packages
+
+import sys
+import argparse
+parser = argparse.ArgumentParser(description='build pyfastllm wheel')
+parser.add_argument('--cuda', dest='cuda', action='store_true', default=False,
+                    help='build with cuda support')
+args, unknown = parser.parse_known_args()
+sys.argv = [sys.argv[0]] + unknown
+
+__VERSION__ = "0.0.1"
+
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+
+ext_modules = []
+try:
+    from pybind11.setup_helpers import Pybind11Extension, ParallelCompile, naive_recompile
+
+    # `N` is to set the bumer of threads
+    # `naive_recompile` makes it recompile only if the source file changes. It does not check header files!
+    ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile, default=4).install()
+
+    # could only be relative paths, otherwise the `build` command would fail if you use a MANIFEST.in to distribute your package
+    # only source files (.cpp, .c, .cc) are needed
+    # 
+    source_files = glob.glob(os.path.join(BASE_DIR, "src/*.cpp"), recursive=False)
+    source_files.append(os.path.join(BASE_DIR, "src/devices/cpu/cpudevice.cpp"))
+    print(source_files)
+
+    extra_compile_args = ["-w", "-DPY_API"]
+    # If any libraries are used, e.g. libabc.so
+    include_dirs = [os.path.join(BASE_DIR, "include/")]
+    library_dirs = []
+    # (optional) if the library is not in the dir like `/usr/lib/`
+    # either to add its dir to `runtime_library_dirs` or to the env variable "LD_LIBRARY_PATH"
+    # MUST be absolute path
+    runtime_library_dirs = []
+    libraries = []
+
+    if args.cuda:
+        assert "Not Implement Yet!"
+        # source_files.append("src/devices/cpu/cpudevice.cpp", )
+        runtime_library_dirs.append("/usr/local/cuda/lib64/")
+        libraries.append("cublas")
+
+    ext_modules = [
+        Pybind11Extension(
+            "pyfastllm", # depends on the structure of your package
+            source_files,
+            define_macros=[('VERSION_INFO', __VERSION__)],
+            include_dirs=include_dirs,
+            library_dirs=library_dirs,
+            runtime_library_dirs=runtime_library_dirs,
+            libraries=libraries,
+            extra_compile_args=extra_compile_args,
+            cxx_std=17,
+            language='c++'
+        ),
+    ]
+except Exception as e:
+    print(f"some errors happened: ")
+    print(e)
+    sys.exit(1)
+
+cmdclass = {}
+
+setup(
+    name='fastllm',  # used by `pip install`
+    version='0.0.1',
+    description='python api for fastllm',
+    long_description='',
+    ext_modules=ext_modules,
+    packages = find_packages(), # the directory would be installed to site-packages
+    cmdclass=cmdclass,
+    setup_requires=["pybind11"],
+    install_requires=[""],
+    python_requires='>=3.6',
+    include_package_data=False,
+    zip_safe=False,
+)
diff --git a/src/pybinding.cpp b/src/pybinding.cpp
@@ -43,6 +43,14 @@ PYBIND11_MODULE(pyfastllm, m) {
     .def("response", &fastllm::VicunaModel::Response)
     .def("warmup", &fastllm::VicunaModel::WarmUp)
     .def("save_lowbit_model", &fastllm::VicunaModel::SaveLowBitModel);
+
+  py::class_<fastllm::BaichuanModel>(m, "BaichuanModel")
+    .def(py::init<>())
+    .def("load_weights", &fastllm::BaichuanModel::LoadFromFile)
+    .def("response", &fastllm::BaichuanModel::Response)
+    .def("warmup", &fastllm::BaichuanModel::WarmUp)
+    .def("save_lowbit_model", &fastllm::BaichuanModel::SaveLowBitModel);
+
 
 #ifdef VERSION_INFO
     m.attr("__version__") = VERSION_INFO;