更新加载本地模型的文档

HIT-SCIR · Jun 27, 2023 · aaec1c3 · aaec1c3
1 parent b1f20c3
commit aaec1c3
Show file tree

Hide file tree

Showing 15 changed files with 149 additions and 81 deletions.
diff --git a/README.md b/README.md
@@ -63,7 +63,19 @@ LTP（Language Technology Platform） 提供了一系列中文自然语言处理
 ### [Python](python/interface/README.md)
 
 ```bash
-pip install -U ltp ltp-core ltp-extension -i https://pypi.org/simple # 安装 ltp
+# 方法 1： 使用清华源安装 LTP
+# 1. 安装 PyTorch 和 Transformers 依赖
+pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch transformers
+# 2. 安装 LTP 
+pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ltp ltp-core ltp-extension
+
+# 方法 2： 先全局换源，再安装 LTP
+# 1. 全局换 TUNA 源
+pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+# 2. 安装 PyTorch 和 Transformers 依赖
+pip install torch transformers
+# 3. 安装 LTP
+pip install ltp ltp-core ltp-extension
 ```
 
 **注：** 如果遇到任何错误，请尝试使用上述命令重新安装 ltp，如果依然报错，请在 Github issues 中反馈。
@@ -73,6 +85,8 @@ import torch
 from ltp import LTP
 
 ltp = LTP("LTP/small")  # 默认加载 Small 模型
+                        # 也可以传入模型的路径，ltp = LTP("/path/to/your/model")
+                        # /path/to/your/model 目录下应当存在 config.json 和其他模型文件
 
 # 将模型移动到 GPU 上
 if torch.cuda.is_available():
@@ -124,20 +138,50 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 ## 模型性能以及下载地址
 
-|                  深度学习模型                   |  分词   |  词性   | 命名实体  | 语义角色  | 依存句法  | 语义依存  | 速度(句/S) |
-| :---------------------------------------: | :---: | :---: | :---: | :---: | :---: | :---: | :-----: |
-|  [Base](https://huggingface.co/LTP/base)  | 98.7  | 98.5  | 95.4  | 80.6  | 89.5  | 75.2  |  39.12  |
-| [Base1](https://huggingface.co/LTP/base1) | 99.22 | 98.73 | 96.39 | 79.28 | 89.57 | 76.57 |  --.--  |
-| [Base2](https://huggingface.co/LTP/base2) | 99.18 | 98.69 | 95.97 | 79.49 | 90.19 | 76.62 |  --.--  |
-| [Small](https://huggingface.co/LTP/small) | 98.4  | 98.2  | 94.3  | 78.4  | 88.3  | 74.7  |  43.13  |
-|  [Tiny](https://huggingface.co/LTP/tiny)  | 96.8  | 97.1  | 91.6  | 70.9  | 83.8  | 70.1  |  53.22  |
+|                  深度学习模型                   |                      直链下载                       |  分词   |  词性   | 命名实体  | 语义角色  | 依存句法  | 语义依存  | 速度(句/S) |
+|:-----------------------------------------:|:-----------------------------------------------:| :---: | :---: | :---: | :---: | :---: | :---: | :-----: |
+|  [🤗Base](https://huggingface.co/LTP/base)  |  [🗜Base](http://39.96.43.154/ltp/v4/base.tgz)  |  98.7    | 98.5  | 95.4  | 80.6  | 89.5  | 75.2  |  39.12  |
+| [🤗Base1](https://huggingface.co/LTP/base1) | [🗜Base1](http://39.96.43.154/ltp/v4/base1.tgz) |99.22    | 98.73 | 96.39 | 79.28 | 89.57 | 76.57 |  --.--  |
+| [🤗Base2](https://huggingface.co/LTP/base2) | [🗜Base2](http://39.96.43.154/ltp/v4/base2.tgz) |99.18    | 98.69 | 95.97 | 79.49 | 90.19 | 76.62 |  --.--  |
+| [🤗Small](https://huggingface.co/LTP/small) | [🗜Small](http://39.96.43.154/ltp/v4/small.tgz) | 98.4    | 98.2  | 94.3  | 78.4  | 88.3  | 74.7  |  43.13  |
+|  [🤗Tiny](https://huggingface.co/LTP/tiny)  |  [🗜Tiny](http://39.96.43.154/ltp/v4/tiny.tgz)  | 96.8    | 97.1  | 91.6  | 70.9  | 83.8  | 70.1  |  53.22  |
 
-|                    感知机算法                    |  分词   |  词性   | 命名实体  | 速度(句/s)  |             备注             |
-| :-----------------------------------------: | :---: | :---: | :---: | :------: | :------------------------: |
-| [Legacy](https://huggingface.co/LTP/legacy) | 97.93 | 98.41 | 94.28 | 21581.48 | [性能详情](rust/ltp/README.md) |
+|                    感知机算法                    |                       直链下载                        |  分词  |  词性   | 命名实体  | 速度(句/s)  |             备注             |
+| :-----------------------------------------: |:-------------------------------------------------:|:----------:| :---: | :---: | :------: | :------------------------: |
+| [🤗Legacy](https://huggingface.co/LTP/legacy) | [🗜Legacy](http://39.96.43.154/ltp/v4/legacy.tgz) | 97.93   | 98.41 | 94.28 | 21581.48 | [性能详情](rust/ltp/README.md) |
 
 **注：感知机算法速度为开启16线程速度**
 
+### 如何下载对应的模型
+
+```bash
+# 使用 HTTPS 链接下载
+# 确保已安装 git-lfs (https://git-lfs.com)
+git lfs install
+git clone https://huggingface.co/LTP/base
+
+# 使用 ssh 下载
+# 确保已安装 git-lfs (https://git-lfs.com)
+git lfs install
+git clone git@hf.co:LTP/base
+
+# 下载压缩包
+wget http://39.96.43.154/ltp/v4/base.tgz
+mkdir -p base
+tar -zxvf base.tgz -C base
+```
+
+### 如何使用下载的模型
+
+```python
+from ltp import LTP
+
+# 在路径中给出模型下载或解压后的路径
+# 例如：base 模型的文件夹路径为 "path/to/base"
+#      "path/to/base" 下应当存在 "config.json"
+ltp = LTP("path/to/base")
+```
+
+
 ## 构建 Wheel 包
 
 ```shell script

diff --git a/python/core/setup.py b/python/core/setup.py
@@ -48,6 +48,6 @@
     ],
     packages=find_packages(),
     include_dirs=["ltp_core"],
-    python_requires=">=3.6.*, <4",
+    python_requires=">=3.6, <4",
     zip_safe=True,
 )
diff --git a/python/extension/Cargo.toml b/python/extension/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ltp-extension"
-version = "0.1.10"
+version = "0.1.11"
 edition = "2021"
 authors = ["ylfeng <ylfeng@ir.hit.edu.cn>"]
 description = "Rust Extension For Language Technology Platform(Python)."
@@ -17,11 +17,11 @@ crate-type = ["cdylib"]
 
 [dependencies]
 libc = { version = "0.2" }
-rayon = { version = "1.5" }
+rayon = { version = "1.7" }
 rayon-cond = { version = "0.2" }
 anyhow = { version = "1.0" }
 serde = { version = "1.0", features = ["derive"] }
-pyo3 = { version = "0.18", features = ["extension-module", "anyhow", "serde"] }
+pyo3 = { version = "0.19", features = ["extension-module", "anyhow", "serde"] }
 mimalloc = { version = "0.1", default-features = false, optional = true }
 
 [dependencies.ltp]

diff --git a/python/extension/pyproject.toml b/python/extension/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["maturin>=0.13,<0.14"]
+requires = ["maturin>=1.0,<2.0"]
 build-backend = "maturin"
diff --git a/python/extension/src/hook.rs b/python/extension/src/hook.rs
@@ -2,7 +2,6 @@ use ltp::hook::Hook;
 use pyo3::prelude::*;
 
 #[pyclass(module = "ltp_extension.algorithms", name = "Hook", subclass)]
-#[pyo3(text_signature = "(self)")]
 #[derive(Clone, Debug)]
 pub struct PyHook {
     pub hook: Hook,
@@ -11,6 +10,7 @@ pub struct PyHook {
 #[pymethods]
 impl PyHook {
     #[new]
+    #[pyo3(text_signature = "(self)")]
     pub fn new() -> PyResult<Self> {
         Ok(Self { hook: Hook::new() })
     }

diff --git a/python/extension/src/perceptron/alg.rs b/python/extension/src/perceptron/alg.rs
@@ -9,7 +9,6 @@ use std::fmt::{Display, Formatter};
 /// AP: average perceptron, param is the threads
 /// PA: parallel average perceptron, param is c(margin)
 #[pyclass(module = "ltp_extension.perceptron", name = "Algorithm", subclass)]
-#[pyo3(text_signature = "(self, algorithm, param = None)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)]
 pub struct PyAlgorithm {
     pub(crate) algorithm: Algorithm<f64>,
@@ -24,6 +23,7 @@ impl Display for PyAlgorithm {
 #[pymethods]
 impl PyAlgorithm {
     #[new]
+    #[pyo3(text_signature = "(self, algorithm, param = None)")]
     pub fn new(py: Python, algorithm: &str, param: Option<PyObject>) -> PyResult<Self> {
         let algorithm: Algorithm<f64> = match algorithm {
             "AP" => {

diff --git a/python/extension/src/perceptron/model.rs b/python/extension/src/perceptron/model.rs
@@ -9,7 +9,6 @@ use std::fmt::{Display, Formatter};
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Clone, Copy, Debug, Deserialize, Serialize)]
 #[pyclass(module = "ltp_extension.perceptron", name = "ModelType")]
-#[pyo3(text_signature = "(self, model_type=None)")]
 pub enum ModelType {
     Auto,
     CWS,
@@ -20,6 +19,7 @@ pub enum ModelType {
 #[pymethods]
 impl ModelType {
     #[new]
+    #[pyo3(text_signature = "(self, model_type=None)")]
     pub fn new(model_type: Option<&str>) -> PyResult<Self> {
         Ok(match model_type {
             Some("cws") => ModelType::CWS,
@@ -70,7 +70,6 @@ impl Display for EnumModel {
 }
 
 #[pyclass(module = "ltp_extension.perceptron", name = "Model", subclass)]
-#[pyo3(text_signature = "(self, path, model_type=ModelType.Auto)")]
 #[derive(Clone, Serialize, Deserialize, Debug)]
 pub struct PyModel {
     pub model: EnumModel,
@@ -79,6 +78,7 @@ pub struct PyModel {
 #[pymethods]
 impl PyModel {
     #[new]
+    #[pyo3(text_signature = "(self, path, model_type=ModelType.Auto)")]
     pub fn new(path: &str, model_type: ModelType) -> PyResult<Self> {
         Self::load(path, model_type)
     }

diff --git a/python/extension/src/perceptron/specialization/cws.rs b/python/extension/src/perceptron/specialization/cws.rs
@@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
 pub type Model = Perceptron<Definition>;
 
 #[pyclass(module = "ltp_extension.perceptron", name = "CWSModel", subclass)]
-#[pyo3(text_signature = "(self, path)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyCWSModel {
     pub model: Model,
@@ -49,6 +48,7 @@ pub enum CharacterType {
 #[pymethods]
 impl PyCWSModel {
     #[new]
+    #[pyo3(text_signature = "(self, path)")]
     pub fn new(path: &str) -> PyResult<Self> {
         Ok(Self::inner_load(path)?)
     }
@@ -198,7 +198,6 @@ impl PyCWSModel {
 }
 
 #[pyclass(module = "ltp_extension.perceptron", name = "CWSTrainer", subclass)]
-#[pyo3(text_signature = "(self)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyCWSTrainer {
     pub trainer: Trainer<Definition>,
@@ -207,6 +206,7 @@ pub struct PyCWSTrainer {
 #[pymethods]
 impl PyCWSTrainer {
     #[new]
+    #[pyo3(text_signature = "(self)")]
     pub fn new() -> PyResult<Self> {
         Ok(Self {
             trainer: Trainer::new(),

diff --git a/python/extension/src/perceptron/specialization/ner.rs b/python/extension/src/perceptron/specialization/ner.rs
@@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
 pub type Model = Perceptron<Definition>;
 
 #[pyclass(module = "ltp_extension.perceptron", name = "NERModel", subclass)]
-#[pyo3(text_signature = "(self, path)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyNERModel {
     pub model: Model,
@@ -20,6 +19,7 @@ impl_model!(PyNERModel);
 #[pymethods]
 impl PyNERModel {
     #[new]
+    #[pyo3(text_signature = "(self, path)")]
     pub fn new(path: &str) -> PyResult<Self> {
         Ok(Self::inner_load(path)?)
     }
@@ -118,7 +118,6 @@ impl PyNERModel {
 }
 
 #[pyclass(module = "ltp_extension.perceptron", name = "NERTrainer", subclass)]
-#[pyo3(text_signature = "(self, labels)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyNERTrainer {
     pub trainer: Trainer<Definition>,
@@ -127,6 +126,7 @@ pub struct PyNERTrainer {
 #[pymethods]
 impl PyNERTrainer {
     #[new]
+    #[pyo3(text_signature = "(self, labels)")]
     pub fn new(labels: Vec<String>) -> PyResult<Self> {
         Ok(Self {
             trainer: Trainer::new_with_define(Definition::new(labels)),

diff --git a/python/extension/src/perceptron/specialization/pos.rs b/python/extension/src/perceptron/specialization/pos.rs
@@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
 pub type Model = Perceptron<Definition>;
 
 #[pyclass(module = "ltp_extension.perceptron", name = "POSModel", subclass)]
-#[pyo3(text_signature = "(self, path)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyPOSModel {
     pub model: Model,
@@ -20,6 +19,7 @@ impl_model!(PyPOSModel);
 #[pymethods]
 impl PyPOSModel {
     #[new]
+    #[pyo3(text_signature = "(self, path)")]
     pub fn new(path: &str) -> PyResult<Self> {
         Ok(Self::inner_load(path)?)
     }
@@ -107,7 +107,6 @@ impl PyPOSModel {
 }
 
 #[pyclass(module = "ltp_extension.perceptron", name = "POSTrainer", subclass)]
-#[pyo3(text_signature = "(self, labels)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct PyPOSTrainer {
     pub trainer: Trainer<Definition>,
@@ -116,6 +115,7 @@ pub struct PyPOSTrainer {
 #[pymethods]
 impl PyPOSTrainer {
     #[new]
+    #[pyo3(text_signature = "(self, labels)")]
     pub fn new(labels: Vec<String>) -> PyResult<Self> {
         Ok(Self {
             trainer: Trainer::new_with_define(Definition::new(labels)),

diff --git a/python/extension/src/perceptron/trainer.rs b/python/extension/src/perceptron/trainer.rs
@@ -30,7 +30,6 @@ impl Display for EnumTrainer {
 }
 
 #[pyclass(module = "ltp_extension.perceptron", name = "Trainer", subclass)]
-#[pyo3(text_signature = "(self, model_type=ModelType.Auto, labels=None)")]
 #[derive(Clone, Serialize, Deserialize, Debug)]
 pub struct PyTrainer {
     pub trainer: EnumTrainer,
@@ -39,6 +38,7 @@ pub struct PyTrainer {
 #[pymethods]
 impl PyTrainer {
     #[new]
+    #[pyo3(text_signature = "(self, model_type=ModelType.Auto, labels=None)")]
     pub fn new(model_type: ModelType, labels: Option<Vec<String>>) -> PyResult<Self> {
         let trainer = match (model_type, labels) {
             (ModelType::CWS, _) => EnumTrainer::CWS(Default::default()),

diff --git a/python/extension/src/stnsplit.rs b/python/extension/src/stnsplit.rs
@@ -5,7 +5,6 @@ use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 
 #[pyclass(module = "ltp_extension.algorithms", name = "StnSplit", subclass)]
-#[pyo3(text_signature = "(self)")]
 #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq, Eq)]
 pub struct StnSplit {
     pub options: SplitOptions,
@@ -14,6 +13,7 @@ pub struct StnSplit {
 #[pymethods]
 impl StnSplit {
     #[new]
+    #[pyo3(text_signature = "(self)")]
     pub fn new() -> PyResult<Self> {
         Ok(Self {
             options: SplitOptions {