Skip to content

Commit

Permalink
更新加载本地模型的文档
Browse files Browse the repository at this point in the history
  • Loading branch information
AlongWY committed Jun 27, 2023
1 parent b1f20c3 commit aaec1c3
Show file tree
Hide file tree
Showing 15 changed files with 149 additions and 81 deletions.
66 changes: 55 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,19 @@ LTP(Language Technology Platform) 提供了一系列中文自然语言处理
### [Python](python/interface/README.md)

```bash
pip install -U ltp ltp-core ltp-extension -i https://pypi.org/simple # 安装 ltp
# 方法 1: 使用清华源安装 LTP
# 1. 安装 PyTorch 和 Transformers 依赖
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch transformers
# 2. 安装 LTP
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ltp ltp-core ltp-extension

# 方法 2: 先全局换源,再安装 LTP
# 1. 全局换 TUNA 源
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# 2. 安装 PyTorch 和 Transformers 依赖
pip install torch transformers
# 3. 安装 LTP
pip install ltp ltp-core ltp-extension
```

**注:** 如果遇到任何错误,请尝试使用上述命令重新安装 ltp,如果依然报错,请在 GitHub issues 中反馈。
Expand All @@ -73,6 +85,8 @@ import torch
from ltp import LTP

ltp = LTP("LTP/small") # 默认加载 Small 模型
# 也可以传入模型的路径,ltp = LTP("/path/to/your/model")
# /path/to/your/model 应当存在 config.json 和其他模型文件

# 将模型移动到 GPU 上
if torch.cuda.is_available():
Expand Down Expand Up @@ -124,20 +138,50 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {

## 模型性能以及下载地址

| 深度学习模型 | 分词 | 词性 | 命名实体 | 语义角色 | 依存句法 | 语义依存 | 速度(句/S) |
| :---------------------------------------: | :---: | :---: | :---: | :---: | :---: | :---: | :-----: |
| [Base](https://huggingface.co/LTP/base) | 98.7 | 98.5 | 95.4 | 80.6 | 89.5 | 75.2 | 39.12 |
| [Base1](https://huggingface.co/LTP/base1) | 99.22 | 98.73 | 96.39 | 79.28 | 89.57 | 76.57 | --.-- |
| [Base2](https://huggingface.co/LTP/base2) | 99.18 | 98.69 | 95.97 | 79.49 | 90.19 | 76.62 | --.-- |
| [Small](https://huggingface.co/LTP/small) | 98.4 | 98.2 | 94.3 | 78.4 | 88.3 | 74.7 | 43.13 |
| [Tiny](https://huggingface.co/LTP/tiny) | 96.8 | 97.1 | 91.6 | 70.9 | 83.8 | 70.1 | 53.22 |
| 深度学习模型 | 直链下载 | 分词 | 词性 | 命名实体 | 语义角色 | 依存句法 | 语义依存 | 速度(句/S) |
|:-----------------------------------------:|:-----------------------------------------------:| :---: | :---: | :---: | :---: | :---: | :---: | :-----: |
| [🤗Base](https://huggingface.co/LTP/base) | [🗜Base](http://39.96.43.154/ltp/v4/base.tgz) | 98.7 | 98.5 | 95.4 | 80.6 | 89.5 | 75.2 | 39.12 |
| [🤗Base1](https://huggingface.co/LTP/base1) | [🗜Base1](http://39.96.43.154/ltp/v4/base1.tgz) |99.22 | 98.73 | 96.39 | 79.28 | 89.57 | 76.57 | --.-- |
| [🤗Base2](https://huggingface.co/LTP/base2) | [🗜Base2](http://39.96.43.154/ltp/v4/base2.tgz) |99.18 | 98.69 | 95.97 | 79.49 | 90.19 | 76.62 | --.-- |
| [🤗Small](https://huggingface.co/LTP/small) | [🗜Small](http://39.96.43.154/ltp/v4/small.tgz) | 98.4 | 98.2 | 94.3 | 78.4 | 88.3 | 74.7 | 43.13 |
| [🤗Tiny](https://huggingface.co/LTP/tiny) | [🗜Tiny](http://39.96.43.154/ltp/v4/tiny.tgz) | 96.8 | 97.1 | 91.6 | 70.9 | 83.8 | 70.1 | 53.22 |

| 感知机算法 | 分词 | 词性 | 命名实体 | 速度(句/s) | 备注 |
| :-----------------------------------------: | :---: | :---: | :---: | :------: | :------------------------: |
| [Legacy](https://huggingface.co/LTP/legacy) | 97.93 | 98.41 | 94.28 | 21581.48 | [性能详情](rust/ltp/README.md) |
| 感知机算法 | 直链下载 | 分词 | 词性 | 命名实体 | 速度(句/s) | 备注 |
| :-----------------------------------------: |:-------------------------------------------------:|:----------:| :---: | :---: | :------: | :------------------------: |
| [🤗Legacy](https://huggingface.co/LTP/legacy) | [🗜Legacy](http://39.96.43.154/ltp/v4/legacy.tgz) | 97.93 | 98.41 | 94.28 | 21581.48 | [性能详情](rust/ltp/README.md) |

**注:感知机算法速度为开启16线程速度**

### 如何下载对应的模型

```bash
# 使用 HTTPS 链接下载
# 确保已安装 git-lfs (https://git-lfs.com)
git lfs install
git clone https://huggingface.co/LTP/base

# 使用 ssh 下载
# 确保已安装 git-lfs (https://git-lfs.com)
git lfs install
git clone git@hf.co:LTP/base

# 下载压缩包
wget http://39.96.43.154/ltp/v4/base.tgz
mkdir -p base  # tar 的 -C 选项要求目标目录已存在
tar -zxvf base.tgz -C base
```

### 如何使用下载的模型

```python
from ltp import LTP

# 在路径中给出模型下载或解压后的路径
# 例如:base 模型的文件夹路径为 "path/to/base"
# "path/to/base" 下应当存在 "config.json"
ltp = LTP("path/to/base")
```


## 构建 Wheel 包

```shell script
Expand Down
2 changes: 1 addition & 1 deletion python/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@
],
packages=find_packages(),
include_dirs=["ltp_core"],
python_requires=">=3.6.*, <4",
python_requires=">=3.6, <4",
zip_safe=True,
)
6 changes: 3 additions & 3 deletions python/extension/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ltp-extension"
version = "0.1.10"
version = "0.1.11"
edition = "2021"
authors = ["ylfeng <[email protected]>"]
description = "Rust Extension For Language Technology Platform(Python)."
Expand All @@ -17,11 +17,11 @@ crate-type = ["cdylib"]

[dependencies]
libc = { version = "0.2" }
rayon = { version = "1.5" }
rayon = { version = "1.7" }
rayon-cond = { version = "0.2" }
anyhow = { version = "1.0" }
serde = { version = "1.0", features = ["derive"] }
pyo3 = { version = "0.18", features = ["extension-module", "anyhow", "serde"] }
pyo3 = { version = "0.19", features = ["extension-module", "anyhow", "serde"] }
mimalloc = { version = "0.1", default-features = false, optional = true }

[dependencies.ltp]
Expand Down
2 changes: 1 addition & 1 deletion python/extension/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[build-system]
requires = ["maturin>=0.13,<0.14"]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
2 changes: 1 addition & 1 deletion python/extension/src/hook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use ltp::hook::Hook;
use pyo3::prelude::*;

#[pyclass(module = "ltp_extension.algorithms", name = "Hook", subclass)]
#[pyo3(text_signature = "(self)")]
#[derive(Clone, Debug)]
pub struct PyHook {
pub hook: Hook,
Expand All @@ -11,6 +10,7 @@ pub struct PyHook {
#[pymethods]
impl PyHook {
#[new]
#[pyo3(text_signature = "(self)")]
pub fn new() -> PyResult<Self> {
Ok(Self { hook: Hook::new() })
}
Expand Down
2 changes: 1 addition & 1 deletion python/extension/src/perceptron/alg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::fmt::{Display, Formatter};
/// AP: average perceptron, param is the threads
/// PA: parallel average perceptron, param is c(margin)
#[pyclass(module = "ltp_extension.perceptron", name = "Algorithm", subclass)]
#[pyo3(text_signature = "(self, algorithm, param = None)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)]
pub struct PyAlgorithm {
pub(crate) algorithm: Algorithm<f64>,
Expand All @@ -24,6 +23,7 @@ impl Display for PyAlgorithm {
#[pymethods]
impl PyAlgorithm {
#[new]
#[pyo3(text_signature = "(self, algorithm, param = None)")]
pub fn new(py: Python, algorithm: &str, param: Option<PyObject>) -> PyResult<Self> {
let algorithm: Algorithm<f64> = match algorithm {
"AP" => {
Expand Down
4 changes: 2 additions & 2 deletions python/extension/src/perceptron/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::fmt::{Display, Formatter};
#[allow(clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
#[pyclass(module = "ltp_extension.perceptron", name = "ModelType")]
#[pyo3(text_signature = "(self, model_type=None)")]
pub enum ModelType {
Auto,
CWS,
Expand All @@ -20,6 +19,7 @@ pub enum ModelType {
#[pymethods]
impl ModelType {
#[new]
#[pyo3(text_signature = "(self, model_type=None)")]
pub fn new(model_type: Option<&str>) -> PyResult<Self> {
Ok(match model_type {
Some("cws") => ModelType::CWS,
Expand Down Expand Up @@ -70,7 +70,6 @@ impl Display for EnumModel {
}

#[pyclass(module = "ltp_extension.perceptron", name = "Model", subclass)]
#[pyo3(text_signature = "(self, path, model_type=ModelType.Auto)")]
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct PyModel {
pub model: EnumModel,
Expand All @@ -79,6 +78,7 @@ pub struct PyModel {
#[pymethods]
impl PyModel {
#[new]
#[pyo3(text_signature = "(self, path, model_type=ModelType.Auto)")]
pub fn new(path: &str, model_type: ModelType) -> PyResult<Self> {
Self::load(path, model_type)
}
Expand Down
4 changes: 2 additions & 2 deletions python/extension/src/perceptron/specialization/cws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
pub type Model = Perceptron<Definition>;

#[pyclass(module = "ltp_extension.perceptron", name = "CWSModel", subclass)]
#[pyo3(text_signature = "(self, path)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyCWSModel {
pub model: Model,
Expand Down Expand Up @@ -49,6 +48,7 @@ pub enum CharacterType {
#[pymethods]
impl PyCWSModel {
#[new]
#[pyo3(text_signature = "(self, path)")]
pub fn new(path: &str) -> PyResult<Self> {
Ok(Self::inner_load(path)?)
}
Expand Down Expand Up @@ -198,7 +198,6 @@ impl PyCWSModel {
}

#[pyclass(module = "ltp_extension.perceptron", name = "CWSTrainer", subclass)]
#[pyo3(text_signature = "(self)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyCWSTrainer {
pub trainer: Trainer<Definition>,
Expand All @@ -207,6 +206,7 @@ pub struct PyCWSTrainer {
#[pymethods]
impl PyCWSTrainer {
#[new]
#[pyo3(text_signature = "(self)")]
pub fn new() -> PyResult<Self> {
Ok(Self {
trainer: Trainer::new(),
Expand Down
4 changes: 2 additions & 2 deletions python/extension/src/perceptron/specialization/ner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
pub type Model = Perceptron<Definition>;

#[pyclass(module = "ltp_extension.perceptron", name = "NERModel", subclass)]
#[pyo3(text_signature = "(self, path)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyNERModel {
pub model: Model,
Expand All @@ -20,6 +19,7 @@ impl_model!(PyNERModel);
#[pymethods]
impl PyNERModel {
#[new]
#[pyo3(text_signature = "(self, path)")]
pub fn new(path: &str) -> PyResult<Self> {
Ok(Self::inner_load(path)?)
}
Expand Down Expand Up @@ -118,7 +118,6 @@ impl PyNERModel {
}

#[pyclass(module = "ltp_extension.perceptron", name = "NERTrainer", subclass)]
#[pyo3(text_signature = "(self, labels)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyNERTrainer {
pub trainer: Trainer<Definition>,
Expand All @@ -127,6 +126,7 @@ pub struct PyNERTrainer {
#[pymethods]
impl PyNERTrainer {
#[new]
#[pyo3(text_signature = "(self, labels)")]
pub fn new(labels: Vec<String>) -> PyResult<Self> {
Ok(Self {
trainer: Trainer::new_with_define(Definition::new(labels)),
Expand Down
4 changes: 2 additions & 2 deletions python/extension/src/perceptron/specialization/pos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize};
pub type Model = Perceptron<Definition>;

#[pyclass(module = "ltp_extension.perceptron", name = "POSModel", subclass)]
#[pyo3(text_signature = "(self, path)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyPOSModel {
pub model: Model,
Expand All @@ -20,6 +19,7 @@ impl_model!(PyPOSModel);
#[pymethods]
impl PyPOSModel {
#[new]
#[pyo3(text_signature = "(self, path)")]
pub fn new(path: &str) -> PyResult<Self> {
Ok(Self::inner_load(path)?)
}
Expand Down Expand Up @@ -107,7 +107,6 @@ impl PyPOSModel {
}

#[pyclass(module = "ltp_extension.perceptron", name = "POSTrainer", subclass)]
#[pyo3(text_signature = "(self, labels)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug)]
pub struct PyPOSTrainer {
pub trainer: Trainer<Definition>,
Expand All @@ -116,6 +115,7 @@ pub struct PyPOSTrainer {
#[pymethods]
impl PyPOSTrainer {
#[new]
#[pyo3(text_signature = "(self, labels)")]
pub fn new(labels: Vec<String>) -> PyResult<Self> {
Ok(Self {
trainer: Trainer::new_with_define(Definition::new(labels)),
Expand Down
2 changes: 1 addition & 1 deletion python/extension/src/perceptron/trainer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ impl Display for EnumTrainer {
}

#[pyclass(module = "ltp_extension.perceptron", name = "Trainer", subclass)]
#[pyo3(text_signature = "(self, model_type=ModelType.Auto, labels=None)")]
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct PyTrainer {
pub trainer: EnumTrainer,
Expand All @@ -39,6 +38,7 @@ pub struct PyTrainer {
#[pymethods]
impl PyTrainer {
#[new]
#[pyo3(text_signature = "(self, model_type=ModelType.Auto, labels=None)")]
pub fn new(model_type: ModelType, labels: Option<Vec<String>>) -> PyResult<Self> {
let trainer = match (model_type, labels) {
(ModelType::CWS, _) => EnumTrainer::CWS(Default::default()),
Expand Down
2 changes: 1 addition & 1 deletion python/extension/src/stnsplit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize};

#[pyclass(module = "ltp_extension.algorithms", name = "StnSplit", subclass)]
#[pyo3(text_signature = "(self)")]
#[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq, Eq)]
pub struct StnSplit {
pub options: SplitOptions,
Expand All @@ -14,6 +13,7 @@ pub struct StnSplit {
#[pymethods]
impl StnSplit {
#[new]
#[pyo3(text_signature = "(self)")]
pub fn new() -> PyResult<Self> {
Ok(Self {
options: SplitOptions {
Expand Down
Loading

0 comments on commit aaec1c3

Please sign in to comment.