Skip to content

Commit

Permalink
rm models module (PaddlePaddle#366)
Browse files Browse the repository at this point in the history
* rm models module
  • Loading branch information
Steffy-zxf authored May 12, 2021
1 parent f151caf commit fa6b699
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 182 deletions.
3 changes: 2 additions & 1 deletion examples/text_classification/rnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ rnn/
│   └── python
│   └── predict.py # python预测部署示例
├── export_model.py # 动态图参数导出静态图参数脚本
├── model.py # 模型组网脚本
├── predict.py # 模型预测
├── utils.py # 数据处理工具
├── train.py # 训练模型主程序入口,包括训练、评估
Expand Down Expand Up @@ -181,7 +182,7 @@ python train.py --vocab_path='./senta_word_dict.txt' \

* `vocab_path`: 词汇表文件路径。
* `device`: 选用什么设备进行训练,可选cpu、gpu或者xpu。如使用gpu训练则参数gpus指定GPU卡号。目前xpu只支持模型网络设置为lstm。
* `network`: 模型网络名称,默认为`bilstm_attn`, 可更换为bilstm, bigru, birnn,bow,lstm,rnn,gru,bilstm_attn,textcnn等
* `network`: 模型网络名称,默认为`bilstm`, 可更换为bilstm, bigru, birnn,bow,lstm,rnn,gru,bilstm_attn,cnn等
* `lr`: 学习率, 默认为5e-5。
* `batch_size`: 运行一个batch大小,默认为64。
* `epochs`: 训练轮次,默认为10。
Expand Down
66 changes: 62 additions & 4 deletions examples/text_classification/rnn/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The path to vocabulary.")
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
default="bilstm", help="Select which network to train, defaults to bilstm.")
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
parser.add_argument("--params_path", type=str, default='./checkpoints/final.pdparams', help="The path of model parameter to be loaded.")
Expand All @@ -35,9 +35,67 @@ def main():
vocab = Vocab.load_vocabulary(args.vocab_path)
label_map = {0: 'negative', 1: 'positive'}

# Construct the newtork.
model = ppnlp.models.Senta(
network=args.network, vocab_size=len(vocab), num_classes=len(label_map))
# Constructs the network selected by --network.
# NOTE(review): the visible diff for this script adds no import of the model
# classes (predict.py and train.py import them from the local `model` module),
# so they are imported here; move this to the top-level imports if preferred.
from model import BoWModel, BiLSTMAttentionModel, CNNModel, LSTMModel, GRUModel, RNNModel, SelfInteractiveAttention

network = args.network.lower()
vocab_size = len(vocab)
num_classes = len(label_map)
# Index of the '[PAD]' token in the vocabulary; passed to every model as
# `padding_idx`.
pad_token_id = vocab.to_indices('[PAD]')
if network == 'bow':
    model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'bigru':
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm_attn':
    lstm_hidden_size = 196
    # The attention layer is sized at twice the LSTM hidden size (presumably
    # because forward and backward states are concatenated -- confirm in
    # model.py). The name must be `lstm_hidden_size`; `stm_hidden_size` is
    # undefined and raised NameError for this network.
    attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
    model = BiLSTMAttentionModel(
        attention_layer=attention,
        vocab_size=vocab_size,
        lstm_hidden_size=lstm_hidden_size,
        num_classes=num_classes,
        padding_idx=pad_token_id)
elif network == 'birnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'cnn':
    model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'gru':
    # The unidirectional variants additionally request max pooling.
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'lstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'rnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
else:
    # Normally unreachable because argparse restricts --network via `choices`;
    # kept as a guard in case the two lists drift apart.
    raise ValueError(
        "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
        % network)

# Load model parameters.
state_dict = paddle.load(args.params_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,90 +21,6 @@
INF = 1. * 1e12


class Senta(nn.Layer):
    """
    Wrapper layer that builds one text classification network by name and
    delegates the forward pass to it.

    Args:
        network: Network name, matched case-insensitively. Must be one of
            bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn, bilstm_attn
            and textcnn.
        vocab_size: Forwarded to the selected model.
        num_classes: Forwarded to the selected model.
        emb_dim: Forwarded as the third positional argument to every model
            except bilstm_attn, which does not receive it. Defaults to 128.
        pad_token_id: Forwarded to the selected model as `padding_idx`.
            Defaults to 0.

    Raises:
        ValueError: If `network` is not one of the supported names.
    """

    def __init__(self,
                 network,
                 vocab_size,
                 num_classes,
                 emb_dim=128,
                 pad_token_id=0):
        super().__init__()

        name = network.lower()

        def build_bilstm_attn():
            # The attention layer is sized at twice the LSTM hidden size.
            lstm_hidden_size = 196
            attention = SelfInteractiveAttention(
                hidden_size=2 * lstm_hidden_size)
            return BiLSTMAttentionModel(
                attention_layer=attention,
                vocab_size=vocab_size,
                lstm_hidden_size=lstm_hidden_size,
                num_classes=num_classes,
                padding_idx=pad_token_id)

        # Each entry is a zero-argument factory, so only the requested model
        # class is looked up and instantiated.
        factories = {
            'bow': lambda: BoWModel(
                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id),
            'bigru': lambda: GRUModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='bidirect',
                padding_idx=pad_token_id),
            'bilstm': lambda: LSTMModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='bidirect',
                padding_idx=pad_token_id),
            'bilstm_attn': build_bilstm_attn,
            'birnn': lambda: RNNModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='bidirect',
                padding_idx=pad_token_id),
            'cnn': lambda: CNNModel(
                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id),
            'gru': lambda: GRUModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='forward',
                padding_idx=pad_token_id,
                pooling_type='max'),
            'lstm': lambda: LSTMModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='forward',
                padding_idx=pad_token_id,
                pooling_type='max'),
            'rnn': lambda: RNNModel(
                vocab_size,
                num_classes,
                emb_dim,
                direction='forward',
                padding_idx=pad_token_id,
                pooling_type='max'),
            'textcnn': lambda: TextCNNModel(
                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id),
        }

        if name not in factories:
            raise ValueError(
                "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn, bilstm_attn and textcnn."
                % name)
        self.model = factories[name]()

    def forward(self, text, seq_len=None):
        """Run the wrapped model on `text` (and optional `seq_len`) and return its logits."""
        return self.model(text, seq_len)


class BoWModel(nn.Layer):
"""
This class implements the Bag of Words Classification Network model to classify texts.
Expand Down
65 changes: 62 additions & 3 deletions examples/text_classification/rnn/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
import paddlenlp as ppnlp
from paddlenlp.data import JiebaTokenizer, Stack, Tuple, Pad, Vocab

from model import BoWModel, BiLSTMAttentionModel, CNNModel, LSTMModel, GRUModel, RNNModel, SelfInteractiveAttention
from utils import preprocess_prediction_data

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number of a batch for training.")
parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The path to vocabulary.")
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
default="bilstm", help="Select which network to train, defaults to bilstm.")
parser.add_argument("--params_path", type=str, default='./checkpoints/final.pdparams', help="The path of model parameter to be loaded.")
args = parser.parse_args()
Expand Down Expand Up @@ -81,8 +82,66 @@ def predict(model, data, label_map, batch_size=1, pad_token_id=0):
label_map = {0: 'negative', 1: 'positive'}

# Constructs the network.
model = ppnlp.models.Senta(
network=args.network, vocab_size=len(vocab), num_classes=len(label_map))
# Pick the model class and its options from the --network argument.
network = args.network.lower()
vocab_size = len(vocab)
num_classes = len(label_map)
# Index of the '[PAD]' token in the vocabulary; passed to every model as
# `padding_idx`.
pad_token_id = vocab.to_indices('[PAD]')
if network == 'bow':
    model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'bigru':
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm_attn':
    lstm_hidden_size = 196
    # The attention layer is sized at twice the LSTM hidden size (presumably
    # because forward and backward states are concatenated -- confirm in
    # model.py). The name must be `lstm_hidden_size`; `stm_hidden_size` is
    # undefined and raised NameError for this network.
    attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
    model = BiLSTMAttentionModel(
        attention_layer=attention,
        vocab_size=vocab_size,
        lstm_hidden_size=lstm_hidden_size,
        num_classes=num_classes,
        padding_idx=pad_token_id)
elif network == 'birnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'cnn':
    model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'gru':
    # The unidirectional variants additionally request max pooling.
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'lstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'rnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
else:
    # Normally unreachable because argparse restricts --network via `choices`;
    # kept as a guard in case the two lists drift apart.
    raise ValueError(
        "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
        % network)

# Loads model parameters.
state_dict = paddle.load(args.params_path)
Expand Down
67 changes: 62 additions & 5 deletions examples/text_classification/rnn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from paddlenlp.data import JiebaTokenizer, Pad, Stack, Tuple, Vocab
from paddlenlp.datasets import load_dataset

from model import BoWModel, BiLSTMAttentionModel, CNNModel, LSTMModel, GRUModel, RNNModel, SelfInteractiveAttention
from utils import convert_example

# yapf: disable
Expand All @@ -32,7 +33,7 @@
parser.add_argument("--save_dir", type=str, default='checkpoints/', help="Directory to save model checkpoint")
parser.add_argument("--batch_size", type=int, default=64, help="Total examples' number of a batch for training.")
parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The directory to dataset.")
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
default="bilstm", help="Select which network to train, defaults to bilstm.")
parser.add_argument("--init_from_ckpt", type=str, default=None, help="The path of checkpoint to be loaded.")
args = parser.parse_args()
Expand Down Expand Up @@ -97,10 +98,66 @@ def create_dataloader(dataset,
"chnsenticorp", splits=["train", "dev", "test"])

# Constructs the network.
model = ppnlp.models.Senta(
network=args.network,
vocab_size=len(vocab),
num_classes=len(train_ds.label_list))
# Pick the model class and its options from the --network argument.
network = args.network.lower()
vocab_size = len(vocab)
# The number of classes comes from the training split's label list.
num_classes = len(train_ds.label_list)
# Index of the '[PAD]' token in the vocabulary; passed to every model as
# `padding_idx`.
pad_token_id = vocab.to_indices('[PAD]')
if network == 'bow':
    model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'bigru':
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'bilstm_attn':
    lstm_hidden_size = 196
    # The attention layer is sized at twice the LSTM hidden size (presumably
    # because forward and backward states are concatenated -- confirm in
    # model.py). The name must be `lstm_hidden_size`; `stm_hidden_size` is
    # undefined and raised NameError for this network.
    attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
    model = BiLSTMAttentionModel(
        attention_layer=attention,
        vocab_size=vocab_size,
        lstm_hidden_size=lstm_hidden_size,
        num_classes=num_classes,
        padding_idx=pad_token_id)
elif network == 'birnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='bidirect',
        padding_idx=pad_token_id)
elif network == 'cnn':
    model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'gru':
    # The unidirectional variants additionally request max pooling.
    model = GRUModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'lstm':
    model = LSTMModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
elif network == 'rnn':
    model = RNNModel(
        vocab_size,
        num_classes,
        direction='forward',
        padding_idx=pad_token_id,
        pooling_type='max')
else:
    # Normally unreachable because argparse restricts --network via `choices`;
    # kept as a guard in case the two lists drift apart.
    raise ValueError(
        "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
        % network)
model = paddle.Model(model)

# Reads data and generates mini-batches.
Expand Down
1 change: 1 addition & 0 deletions examples/text_matching/simnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ SimNet框架在百度各产品上广泛应用,主要包括BOW、CNN、RNN、MM

```text
simnet/
├── model.py # 模型组网
├── predict.py # 模型预测
├── utils.py # 数据处理工具
├── train.py # 训练模型主程序入口,包括训练、评估
Expand Down
File renamed without changes.
3 changes: 2 additions & 1 deletion examples/text_matching/simnet/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import paddlenlp as ppnlp
from paddlenlp.data import JiebaTokenizer, Pad, Stack, Tuple, Vocab

from model import SimNet
from utils import preprocess_prediction_data

# yapf: disable
Expand Down Expand Up @@ -88,7 +89,7 @@ def predict(model, data, label_map, batch_size=1, pad_token_id=0):
label_map = {0: 'dissimilar', 1: 'similar'}

# Constructs the network.
model = ppnlp.models.SimNet(
model = SimNet(
network=args.network, vocab_size=len(vocab), num_classes=len(label_map))

# Loads model parameters.
Expand Down
Loading

0 comments on commit fa6b699

Please sign in to comment.