rm models module (PaddlePaddle#366)

* rm models module
RayShark · May 12, 2021 · fa6b699 · fa6b699
1 parent f151caf
commit fa6b699
Show file tree

Hide file tree

Showing 12 changed files with 193 additions and 182 deletions.
diff --git a/examples/text_classification/rnn/README.md b/examples/text_classification/rnn/README.md
@@ -109,6 +109,7 @@ rnn/
 │   └── python
 │       └── predict.py # python预测部署示例
 ├── export_model.py # 动态图参数导出静态图参数脚本
+├── model.py # 模型组网脚本
 ├── predict.py # 模型预测
 ├── utils.py # 数据处理工具
 ├── train.py # 训练模型主程序入口，包括训练、评估
@@ -181,7 +182,7 @@ python train.py --vocab_path='./senta_word_dict.txt' \
 
 * `vocab_path`: 词汇表文件路径。
 * `device`: 选用什么设备进行训练，可选cpu、gpu或者xpu。如使用gpu训练则参数gpus指定GPU卡号。目前xpu只支持模型网络设置为lstm。
-* `network`: 模型网络名称，默认为`bilstm_attn`， 可更换为bilstm, bigru, birnn，bow，lstm，rnn，gru，bilstm_attn，textcnn等。
+* `network`: 模型网络名称，默认为`bilstm`， 可更换为bilstm，bigru，birnn，bow，lstm，rnn，gru，bilstm_attn，cnn等。
 * `lr`: 学习率， 默认为5e-5。
 * `batch_size`: 运行一个batch大小，默认为64。
 * `epochs`: 训练轮次，默认为10。

diff --git a/examples/text_classification/rnn/export_model.py b/examples/text_classification/rnn/export_model.py
@@ -21,7 +21,7 @@
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
 parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The path to vocabulary.")
-parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
+parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
     default="bilstm", help="Select which network to train, defaults to bilstm.")
 parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
 parser.add_argument("--params_path", type=str, default='./checkpoints/final.pdparams', help="The path of model parameter to be loaded.")
@@ -35,9 +35,67 @@ def main():
     vocab = Vocab.load_vocabulary(args.vocab_path)
     label_map = {0: 'negative', 1: 'positive'}
 
-    # Construct the newtork.
-    model = ppnlp.models.Senta(
-        network=args.network, vocab_size=len(vocab), num_classes=len(label_map))
+    # Constructs the network.
+    network = args.network.lower()
+    vocab_size = len(vocab)
+    num_classes = len(label_map)
+    pad_token_id = vocab.to_indices('[PAD]')
+    if network == 'bow':
+        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'bigru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm_attn':
+        lstm_hidden_size = 196
+        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
+        model = BiLSTMAttentionModel(
+            attention_layer=attention,
+            vocab_size=vocab_size,
+            lstm_hidden_size=lstm_hidden_size,
+            num_classes=num_classes,
+            padding_idx=pad_token_id)
+    elif network == 'birnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'cnn':
+        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'gru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'lstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'rnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    else:
+        raise ValueError(
+            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
+            % network)
 
     # Load model parameters.
     state_dict = paddle.load(args.params_path)

diff --git a/paddlenlp/models/senta.py → examples/text_classification/rnn/model.py b/paddlenlp/models/senta.py → examples/text_classification/rnn/model.py
@@ -21,90 +21,6 @@
 INF = 1. * 1e12
 
 
-class Senta(nn.Layer):
-    def __init__(self,
-                 network,
-                 vocab_size,
-                 num_classes,
-                 emb_dim=128,
-                 pad_token_id=0):
-        super().__init__()
-
-        network = network.lower()
-        if network == 'bow':
-            self.model = BoWModel(
-                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id)
-        elif network == 'bigru':
-            self.model = GRUModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='bidirect',
-                padding_idx=pad_token_id)
-        elif network == 'bilstm':
-            self.model = LSTMModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='bidirect',
-                padding_idx=pad_token_id)
-        elif network == 'bilstm_attn':
-            lstm_hidden_size = 196
-            attention = SelfInteractiveAttention(hidden_size=2 *
-                                                 lstm_hidden_size)
-            self.model = BiLSTMAttentionModel(
-                attention_layer=attention,
-                vocab_size=vocab_size,
-                lstm_hidden_size=lstm_hidden_size,
-                num_classes=num_classes,
-                padding_idx=pad_token_id)
-        elif network == 'birnn':
-            self.model = RNNModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='bidirect',
-                padding_idx=pad_token_id)
-        elif network == 'cnn':
-            self.model = CNNModel(
-                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id)
-        elif network == 'gru':
-            self.model = GRUModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='forward',
-                padding_idx=pad_token_id,
-                pooling_type='max')
-        elif network == 'lstm':
-            self.model = LSTMModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='forward',
-                padding_idx=pad_token_id,
-                pooling_type='max')
-        elif network == 'rnn':
-            self.model = RNNModel(
-                vocab_size,
-                num_classes,
-                emb_dim,
-                direction='forward',
-                padding_idx=pad_token_id,
-                pooling_type='max')
-        elif network == 'textcnn':
-            self.model = TextCNNModel(
-                vocab_size, num_classes, emb_dim, padding_idx=pad_token_id)
-        else:
-            raise ValueError(
-                "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn, bilstm_attn and textcnn."
-                % network)
-
-    def forward(self, text, seq_len=None):
-        logits = self.model(text, seq_len)
-        return logits
-
-
 class BoWModel(nn.Layer):
     """
     This class implements the Bag of Words Classification Network model to classify texts.

diff --git a/examples/text_classification/rnn/predict.py b/examples/text_classification/rnn/predict.py
@@ -18,14 +18,15 @@
 import paddlenlp as ppnlp
 from paddlenlp.data import JiebaTokenizer, Stack, Tuple, Pad, Vocab
 
+from model import BoWModel, BiLSTMAttentionModel, CNNModel, LSTMModel, GRUModel, RNNModel, SelfInteractiveAttention
 from utils import preprocess_prediction_data
 
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
 parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
 parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number of a batch for training.")
 parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The path to vocabulary.")
-parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
+parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
     default="bilstm", help="Select which network to train, defaults to bilstm.")
 parser.add_argument("--params_path", type=str, default='./checkpoints/final.pdparams', help="The path of model parameter to be loaded.")
 args = parser.parse_args()
@@ -81,8 +82,66 @@ def predict(model, data, label_map, batch_size=1, pad_token_id=0):
     label_map = {0: 'negative', 1: 'positive'}
 
     # Constructs the newtork.
-    model = ppnlp.models.Senta(
-        network=args.network, vocab_size=len(vocab), num_classes=len(label_map))
+    network = args.network.lower()
+    vocab_size = len(vocab)
+    num_classes = len(label_map)
+    pad_token_id = vocab.to_indices('[PAD]')
+    if network == 'bow':
+        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'bigru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm_attn':
+        lstm_hidden_size = 196
+        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
+        model = BiLSTMAttentionModel(
+            attention_layer=attention,
+            vocab_size=vocab_size,
+            lstm_hidden_size=lstm_hidden_size,
+            num_classes=num_classes,
+            padding_idx=pad_token_id)
+    elif network == 'birnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'cnn':
+        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'gru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'lstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'rnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    else:
+        raise ValueError(
+            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
+            % network)
 
     # Loads model parameters.
     state_dict = paddle.load(args.params_path)

diff --git a/examples/text_classification/rnn/train.py b/examples/text_classification/rnn/train.py
@@ -22,6 +22,7 @@
 from paddlenlp.data import JiebaTokenizer, Pad, Stack, Tuple, Vocab
 from paddlenlp.datasets import load_dataset
 
+from model import BoWModel, BiLSTMAttentionModel, CNNModel, LSTMModel, GRUModel, RNNModel, SelfInteractiveAttention
 from utils import convert_example
 
 # yapf: disable
@@ -32,7 +33,7 @@
 parser.add_argument("--save_dir", type=str, default='checkpoints/', help="Directory to save model checkpoint")
 parser.add_argument("--batch_size", type=int, default=64, help="Total examples' number of a batch for training.")
 parser.add_argument("--vocab_path", type=str, default="./senta_word_dict.txt", help="The directory to dataset.")
-parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn', 'textcnn'],
+parser.add_argument('--network', choices=['bow', 'lstm', 'bilstm', 'gru', 'bigru', 'rnn', 'birnn', 'bilstm_attn', 'cnn'],
     default="bilstm", help="Select which network to train, defaults to bilstm.")
 parser.add_argument("--init_from_ckpt", type=str, default=None, help="The path of checkpoint to be loaded.")
 args = parser.parse_args()
@@ -97,10 +98,66 @@ def create_dataloader(dataset,
         "chnsenticorp", splits=["train", "dev", "test"])
 
     # Constructs the newtork.
-    model = ppnlp.models.Senta(
-        network=args.network,
-        vocab_size=len(vocab),
-        num_classes=len(train_ds.label_list))
+    network = args.network.lower()
+    vocab_size = len(vocab)
+    num_classes = len(train_ds.label_list)
+    pad_token_id = vocab.to_indices('[PAD]')
+    if network == 'bow':
+        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'bigru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'bilstm_attn':
+        lstm_hidden_size = 196
+        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
+        model = BiLSTMAttentionModel(
+            attention_layer=attention,
+            vocab_size=vocab_size,
+            lstm_hidden_size=lstm_hidden_size,
+            num_classes=num_classes,
+            padding_idx=pad_token_id)
+    elif network == 'birnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='bidirect',
+            padding_idx=pad_token_id)
+    elif network == 'cnn':
+        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
+    elif network == 'gru':
+        model = GRUModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'lstm':
+        model = LSTMModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    elif network == 'rnn':
+        model = RNNModel(
+            vocab_size,
+            num_classes,
+            direction='forward',
+            padding_idx=pad_token_id,
+            pooling_type='max')
+    else:
+        raise ValueError(
+            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
+            % network)
     model = paddle.Model(model)
 
     # Reads data and generates mini-batches.

diff --git a/examples/text_matching/simnet/README.md b/examples/text_matching/simnet/README.md
@@ -35,6 +35,7 @@ SimNet框架在百度各产品上广泛应用，主要包括BOW、CNN、RNN、MM
 
 ```text
 simnet/
+├── model.py # 模型组网
 ├── predict.py # 模型预测
 ├── utils.py # 数据处理工具
 ├── train.py # 训练模型主程序入口，包括训练、评估

diff --git a/paddlenlp/models/simnet.py → examples/text_matching/simnet/model.py b/paddlenlp/models/simnet.py → examples/text_matching/simnet/model.py
diff --git a/examples/text_matching/simnet/predict.py b/examples/text_matching/simnet/predict.py
@@ -20,6 +20,7 @@
 import paddlenlp as ppnlp
 from paddlenlp.data import JiebaTokenizer, Pad, Stack, Tuple, Vocab
 
+from model import SimNet
 from utils import preprocess_prediction_data
 
 # yapf: disable
@@ -88,7 +89,7 @@ def predict(model, data, label_map, batch_size=1, pad_token_id=0):
     label_map = {0: 'dissimilar', 1: 'similar'}
 
     # Constructs the newtork.
-    model = ppnlp.models.SimNet(
+    model = SimNet(
         network=args.network, vocab_size=len(vocab), num_classes=len(label_map))
 
     # Loads model parameters.