Commit af0669c: Add files via upload
HawkingC authored Jun 9, 2023 (parent: ec0756c)
Showing 9 changed files with 109 additions and 24 deletions.
models/DPCNN.py (19 changes: 15 additions & 4 deletions)

@@ -3,6 +3,8 @@
 import mindspore.nn as nn
 import mindspore.ops as ops
 import numpy as np
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -29,18 +31,27 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension
+        self.embed = 300  # character embedding dimension
         self.num_filters = 250  # number of convolution kernels (channels)

 '''Deep Pyramid Convolutional Neural Networks for Text Categorization'''

+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding

 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.conv_region = nn.Conv2d(1, config.num_filters, (3, config.embed), stride=1, has_bias=True)
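The same Embedding shim recurs in every file in this commit: it rebuilds PyTorch's nn.Embedding.from_pretrained on top of MindSpore's nn.Embedding by passing the pretrained Tensor as embedding_table and toggling requires_grad on the resulting parameter. A minimal usage sketch, assuming a hypothetical pretrained.npy file holding a float32 matrix of shape (vocab_size, embed_dim):

    import numpy as np
    from mindspore import Tensor
    from models.DPCNN import Embedding  # the class added in the hunk above

    # Hypothetical pretrained weights: one row per vocabulary entry.
    weights = Tensor(np.load('pretrained.npy').astype(np.float32))
    embedding = Embedding.from_pretrained_embedding(weights, freeze=False)

    # Look up three token ids; the output has shape (1, 3, embed_dim).
    ids = Tensor(np.array([[1, 2, 3]], dtype=np.int32))
    vectors = embedding(ids)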
models/FastText.py (19 changes: 16 additions & 3 deletions)

@@ -3,6 +3,8 @@
 import mindspore.nn as nn
 import mindspore.ops as ops
 import numpy as np
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -29,20 +31,31 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension
+        self.embed = 300  # character embedding dimension
         self.hidden_size = 256  # hidden layer size
         self.n_gram_vocab = 250499


 '''Bag of Tricks for Efficient Text Classification'''
+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.embedding_ngram2 = nn.Embedding(config.n_gram_vocab, config.embed)
models/TextCNN.py (20 changes: 16 additions & 4 deletions)

@@ -2,7 +2,8 @@
 import mindspore.nn as nn
 import mindspore.ops as ops
 import numpy as np
-from mindspore import Tensor, CSRTensor, COOTensor
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -29,20 +30,31 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension
+        self.embed = 300  # character embedding dimension
         self.filter_sizes = (2, 3, 4)  # convolution kernel sizes
         self.num_filters = 256  # number of convolution kernels (channels)


 '''Convolutional Neural Networks for Sentence Classification'''
+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()  # inherits from nn.Cell; builds the model instance (self)
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.convs = nn.CellList(
models/TextRCNN.py (18 changes: 15 additions & 3 deletions)

@@ -3,6 +3,8 @@
 import mindspore.ops as ops
 import numpy as np
 import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -29,19 +31,29 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension; matches the pretrained vectors when they are used
+        self.embed = 300  # character embedding dimension; matches the pretrained vectors when they are used
         self.hidden_size = 256  # LSTM hidden size
         self.num_layers = 1  # number of LSTM layers

 '''Recurrent Convolutional Neural Networks for Text Classification'''
+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers,
models/TextRNN.py (20 changes: 17 additions & 3 deletions)

@@ -2,6 +2,8 @@
 import mindspore
 import mindspore.nn as nn
 import numpy as np
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal


 class Config(object):
@@ -29,20 +31,32 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension; matches the pretrained vectors when they are used
+        # self.embed = self.embedding_pretrained.size(1) if self.embedding_pretrained is not None else 300  # character embedding dimension; matches the pretrained vectors when they are used
+        self.embed = 300
         self.hidden_size = 128  # LSTM hidden size
         self.num_layers = 2  # number of LSTM layers


 '''Recurrent Neural Network for Text Classification with Multi-Task Learning'''

+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers,
models/TextRNN_Att.py (18 changes: 15 additions & 3 deletions)

@@ -4,6 +4,7 @@
 import mindspore.ops as ops
 import numpy as np
 from mindspore import Parameter, Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -30,20 +31,31 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 1e-3  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension; matches the pretrained vectors when they are used
+        self.embed = 300  # character embedding dimension; matches the pretrained vectors when they are used
         self.hidden_size = 128  # LSTM hidden size
         self.num_layers = 2  # number of LSTM layers
         self.hidden_size2 = 64

 '''Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification'''
+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
         self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers,
models/Transformer.py (19 changes: 15 additions & 4 deletions)

@@ -3,7 +3,8 @@
 import mindspore.ops as ops
 import numpy as np
 import copy
-from mindspore import Parameter, Tensor
+from mindspore import Tensor
+from mindspore.common.initializer import initializer, Normal

 class Config(object):

@@ -30,22 +31,32 @@ def __init__(self, dataset, embedding):
         self.batch_size = 128  # mini-batch size
         self.pad_size = 32  # every sentence padded/truncated to this length (pad short, cut long)
         self.learning_rate = 5e-4  # learning rate
-        self.embed = self.embedding_pretrained.size(1)\
-            if self.embedding_pretrained is not None else 300  # character embedding dimension
+        self.embed = 300  # character embedding dimension
         self.dim_model = 300
         self.hidden = 1024
         self.last_hidden = 512
         self.num_head = 5
         self.num_encoder = 2

 '''Attention Is All You Need'''
+class Embedding(nn.Embedding):
+    def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
+        if embedding_table == 'normal':
+            embedding_table = Normal(1.0)
+        super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
+
+    @classmethod
+    def from_pretrained_embedding(cls, embeddings: Tensor, freeze=True, padding_idx=None):
+        rows, cols = embeddings.shape
+        embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
+        embedding.embedding_table.requires_grad = not freeze
+        return embedding


 class Model(nn.Cell):
     def __init__(self, config):
         super(Model, self).__init__()
         if config.embedding_pretrained is not None:
-            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
+            self.embedding = Embedding.from_pretrained_embedding(config.embedding_pretrained, freeze=False)
         else:
             self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
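All seven models load the pretrained table with freeze=False, i.e. the embeddings are fine-tuned during training. A short sketch of the freeze semantics the classmethod implements (assuming standard MindSpore Parameter behavior, with weights as in the earlier example):

    # freeze=True copies the table but excludes it from gradient updates.
    frozen = Embedding.from_pretrained_embedding(weights, freeze=True)
    assert frozen.embedding_table.requires_grad is False

    # freeze=False (the setting used by the Model classes above) keeps it trainable.
    trainable = Embedding.from_pretrained_embedding(weights, freeze=False)
    assert trainable.embedding_table.requires_grad is True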
models/__pycache__/DPCNN.cpython-37.pyc (binary file added)
models/__pycache__/TextRNN.cpython-37.pyc (binary file added)