forked from FengQuanLi/ResnetGPT
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Embed.py
88 lines (78 loc) · 3.19 KB
/
Embed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import torch
import torch.nn as nn
import math
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
class Embedder(nn.Module):
def __init__(self, vocab_size, d_model):
super().__init__()
self.d_model = d_model
self.embed = Embedder2(vocab_size, d_model)
def forward(self, x):
return self.embed(x)
class PositionalEncoder(nn.Module):
def __init__(self, d_model, max_seq_len=1024, dropout=0.1):
super().__init__()
self.d_model = d_model
self.dropout = nn.Dropout(dropout)
# create constant 'pe' matrix with values dependant on
# pos and i
pe = torch.zeros(max_seq_len, d_model)
for pos in range(max_seq_len):
for i in range(0, d_model, 2):
pe[pos, i] = \
math.sin(pos / (10000 ** ((2 * i) / d_model)))
pe[pos, i + 1] = \
math.cos(pos / (10000 ** ((2 * (i + 1)) / d_model)))
pe = pe.unsqueeze(0)
self.register_buffer('pe', pe)
def forward(self, x):
# make embeddings relatively larger
x = x * math.sqrt(self.d_model)
# add constant to embedding
seq_len = x.size(1)
pe = Variable(self.pe[:, :seq_len], requires_grad=False)
if x.is_cuda:
pe.cuda()
x = x + pe
x = self.dropout(x)
return x
class Embedder2(nn.Module):
def __init__(self, num_embeddings, embedding_dim, padding_idx=None,
max_norm=None, norm_type=2., scale_grad_by_freq=False,
sparse=False, _weight=None):
super(Embedder2, self).__init__()
self.num_embeddings = num_embeddings
self.embedding_dim = embedding_dim
if padding_idx is not None:
if padding_idx > 0:
assert padding_idx < self.num_embeddings, 'Padding_idx must be within num_embeddings'
elif padding_idx < 0:
assert padding_idx >= -self.num_embeddings, 'Padding_idx must be within num_embeddings'
padding_idx = self.num_embeddings + padding_idx
self.padding_idx = padding_idx
self.max_norm = max_norm
self.norm_type = norm_type
self.scale_grad_by_freq = scale_grad_by_freq
if _weight is None:
np.random.seed(1)
np数 = np.random.uniform(0, 1, (num_embeddings, embedding_dim))
self.weight = nn.Parameter(torch.Tensor(np数))
# self.weight = nn.Parameter(torch.Tensor(num_embeddings, embedding_dim))
#self.reset_parameters()
else:
assert list(_weight.shape) == [num_embeddings, embedding_dim], \
'Shape of weight does not match num_embeddings and embedding_dim'
self.weight = nn.Parameter(_weight)
self.sparse = sparse
a = 0
def reset_parameters(self):
nn.init.normal_(self.weight)
if self.padding_idx is not None:
with torch.no_grad():
self.weight[self.padding_idx].fill_(0)
def forward(self, input):
return F.embedding(
input, self.weight, self.padding_idx, self.max_norm,
self.norm_type, self.scale_grad_by_freq, self.sparse)