Attention.py
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv1x1(in_planes, out_planes):
    """1x1 convolution, no padding."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
                     padding=0, bias=False)

def func_attention(query, context, gamma1):
    """
    query:   batch x idf x queryL   (e.g. word features)
    context: batch x idf x ih x iw  (e.g. image feature map)
    """
    batch_size, queryL = query.size(0), query.size(2)
    ih, iw = context.size(2), context.size(3)
    sourceL = ih * iw

    # Flatten the spatial grid: batch x idf x sourceL
    context = context.view(batch_size, -1, sourceL)
    contextT = torch.transpose(context, 1, 2).contiguous()

    # Region-word similarities: batch x sourceL x queryL
    attn = torch.bmm(contextT, query)
    # Normalize over words for each region
    attn = attn.view(batch_size * sourceL, queryL)
    attn = F.softmax(attn, dim=1)
    attn = attn.view(batch_size, sourceL, queryL)

    # Re-normalize over regions for each word, sharpened by gamma1
    attn = torch.transpose(attn, 1, 2).contiguous()
    attn = attn.view(batch_size * queryL, sourceL)
    attn = attn * gamma1
    attn = F.softmax(attn, dim=1)
    attn = attn.view(batch_size, queryL, sourceL)

    # Attended context per query: batch x idf x queryL
    attnT = torch.transpose(attn, 1, 2).contiguous()
    weightedContext = torch.bmm(context, attnT)
    return weightedContext, attn.view(batch_size, -1, ih, iw)
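
# A minimal usage sketch for func_attention (hypothetical sizes and gamma1,
# not from the original repo): 12 word vectors attend over a 17x17 grid of
# image features.
#
#   query = torch.randn(4, 32, 12)        # batch x idf x queryL
#   context = torch.randn(4, 32, 17, 17)  # batch x idf x ih x iw
#   weighted, attn = func_attention(query, context, gamma1=4.0)
#   # weighted: 4 x 32 x 12       (one attended image vector per word)
#   # attn:     4 x 12 x 17 x 17  (a spatial attention map per word)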

class SpatialAttention(nn.Module):
    def __init__(self, idf, cdf):
        super(SpatialAttention, self).__init__()
        # Project word features (cdf channels) into image feature space (idf channels)
        self.conv_context = conv1x1(cdf, idf)
        self.tanh = nn.Tanh()
        # 1x1 projections for the second (gated) similarity branch
        self.conv_img = conv1x1(idf, idf)
        self.conv_text = conv1x1(cdf, idf)
    def forward(self, input, context):
        """
        input:   batch x idf x ih x iw  (image feature map, the query)
        context: batch x cdf x sourceL  (word features, the source)
        """
        ih, iw = input.size(2), input.size(3)
        queryL = ih * iw
        batch_size, sourceL = context.size(0), context.size(2)

        # batch x queryL x idf
        target = input.view(batch_size, -1, queryL)
        targetT = torch.transpose(target, 1, 2).contiguous()

        # Project words into the image feature space: batch x idf x sourceL
        sourceT = context.unsqueeze(3)
        text = torch.tanh(self.conv_text(sourceT).squeeze(3))
        sourceT = self.conv_context(sourceT).squeeze(3)

        # Gated image features: batch x queryL x idf
        img = torch.tanh(self.conv_img(input))
        img = img.view(batch_size, -1, queryL)
        img = torch.transpose(img, 1, 2).contiguous()

        # Raw region-word similarities: batch x queryL x sourceL
        attn = torch.bmm(targetT, sourceT)
        # Second similarity map from the gated branches, squashed to (-1, 1)
        combine = torch.bmm(img, text)
        combine = self.tanh(combine)
        combine = torch.transpose(combine, 1, 2).contiguous()

        # Softmax over words for each region...
        attn = attn.view(batch_size * queryL, sourceL)
        attn = F.softmax(attn, dim=1)
        attn = attn.view(batch_size, queryL, sourceL)
        # ...then over regions for each word
        attn = torch.transpose(attn, 1, 2).contiguous()
        attn = attn.view(batch_size * sourceL, queryL)
        attn = F.softmax(attn, dim=1)
        attn = attn.view(batch_size, sourceL, queryL)

        # Modulate the normalized attention by the second similarity map
        attn = attn * combine

        # Attended word features per region: batch x idf x queryL
        weightedContext = torch.bmm(sourceT, attn)
        attn = attn.view(batch_size, -1, ih, iw)
        return weightedContext, attn
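
# A minimal usage sketch for SpatialAttention (hypothetical sizes: idf=32
# image channels, cdf=256 word-embedding channels; not from the original repo):
#
#   layer = SpatialAttention(idf=32, cdf=256)
#   h = torch.randn(4, 32, 17, 17)   # image feature map
#   words = torch.randn(4, 256, 12)  # 12 word embeddings per caption
#   weighted, attn = layer(h, words)
#   # weighted: 4 x 32 x 289       (text-conditioned feature per region, flattened)
#   # attn:     4 x 12 x 17 x 17   (per-word attention over the image grid)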