forked from jacobgil/pytorch-grad-cam
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrad-cam.py
136 lines (109 loc) · 4.12 KB
/
grad-cam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import torch
from torch.autograd import Variable
from torchvision import models
import cv2
import sys
import numpy as np
class FeatureExtractor():
""" Class for extracting activations and
registering gradientsfrom targetted intermediate layers """
def __init__(self, model, target_layers):
self.model = model
self.target_layers = target_layers
self.gradients = []
def save_gradient(self, grad):
self.gradients.append(grad)
def __call__(self, x):
outputs = []
self.gradients = []
for name, module in self.model._modules.items():
x = module(x)
if name in self.target_layers:
x.register_hook(self.save_gradient)
outputs += [x]
return outputs, x
class ModelOutputs():
""" Class for making a forward pass, and getting:
1. The network output.
2. Activations from intermeddiate targetted layers.
3. Gradients from intermeddiate targetted layers. """
def __init__(self, model, target_layers):
self.model = model
self.feature_extractor = FeatureExtractor(self.model.features, target_layers)
def get_gradients(self):
return self.feature_extractor.gradients
def __call__(self, x):
target_activations, output = self.feature_extractor(x)
output = output.view(output.size(0), -1)
output = self.model.classifier(output)
return target_activations, output
def preprocess_image(img):
means=[0.485, 0.456, 0.406]
stds=[0.229, 0.224, 0.225]
preprocessed_img = img.copy()[: , :, ::-1]
for i in range(3):
preprocessed_img[:, :, i] = preprocessed_img[:, :, i] - means[i]
preprocessed_img[:, :, i] = preprocessed_img[:, :, i] / stds[i]
preprocessed_img = \
np.ascontiguousarray(np.transpose(preprocessed_img, (2, 0, 1)))
preprocessed_img = torch.from_numpy(preprocessed_img)
preprocessed_img.unsqueeze_(0)
input = Variable(preprocessed_img, requires_grad = True)
return input
def show_cam_on_image(img, mask):
heatmap = cv2.applyColorMap(np.uint8(255*mask), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
cam = heatmap + np.float32(img)
cam = cam / np.max(cam)
cv2.imwrite("cam.jpg", np.uint8(255 * cam))
class GradCam:
def __init__(self, model, target_layer_names):
self.model = model
self.model.eval()
self.extractor = ModelOutputs(self.model, target_layer_names)
def forward(self, input):
return self.model(input)
def __call__(self, input, index = None):
features, output = self.extractor(input)
if index == None:
index = np.argmax(output.data.numpy())
one_hot = np.zeros((1, output.size()[-1]), dtype = np.float32)
one_hot[0][index] = 1
one_hot = Variable(torch.from_numpy(one_hot), requires_grad = True)
one_hot = torch.sum(one_hot * output)
self.model.features.zero_grad()
self.model.classifier.zero_grad()
one_hot.backward(retain_variables=True)
grads_val = self.extractor.get_gradients()[-1].data.numpy()
target = features[-1]
target = target.data.numpy()[0, :]
weights = np.mean(grads_val, axis = (2, 3))[0, :]
cam = np.ones(target.shape[1 : ], dtype = np.float32)
for i, w in enumerate(weights):
cam += w * target[i, :, :]
cam = np.maximum(cam, 0)
cam = cv2.resize(cam, (224, 224))
cam = cam - np.min(cam)
cam = cam / np.max(cam)
return cam
if __name__ == '__main__':
""" python grad_cam.py <path_to_image>
1. Loads an image with opencv.
2. Preprocesses it for VGG19 and converts to a pytorch variable.
3. Makes a forward pass to find the category index with the highest score,
and computes intermediate activations.
Makes the visualization.
TBD: Add CUDA support, add guided backpropagation. """
# Can work with any model, but it assumes that the model has a
# feature method, and a classifier method,
# as in the VGG models in torchvision.
grad_cam = GradCam(model = models.vgg19(pretrained=True), \
target_layer_names = ["35"])
img = cv2.imread(sys.argv[1], 1)
img = np.float32(cv2.resize(img, (224, 224))) / 255
input = preprocess_image(img)
# If None, returns the map for the highest scoring category.
# Otherwise, targets the requested index.
target_index = None
mask = grad_cam(input, target_index)
show_cam_on_image(img, mask)