-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
175 lines (144 loc) · 5.9 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import zipfile
import urllib.request
import shutil
import itertools

import numpy as np
import skimage.data
import skimage.io
import skimage.transform
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
def unzipFile(fileToUnzip, folderToUnzip):
    """Extract every member of a zip archive into a folder.

    fileToUnzip: path of the zip archive to read.
    folderToUnzip: directory that receives the extracted files.
    """
    archive = zipfile.ZipFile(fileToUnzip, mode="r")
    # The archive object is its own context manager, so it is closed even
    # when extraction fails.
    with archive:
        archive.extractall(path=folderToUnzip)
def _downloadAndUnzip(url, zipName, destFolder):
    """Download `url` to `zipName` and extract it into `destFolder`.

    Nothing happens when `zipName` already exists.

    url: address of the zip archive.
    zipName: local path where the archive is stored.
    destFolder: directory that receives the extracted files.
    """
    if os.path.exists(zipName):
        return

    print("Downloading {}".format(zipName))
    # Download under a temporary name and rename only once the transfer is
    # complete; otherwise an interrupted download would leave a truncated
    # archive that the existence check above accepts on every later run.
    partialName = zipName + ".part"
    try:
        with urllib.request.urlopen(url) as response, open(partialName, 'wb') as outFile:
            shutil.copyfileobj(response, outFile)
        os.replace(partialName, zipName)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(partialName):
            os.remove(partialName)

    print("Unzip {}".format(zipName))
    unzipFile(zipName, destFolder)


def loadDb():
    """Download and extract the BelgiumTSC training and testing archives.

    The zip files are stored under their bare file names (i.e. relative to
    the current working directory) and are extracted into the directory that
    contains this module. An archive that is already present is neither
    downloaded nor extracted again.
    """
    folderDb = os.path.dirname(os.path.abspath(__file__))
    linkTraining = 'http://btsd.ethz.ch/shareddata/BelgiumTSC/BelgiumTSC_Training.zip'
    linkTestDb = 'http://btsd.ethz.ch/shareddata/BelgiumTSC/BelgiumTSC_Testing.zip'
    trainingZip = 'BelgiumTSC_Training.zip'
    testZip = 'BelgiumTSC_Testing.zip'

    _downloadAndUnzip(linkTraining, trainingZip, folderDb)
    _downloadAndUnzip(linkTestDb, testZip, folderDb)
def loadData(dataDir, resize=False, size=(32, 32)):
    """Load a labelled image data set from a directory tree.

    Every sub-directory of `dataDir` stands for one label: its name is
    converted with `int()` and every `.ppm` file inside it is one sample.
    Directories and files are visited in sorted order so the result does not
    depend on the order in which the file system lists them.

    dataDir: directory containing one sub-directory per label.
    resize: when True, every image is passed through skimage.transform.resize.
    size: target size handed to the resize call; unused when `resize` is False.

    Returns (images, labels): two parallel lists, the loaded images and the
    integer label of each image.
    Raises ValueError when a sub-directory that holds `.ppm` files does not
    have an integer name.
    """
    # Each sub-directory of dataDir represents a label.
    directories = sorted(
        entry for entry in os.listdir(dataDir)
        if os.path.isdir(os.path.join(dataDir, entry))
    )

    images = []
    labels = []
    for directory in directories:
        labelDir = os.path.join(dataDir, directory)
        fileNames = sorted(
            os.path.join(labelDir, name)
            for name in os.listdir(labelDir)
            if name.endswith(".ppm")
        )
        for fileName in fileNames:
            # skimage.data.imread is gone from current scikit-image;
            # skimage.io.imread is the supported reader.
            image = skimage.io.imread(fileName)
            if resize:
                image = skimage.transform.resize(image, size)
            images.append(image)
            # The label is the directory name; converted per image so that a
            # non-numeric directory without images is still ignored.
            labels.append(int(directory))
    return images, labels
def readDatabase(size=(32, 32)):
    """Load the training and testing sets and prepare them for a classifier.

    Images are read from the "Training" and "Testing" directories (relative
    to the current working directory), resized to `size`, stacked into
    arrays and divided by 255. Labels are one-hot encoded; the number of
    classes is the number of distinct labels found in the training set.

    size: target image size handed to loadData.

    Returns (xTrain, yTrainCategorical, xTest, yTestCategorical, yTest) where
    yTest is the list of integer test labels as returned by loadData.
    """
    # `keras.utils.np_utils` is missing from recent Keras releases, while
    # some releases do not expose the function at package level; try both.
    try:
        from keras.utils import to_categorical
    except ImportError:
        from keras.utils.np_utils import to_categorical

    print('Reading dataset ...')
    xTrain, yTrain = loadData("Training", resize=True, size=size)
    xTest, yTest = loadData("Testing", resize=True, size=size)

    # One-hot encode the labels.
    labelsCount = len(set(yTrain))
    yTrainCategorical = to_categorical(yTrain, num_classes=labelsCount)
    yTestCategorical = to_categorical(yTest, num_classes=labelsCount)

    # Scale between 0 and 1
    # NOTE(review): skimage.transform.resize appears to return float images
    # already scaled to [0, 1] by default, in which case this divides a
    # second time. Left unchanged because existing trained models depend on
    # this input range - confirm before altering.
    xTrain = np.array(xTrain) / 255.0
    xTest = np.array(xTest) / 255.0

    return xTrain, yTrainCategorical, xTest, yTestCategorical, yTest
def displayImagesAndLabels(images, labels):
    """Display the first image of each label.

    images: sequence of images, parallel to `labels`.
    labels: sequence of labels, one per image.

    Each subplot is titled with the label and the number of samples carrying
    it. Labels are shown in sorted order.
    """
    # Single pass: remember where each label first occurs and how often it
    # appears, instead of calling index()/count() once per label.
    firstIndex = {}
    counts = {}
    for position, label in enumerate(labels):
        firstIndex.setdefault(label, position)
        counts[label] = counts.get(label, 0) + 1

    # Keep the 4 x 16 layout, but add rows when there are more than 64
    # labels so that subplot() never receives an out-of-range index.
    columns = 16
    rows = max(4, -(-len(firstIndex) // columns))

    plt.figure(figsize=(15, 15))
    for cell, label in enumerate(sorted(firstIndex), start=1):
        plt.subplot(rows, columns, cell)
        plt.axis('off')
        plt.title("Label {0} ({1})".format(label, counts[label]))
        plt.imshow(images[firstIndex[label]])
    plt.show()
def displayLabelImages(images, labels, label):
    """Display images of a specific label.

    images: sequence of images, parallel to `labels`.
    labels: sequence of labels, one per image.
    label: the label whose images are shown.

    At most 24 images are displayed, in a grid of 3 rows by 8 columns.
    Raises ValueError when no image carries `label`.
    """
    limit = 24  # show a max of 24 images

    # Select by label instead of slicing from the first occurrence, so the
    # right images are shown even when samples of one label are not stored
    # contiguously. islice stops scanning once `limit` matches are found.
    matching = (image for image, imageLabel in zip(images, labels)
                if imageLabel == label)
    selected = list(itertools.islice(matching, limit))
    if not selected:
        raise ValueError("{!r} is not in labels".format(label))

    plt.figure(figsize=(15, 5))
    for cell, image in enumerate(selected, start=1):
        plt.subplot(3, 8, cell)  # 3 rows, 8 per row
        plt.axis('off')
        plt.imshow(image)
    plt.show()
def plotConfusionMatrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """Print and plot a confusion matrix.

    cm: square numpy array; rows are true labels, columns predicted labels.
    classes: tick labels, one per row/column of `cm`.
    normalize: when True, every row is divided by its sum before display.
        Rows whose sum is zero are shown as all zeros.
    title: title of the plot.
    cmap: matplotlib colormap used for the cells.
    """
    if normalize:
        rowTotals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row total is non-zero; a plain division
        # would fill empty rows with NaN and make the colour threshold
        # below NaN as well.
        cm = np.divide(cm.astype('float'), rowTotals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=rowTotals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
def showConfusionMatrix(yLabels, predictedValues):
    """Print the accuracy and plot the confusion matrix of a prediction.

    yLabels: true integer class label of every sample.
    predictedValues: 2-D array of per-class scores, one row per sample; the
        predicted class is the index of the largest score in the row.
    """
    predictedLabels = np.argmax(predictedValues, axis=1)

    accuracy = accuracy_score(y_true=yLabels, y_pred=predictedLabels)
    print("Accuracy: {:.4f}".format(accuracy))

    # Derive the class list from the data rather than assuming 62 classes,
    # and hand the same list to confusion_matrix so that the tick labels
    # always line up with the rows and columns of the matrix.
    classIds = sorted({int(value) for value in yLabels}
                      | {int(value) for value in predictedLabels})
    matrix = confusion_matrix(y_true=yLabels, y_pred=predictedLabels,
                              labels=classIds)
    print(matrix.shape)

    plotConfusionMatrix(matrix,
                        classes=[str(classId) for classId in classIds],
                        title='Confusion matrix')