-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdigits_OCR.py
194 lines (161 loc) · 5.88 KB
/
digits_OCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
__author__ = 'victor'
import sys
sys.path.append("/home/victor/Programming/caffe-master/python")
import caffe
import cv2
import matplotlib.pyplot as plt
import numpy as np
import time
import math
def show_image(img):
    """Display *img* with matplotlib, axes hidden, and block in ``plt.show()``."""
    plt.imshow(img)
    plt.axis("off")
    plt.show()
# Class labels: the script maps the classifier's argmax index i to chars[i].
chars = list('abcdefghijklmnopqrstuvwxyz')
def validate_point(point, img):
    """Tell whether *point* lies inside *img* and is not marked with ``-1``.

    The point is unpacked in the body rather than in the signature: tuple
    parameters (``def f((x, y), img)``) are a syntax error on Python 3.
    Callers keep passing one ``(x, y)`` tuple as the first argument.

    Args:
        point: ``(x, y)`` pair; ``x`` indexes axis 0 of *img*, ``y`` axis 1.
        img: NumPy array with at least two dimensions.

    Returns:
        bool: True when ``0 <= x < img.shape[0]``, ``0 <= y < img.shape[1]``
        and no value stored at ``img[x, y]`` equals ``-1``.
    """
    x, y = point
    if x < 0 or y < 0 or x >= img.shape[0] or y >= img.shape[1]:
        return False
    # img[x, y] is a scalar for 2-D images and a small array when there is a
    # trailing channel axis; np.all reduces both cases to one plain bool.
    return bool(np.all(img[x, y] != -1))
def find_closest_and_merge(blob, blobs):
    """Merge *blob* into the entry of *blobs* with the nearest centre of mass.

    Distances are Euclidean distances between centres of mass; on a tie the
    earliest entry wins. The chosen entry is extended in place with the
    points of *blob* (``+=``); nothing is returned.

    Raises:
        ValueError: if *blobs* is empty (argmin of an empty sequence).
    """
    target = np.array(blob_center_of_mass(blob))
    offsets = [np.array(blob_center_of_mass(candidate)) - target
               for candidate in blobs]
    gaps = np.array([np.sqrt(offset.dot(offset)) for offset in offsets])
    blobs[gaps.argmin()] += blob
def blobs_filter(blobs):
    """Keep the large blobs and fold medium-sized ones into their nearest one.

    With ``avg`` the floor of the mean number of points per blob:

    * nothing is kept unless ``avg * 0.3 > 20``;
    * blobs with fewer than 10 points are dropped;
    * blobs with at least ``0.35 * avg`` points are kept as main blobs;
    * blobs with at least ``0.03 * avg`` points are merged, in place, into
      the main blob found by ``find_closest_and_merge``;
    * anything smaller is dropped.

    Args:
        blobs: list of blobs, each a list of points.

    Returns:
        list: the main blobs (the same list objects that were passed in,
        possibly extended). Empty when *blobs* is empty or the average is
        too small.
    """
    print("Filter")
    # Same text the two-item Python 2 print statement produced (the statement
    # inserted a separator space after the literal's own trailing space).
    print("Blobs_len:  %d" % len(blobs))
    if not blobs:
        # No blobs at all: avoid dividing by zero when averaging.
        return []
    # Explicit floor division keeps the Python 2 integer average on Python 3.
    blobs_average_points = sum(len(blob) for blob in blobs) // len(blobs)
    if blobs_average_points*0.3 <= 20:  # we need at least 20 pts
        return []
    main_blobs = []
    aux_blobs = []
    for blob in blobs:
        size = len(blob)
        if size < 10:
            continue
        if size >= blobs_average_points*0.35:
            main_blobs.append(blob)
        elif size >= blobs_average_points*0.03:
            aux_blobs.append(blob)
    for blob in aux_blobs:
        find_closest_and_merge(blob, main_blobs)
    return main_blobs
def find_bounds(blob_points):  # finds rectangular bound of the blob
    """Return the bounding box of *blob_points* as (top, left, bottom, right).

    ``top``/``bottom`` are the smallest/largest first coordinates and
    ``left``/``right`` the smallest/largest second coordinates.

    Raises:
        IndexError: if *blob_points* is empty.
    """
    top, left = bottom, right = blob_points[0]
    for row, col in blob_points:
        top, bottom = min(top, row), max(bottom, row)
        left, right = min(left, col), max(right, col)
    return top, left, bottom, right
def find_blobs(img_orig):
    """Collect the connected groups of positive pixels ("blobs") of an image.

    Non-positive pixels are flooded outwards from ``(0, 0)`` over the 8
    neighbours of each pixel. Whenever a pixel ``> 0`` is met, the whole
    8-connected region of positive pixels around it is gathered as one blob,
    after which the flood continues. Pixel ``(0, 0)`` itself is always
    treated as background.

    Args:
        img_orig: 2-D array; it is copied, never modified.

    Returns:
        list: one entry per blob, each a list of ``(x, y)`` tuples where
        ``x`` indexes axis 0 and ``y`` axis 1. Every pixel appears once.
    """
    blobs = []
    # int16 working copy so that -1 can serve as the "already visited" mark.
    img = np.array(img_orig[:, :], np.int16)
    if img.size == 0:
        # Nothing to scan; also avoids indexing (0, 0) of an empty array.
        return blobs
    height, width = img.shape[0], img.shape[1]

    def unvisited(point):
        # In bounds and not yet marked. Kept local because it runs for every
        # neighbour of every pixel.
        row, col = point
        return 0 <= row < height and 0 <= col < width and img[row, col] != -1

    step = 0
    way = [(0, 0)]  # assumes that first point is not a blob(can be mistake but usually not)
    img[way[step]] = -1
    while True:
        # Try to find a blob: walk the queue of background pixels until one
        # of them has a positive neighbour.
        blob_point = None
        while (step < len(way)) and (blob_point is None):
            for dx in range(-1, 2):
                for dy in range(-1, 2):
                    point = (way[step][0]+dx, way[step][1]+dy)
                    if unvisited(point):
                        if img[point] > 0:
                            blob_point = point
                            # Leaves the dy loop only: the remaining dx rows
                            # are still scanned, so the seed ends up being
                            # the last positive neighbour seen.
                            break
                        way.append(point)  # new non-blob point
                        img[point] = -1
            step += 1
        if blob_point is None:
            break  # background exhausted, no further blob reachable
        # found new blob
        blob_points = [blob_point]
        # Mark the seed itself. Marking `point` (the last neighbour examined)
        # here would leave the seed unvisited, so it would be collected twice,
        # and would index outside the image when that neighbour is off-grid.
        img[blob_point] = -1
        blob_step = 0
        while blob_step < len(blob_points):  # passing blob's points
            center_x, center_y = blob_points[blob_step]
            for dx in range(-1, 2):
                for dy in range(-1, 2):
                    point = (center_x+dx, center_y+dy)
                    if unvisited(point):
                        if img[point] > 0:
                            blob_points.append(point)
                        else:
                            # Background next to the blob: queue it so the
                            # outer search resumes from there.
                            way.append(point)
                        img[point] = -1
            blob_step += 1
        blobs.append(blob_points)
    return blobs
def blob_center_of_mass(blob_points):
    """Return the mean position of *blob_points* as a ``(mean_x, mean_y)`` pair of floats.

    Raises:
        ZeroDivisionError: if *blob_points* is empty.
    """
    total_x = total_y = 0.0
    for point_x, point_y in blob_points:
        total_x += point_x
        total_y += point_y
    count = len(blob_points)
    return total_x/count, total_y/count
def reproduce_blob(blob_points):
    """Draw a blob on a blank square canvas and resize it to 28x28.

    The canvas side is 1.4 times the longer side of the blob's bounding box
    (truncated to an integer, at least 1). Points are shifted so that the
    blob's centre of mass lands near the canvas centre and drawn with value
    255 in a ``uint8`` image; shifted points that fall off the canvas are
    skipped.

    Args:
        blob_points: non-empty list of ``(x, y)`` integer points.

    Returns:
        Whatever ``caffe.io.resize_image(canvas, (28, 28))`` returns for the
        ``(side, side, 1)`` canvas.
    """
    top, left, bottom, right = find_bounds(blob_points)
    center_x, center_y = blob_center_of_mass(blob_points)
    max_side = max(bottom - top, right - left)
    # np.zeros needs integer dimensions (max_side*1.4 is a float), and a
    # one-pixel-wide extent must not produce a zero-sized canvas.
    canvas_side = max(1, int(max_side*1.4))
    tmp_img = np.zeros((canvas_side, canvas_side, 1), np.uint8)
    # math.floor returns a float on Python 2; array indices must be ints.
    dx = int(math.floor(0.7*max_side-center_x))
    dy = int(math.floor(0.7*max_side-center_y))
    for x, y in blob_points:
        if validate_point((x+dx, y+dy), tmp_img):
            tmp_img[x+dx, y+dy, 0] = 255
    tmp_img = caffe.io.resize_image(tmp_img, (28, 28))
    return tmp_img
def binary(caffe_img, threshold=0):
    """Threshold channel 0 of an image into a 0/255 mask.

    Args:
        caffe_img: array of shape ``(height, width, channels)``; only
            channel 0 is read.
        threshold: pixels strictly greater than this become 255, all others
            0. Defaults to 0.

    Returns:
        numpy.ndarray: new ``uint8`` array of shape ``(height, width, 1)``.
    """
    height, width = caffe_img.shape[:2]
    ret_img = np.zeros((height, width, 1), np.uint8)
    # One vectorised comparison instead of a Python loop over every pixel;
    # the 2-D mask selects along the first two axes of ret_img.
    ret_img[caffe_img[:, :, 0] > threshold] = 255
    return ret_img
# Script body: split the image named on the command line into blobs and
# classify each blob as a lowercase letter with the Caffe network.
# Usage: digits_OCR.py IMAGE_FILE [ANYTHING]
# Any second argument makes each blob image be displayed before it is classified.
if len(sys.argv) < 2:
    sys.exit("Usage: %s IMAGE_FILE [SHOW_BLOBS]" % sys.argv[0])
image_file = sys.argv[1]
# The print calls below emit the same text as the former print statements
# (including the separator space a two-item statement inserted).
print("Recognize  %s" % image_file)
orig_img = caffe.io.load_image(image_file, color=False)
img = orig_img
print("Convert image to network type:")
# Stretch the values so the brightest pixel becomes 255. An image whose
# maximum is 0 is left alone instead of being divided by zero.
peak = img.max()
if peak > 0:
    img = img*(255/peak)
raw_blobs = find_blobs(img[:, :, 0])
# Nothing to filter when no blob was found.
blobs = blobs_filter(raw_blobs) if raw_blobs else []
print("Found " + str(len(blobs)) + " blobs")
print("Load caffe network")
MODEL_FILE = '/home/victor/Programming/caffe-master/examples/small_letters_experimental/lenet.prototxt'
PRETRAINED = '/home/victor/Programming/caffe-master/examples/small_letters_experimental/lenet_iter_20000.caffemodel'
net = caffe.Net(MODEL_FILE, PRETRAINED,caffe.TEST)
caffe.set_mode_cpu()
print("Obtain result")
result_list = []
start = time.time()
print(len(blobs))
show_blobs = len(sys.argv) > 2
for blob in blobs:
    blob_img = reproduce_blob(blob)
    if show_blobs:
        plt.imshow(blob_img[:, :, 0])
        plt.show()
    # The image is fed channel-first, as a batch of one.
    out = net.forward_all(data=np.asarray([blob_img.transpose(2,0,1)]))
    # Index of the highest 'prob' entry selects the letter.
    result_list.append(chars[out['prob'][0].argmax()])
end = time.time()
print(result_list)
print("Time elapsed: " + str(end - start) + " s")