forked from spmallick/learnopencv
Commit
Added Character Classification code for Synthetic Dataset
1 parent 4cdb311 · commit fc1776d
Showing 3 changed files with 320 additions and 0 deletions.
@@ -0,0 +1,108 @@
import os
import random
import numpy as np

list1 = []
for i in range(65, 65 + 26):          # list of characters from A to Z
    list1.append(chr(i))

list2 = []
for j in range(48, 48 + 10):          # list of digits from 0 to 9
    list2.append(chr(j))

list3 = list1 + list2


def get_distort_arg():                # build the argument for ImageMagick's -distort Perspective
    '''
    Each set of four values represents a source image coordinate followed immediately by the
    destination image coordinate. The argument to -distort Perspective has the form
    'Sx1,Sy1 Dx1,Dy1 Sx2,Sy2 Dx2,Dy2 Sx3,Sy3 Dx3,Dy3 ... Sxn,Syn Dxn,Dyn',
    where S is a source-image point and D is the corresponding destination-image point.
    '''
    amount = 5
    hundred_minus_amount = 100 - amount
    return ('\'0,0 ' + str(np.random.randint(0, amount)) + ',' + str(np.random.randint(0, amount)) +
            ' 100,0 ' + str(np.random.randint(hundred_minus_amount, 100)) + ',' + str(np.random.randint(0, amount)) +
            ' 0,100 ' + str(np.random.randint(0, amount)) + ',' + str(np.random.randint(hundred_minus_amount, 100)) +
            ' 100,100 ' + str(np.random.randint(hundred_minus_amount, 100)) + ',' + str(np.random.randint(hundred_minus_amount, 100)) + '\'')


distort_arg = get_distort_arg()
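# distort_arg is built once and reused for every generated image; an example value
# (the numbers are random) looks like: '0,0 3,2 100,0 97,1 0,100 4,98 100,100 96,99'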

blur_list = ['0x1', '0x1', '0x2', '0x3', '0x2']    # list of blur values
blur_e = random.choice(blur_list)                  # randomly chosen blur
GN = random.randint(0, 1)                          # randomly chosen amount of Gaussian noise

gravity = ['south', 'north', 'east']               # gravity options used to place the text
color_light = ['white', 'lime', 'gray', 'yellow', 'silver', 'aqua']   # list of light font colors
color_dark = ['black', 'green', 'maroon', 'blue', 'purple', 'red']    # list of dark font colors

path1 = 'path to light backgrounds'
path2 = 'path to dark backgrounds'

list_files_light = os.listdir(path1)                          # list of light backgrounds
list_files_light = [path1 + x for x in list_files_light]

font_path = 'path of font file'
list_files_fontt = os.listdir(font_path)                      # list of fonts
list_files_fontt = [font_path + y for y in list_files_fontt]

list_files_dark = os.listdir(path2)                           # list of dark backgrounds
list_files_dark = [path2 + x for x in list_files_dark]

final_list = [list_files_dark, list_files_light]              # paths of dark and light backgrounds
# Crop all the background images to 32x32
for k in range(0, len(list_files_light)):
    p1 = random.randint(0, 300)   # randomly pick the top-left corner of the crop; 300 is an approximate bound since the backgrounds are originally 1240x1240
    p2 = random.randint(0, 300)
    command = "magick convert " + str(list_files_light[k]) + " -crop 32x32" + "+" + str(p1) + "+" + str(p2) + " " + str(list_files_light[k])
    print(command)
    os.system(str(command))

for m in range(0, len(list_files_dark)):
    p1 = random.randint(0, 300)   # randomly pick the top-left corner of the crop
    p2 = random.randint(0, 300)
    command = "magick convert " + str(list_files_dark[m]) + " -crop 32x32" + "+" + str(p1) + "+" + str(p2) + " " + str(list_files_dark[m])
    os.system(str(command))


# Sample command: magick convert image.jpg -fill Black -font Courier-Oblique -weight 50 -pointsize 12 -gravity center -blur 0x8 -evaluate Gaussian-noise 1.2 -annotate 0+0 "Some text" output_image
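# A command produced by the loop below looks roughly like this (paths and random values are illustrative):
# magick convert backgrounds/dark/bg1.jpg -fill white -font fonts/arial.ttf -weight 200 -pointsize 24 -distort Perspective '0,0 3,2 100,0 97,1 0,100 4,98 100,100 96,99' -gravity south -blur 0x2 -evaluate Gaussian-noise 1 -annotate +0+0 A output_dir/A/output_file00.jpg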

for i in range(0, len(list3)):

    directory = "path to save images of each label"
    char = list3[i]
    directory = directory + str(char) + "/"

    if not os.path.exists(directory):   # create a directory for each label
        os.makedirs(directory)
        print("Directory made")

    for j in range(0, 1000):            # generate 1000 images for each label
        gv = random.choice(gravity)
        path = random.choice(final_list)
        list_filernd = random.choice(path)
        list_rfo = random.choice(list_files_fontt)

        if path == final_list[0]:       # dark background -> light font color
            color = random.choice(color_light)
            command = "magick convert " + str(list_filernd) + " -fill " + str(color) + " -font " + \
                str(list_rfo) + " -weight 200 -pointsize 24 -distort Perspective " + str(distort_arg) + " " + "-gravity " + str(gv) + " -blur " + str(blur_e) \
                + " -evaluate Gaussian-noise " + str(GN) + " " + " -annotate +0+0 " + str(list3[i]) + " " + directory + "output_file" + str(i) + str(j) + ".jpg"
            print(command)
            os.system(str(command))

        elif path == final_list[1]:     # light background -> dark font color
            color = random.choice(color_dark)
            command = "magick convert " + str(list_filernd) + " -fill " + str(color) + " -font " + \
                str(list_rfo) + " -weight 200 -pointsize 24 -distort Perspective " + str(distort_arg) + " " + "-gravity " + str(gv) + " -blur " + str(blur_e) \
                + " -evaluate Gaussian-noise " + str(GN) + " " + " -annotate +0+0 " + str(list3[i]) + " " + directory + "output_file" + str(i) + str(j) + ".jpg"
            os.system(str(command))

@@ -0,0 +1,104 @@
# import necessary modules
import keras
import tensorflow as tf
import time
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras import optimizers
from keras.layers.convolutional import Conv2D


# dimensions of our images
img_width, img_height = 32, 32

train_data_dir = 'train'        # training data directory
validation_data_dir = 'test'    # test data directory

nb_train_samples = 28800        # number of training samples
nb_validation_samples = 7200    # number of test samples
epochs = 80                     # number of epochs
batch_size = 128

# This checks whether to put channels first, based on the Keras backend
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)
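# With the default TensorFlow backend (channels_last), input_shape is (32, 32, 3)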

# This is the main model structure: conv => relu => maxpool2d
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))   # first convolution layer
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))                            # second convolution layer
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))                            # third convolution layer
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())                                     # flatten the feature maps
model.add(Dense(256))                                    # FC layer with 256 neurons
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(36))                                     # 36 output classes (26 letters + 10 digits)
model.add(Activation('softmax'))


# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# This is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False)      # no flipping, so that the characters keep their orientation

# This is the augmentation configuration we will use for testing: only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)


train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=True,
    class_mode='categorical')
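# Note: flow_from_directory assigns class indices from the alphabetically sorted folder names,
# so '0'-'9' map to indices 0-9 and 'A'-'Z' to indices 10-35 (the prediction script relies on this ordering)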

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

# Fit the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

# Save the weights and evaluate on the validation data
model.save_weights('check.h5')
score = model.evaluate_generator(validation_generator)
print(score)   # [loss, accuracy]
@@ -0,0 +1,108 @@
import cv2                      # for reading, writing, or showing images
import numpy as np
import matplotlib.pyplot as plt

# import necessary modules
import keras
from keras.preprocessing import image

import tensorflow as tf         # for conversion of the image to a tensor
import time
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras import optimizers
from keras.layers.convolutional import Conv2D
from keras.models import load_model


# Specify the model structure (must match the architecture used for training)
def create_model():
    model = Sequential()

    # conv => relu => maxpool2d
    model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3)))   # 32x32 RGB input
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))        # FC layer with 64 neurons; this must match the trained model (the training script above uses 256) for load_weights to succeed
    model.add(Activation('relu'))
    model.add(Dropout(0.5))     # to reduce overfitting

    model.add(Dense(36))        # number of classes = 36
    model.add(Activation('softmax'))

    return model

def load_image(img_path, show=False):
    '''
    Function: convert an image to a tensor
    Input:  img_path (e.g. /home/user/filename.jpg; an absolute path is preferred)
            show (default False): set True to visualize the image
    Return: tensor form of the image
    '''
    # load the image and resize it to (32, 32) if needed
    img = image.load_img(img_path, target_size=(32, 32))
    # show the image if show=True
    if show:
        plt.imshow(img)
        plt.axis('off')

    # convert the image to a tensor
    img_tensor = image.img_to_array(img)              # (height, width, channels)
    img_tensor = np.expand_dims(img_tensor, axis=0)   # add a batch dimension
    img_tensor /= 255.                                # rescale to [0, 1], as during training

    # return the converted image
    return img_tensor

def predict(weights_path, image_path):
    '''
    Function: load a trained model and predict the class of the given image
    Input:  weights_path (.h5 file; an absolute path is preferred)
            image_path (image to predict; an absolute path is preferred)
    Returns: none
    '''
    model = create_model()
    model.load_weights(weights_path)
    img = load_image(image_path, show=True)   # load the image, rescaled to [0, 1]
    class_ = model.predict(img)               # predict; returns an array of 36 scores
    print("Detected: ", class_[0])            # print the raw predicted scores
    output_indice = -1                        # initialize to -1

    # get the class index with the maximum predicted score
    for i in range(36):
        if i == 0:
            max_score = class_[0][i]
            output_indice = 0
        else:
            if class_[0][i] > max_score:
                max_score = class_[0][i]
                output_indice = i
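    # (this loop is equivalent to output_indice = int(np.argmax(class_[0])))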

    # build the list of the 26 characters A to Z
    characters = []
    for i in range(65, 65 + 26):
        characters.append(chr(i))

    # indices above 9 are character classes
    if output_indice > 9:
        final_result = characters[output_indice - 10]
        print("Predicted: ", final_result)
        print("value: ", max_score)        # print the predicted score
    # otherwise it is a digit, print it directly
    else:
        print("Predicted: ", output_indice)
        print("value: ", max_score)        # print the predicted score


predict("weights.h5", "image.jpg")         # specify the weights file and the test image