__author__ = 'Santanu Pattanayak'
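# Transfer-learning script for five-class diabetic retinopathy classification (class folders '0'-'4'):
# ImageNet pre-trained InceptionV3/ResNet50/VGG16 networks are fine-tuned with k-fold
# cross-validation and the fold models are ensembled on a hold-out validation set.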

import numpy as np
np.random.seed(1000)

import os
import glob
import cv2
import datetime
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.metrics import cohen_kappa_score
from keras.models import Sequential, Model
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn.metrics import log_loss
import keras
from keras import __version__ as keras_version
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, Callback
from keras.applications.resnet50 import preprocess_input
import h5py


def get_im_cv2(path, dim=224):
    # Read an image from disk and resize it to (dim, dim)
    img = cv2.imread(path)
    resized = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_LINEAR)
    return resized

# Pre-process the images with the per-channel mean subtraction used by the ImageNet pre-trained models
def pre_process(img):
    img[:, :, 0] = img[:, :, 0] - 103.939
    img[:, :, 1] = img[:, :, 1] - 116.779
    img[:, :, 2] = img[:, :, 2] - 123.68
    return img
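# Note: 103.939, 116.779 and 123.68 are the standard ImageNet channel means in BGR order
# (cv2.imread returns BGR), so this is roughly the same 'caffe'-style zero-centering that
# the preprocess_input helper imported above performs.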

# Build X, y in numpy format from the train/validation datasets
def read_data(class_folders, path, num_class, dim, train_val='train'):
    print(train_val)
    train_X, train_y = [], []
    for c in class_folders:
        path_class = path + str(train_val) + '/' + str(c)
        file_list = os.listdir(path_class)
        for f in file_list:
            img = get_im_cv2(path_class + '/' + f, dim)
            img = pre_process(img)
            train_X.append(img)
            train_y.append(int(c))
    train_y = keras.utils.np_utils.to_categorical(np.array(train_y), num_class)
    return np.array(train_X), train_y
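# read_data expects images organized as <path>/<train_val>/<class>/<file> (class folders
# named '0'-'4' here) and loads the whole split into memory as one numpy array; for very
# large datasets a generator-based loader may be preferable.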

# InceptionV3 model for transfer learning
def inception_pseudo(dim=224, freeze_layers=30, full_freeze='N'):
    model = InceptionV3(weights='imagenet', include_top=False)
    x = model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(5, activation='softmax')(x)
    model_final = Model(inputs=model.input, outputs=out)
    # Freeze the first freeze_layers layers of the base network only when full_freeze is not 'N'
    if full_freeze != 'N':
        for layer in model.layers[0:freeze_layers]:
            layer.trainable = False
    return model_final
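# The same classification head (GlobalAveragePooling2D followed by two Dense(512)/Dropout(0.5)
# blocks and a 5-way softmax) is reused for the ResNet50 and VGG16 variants below.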

# ResNet50 model for transfer learning
def resnet_pseudo(dim=224, freeze_layers=10, full_freeze='N'):
    model = ResNet50(weights='imagenet', include_top=False)
    x = model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(5, activation='softmax')(x)
    model_final = Model(inputs=model.input, outputs=out)
    if full_freeze != 'N':
        for layer in model.layers[0:freeze_layers]:
            layer.trainable = False
    return model_final

# VGG16 model for transfer learning

def VGG16_pseudo(dim=224, freeze_layers=10, full_freeze='N'):
    model = VGG16(weights='imagenet', include_top=False)
    x = model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(5, activation='softmax')(x)
    model_final = Model(inputs=model.input, outputs=out)
    if full_freeze != 'N':
        for layer in model.layers[0:freeze_layers]:
            layer.trainable = False
    return model_final


def train_model(train_X, train_y, n_fold=5, batch_size=16, dim=224, lr=1e-5, model='ResNet50'):
    model_save_dest = {}
    k = 0
    kf = KFold(n_splits=n_fold, random_state=0, shuffle=True)

    for train_index, test_index in kf.split(train_X):

        k += 1
        X_train, X_test = train_X[train_index], train_X[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]

        if model == 'ResNet50':
            model_final = resnet_pseudo(dim=224, freeze_layers=10, full_freeze='N')
        if model == 'VGG16':
            model_final = VGG16_pseudo(dim=224, freeze_layers=10, full_freeze='N')
        if model == 'InceptionV3':
            model_final = inception_pseudo(dim=224, freeze_layers=10, full_freeze='N')

        datagen = ImageDataGenerator(
            horizontal_flip=True,
            vertical_flip=True,
            width_shift_range=0.1,
            height_shift_range=0.1,
            channel_shift_range=0,
            zoom_range=0.2,
            rotation_range=20)

        adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        model_final.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.50,
                                                      patience=3, min_lr=0.000001)

        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1),
            CSVLogger('keras-5fold-run-01-v1-epochs_ib.log', separator=',', append=False), reduce_lr,
            ModelCheckpoint(
                'kera1-5fold-run-01-v1-fold-' + str('%02d' % (k + 1)) + '-run-' + str('%02d' % (1 + 1)) + '.check',
                monitor='val_loss', mode='min',  # monitoring val_loss, so keep the checkpoint with the minimum value
                save_best_only=True,
                verbose=1)
        ]

        model_final.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                  steps_per_epoch=X_train.shape[0] // batch_size, epochs=20, verbose=1,
                                  validation_data=(X_test, y_test), callbacks=callbacks)

        # Reload the best checkpoint for this fold; the optimizer state is removed from the
        # HDF5 file so that load_model does not trip over the stored optimizer weights.
        model_name = 'kera1-5fold-run-01-v1-fold-' + str('%02d' % (k + 1)) + '-run-' + str('%02d' % (1 + 1)) + '.check'
        del model_final
        f = h5py.File(model_name, 'r+')
        del f['optimizer_weights']
        f.close()
        model_final = keras.models.load_model(model_name)
        model_name1 = '/home/santanu/Downloads/Diabetic Retinopathy/' + str(model) + '___' + str(k)
        model_final.save(model_name1)
        model_save_dest[k] = model_name1

    return model_save_dest
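# train_model returns a dictionary mapping the fold number (1..n_fold) to the path of the model
# saved for that fold; inference_validation below averages the predictions of these fold models.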

# Hold-out dataset validation function

def inference_validation(test_X, test_y, model_save_dest, n_class=5, folds=5):
    pred = np.zeros((len(test_X), n_class))

    for k in range(1, folds + 1):
        model = keras.models.load_model(model_save_dest[k])
        pred = pred + model.predict(test_X)
    pred = pred / (1.0 * folds)
    pred_class = np.argmax(pred, axis=1)
    act_class = np.argmax(test_y, axis=1)
    accuracy = np.sum(pred_class == act_class) * 1.0 / len(test_X)
    kappa = cohen_kappa_score(pred_class, act_class, weights='quadratic')
    return pred_class, accuracy, kappa
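# The hold-out predictions are the average of the softmax outputs of the fold models; accuracy
# and the quadratic-weighted Cohen's kappa (a common metric for ordinal grading tasks) are returned.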


if __name__ == "__main__":
    start_time = time.time()
    path = '/home/santanu/Downloads/Diabetic Retinopathy/New/'
    class_folders = ['0', '1', '2', '3', '4']
    num_class = len(class_folders)
    dim = 224
    lr = 1e-5
    print('Starting time:', start_time)
    train_X, train_y = read_data(class_folders, path, num_class, dim, train_val='train')
    model_save_dest = train_model(train_X, train_y, n_fold=5, batch_size=16, dim=224, lr=1e-5, model='InceptionV3')
    #model_save_dest = {1:'InceptionV3__1'}
    test_X, test_y = read_data(class_folders, path, num_class, dim, train_val='validation')
    pred_class, accuracy, kappa = inference_validation(test_X, test_y, model_save_dest, n_class=5, folds=5)
    np.save(path + "dict_model", model_save_dest)
    print("-----------------------------------------------------")
    print("Kappa score:", kappa)
    print("accuracy:", accuracy)
    print("End of training")
    print("-----------------------------------------------------")