diff --git a/deeplearning1/nbs/vgg16.py b/deeplearning1/nbs/vgg16.py index 362b7a469..7a3e18a40 100755 --- a/deeplearning1/nbs/vgg16.py +++ b/deeplearning1/nbs/vgg16.py @@ -19,12 +19,23 @@ vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1)) def vgg_preprocess(x): + """ + Subtracts the mean RGB value, and reverses the channel order from RGB to BGR. + The mean RGB was computed on the image set used to train the VGG model. + + Args: + x: Image array (batch x channels x height x width), channels in RGB order + Returns: + Image array of the same shape, with channels in BGR order + """ x = x - vgg_mean return x[:, ::-1] # reverse axis rgb->bgr class Vgg16(): - """The VGG 16 Imagenet model""" + """ + The VGG 16 Imagenet model + """ def __init__(self): @@ -34,6 +45,10 @@ def __init__(self): def get_classes(self): + """ + Downloads the Imagenet classes index file and loads it to self.classes. + The file is downloaded only if it is not already in the cache. + """ fname = 'imagenet_class_index.json' fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models') with open(fpath) as f: @@ -41,14 +56,40 @@ def get_classes(self): self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))] def predict(self, imgs, details=False): + """ + Predict the labels of a set of images using the VGG16 model. + + Args: + imgs (ndarray) : An array of N images (size: N x channels x height x width). + details (bool) : Currently unused by this method. + + Returns: + preds (np.array) : Highest confidence value of the predictions for each image. + idxs (np.ndarray): Class index of the predictions with the max confidence. + classes (list) : Class labels of the predictions with the max confidence. 
+ """ + # predict probability of each class for each image all_preds = self.model.predict(imgs) + # for each image get the index of the class with max probability idxs = np.argmax(all_preds, axis=1) + # get the values of the highest probability for each image preds = [all_preds[i, idxs[i]] for i in range(len(idxs))] + # get the label of the class with the highest probability for each image classes = [self.classes[idx] for idx in idxs] return np.array(preds), idxs, classes def ConvBlock(self, layers, filters): + """ + Adds a specified number of ZeroPadding and Convolution layers + to the model, and a MaxPooling layer at the very end. + + Args: + layers (int): The number of zero padded convolution layers + to be added to the model. + filters (int): The number of convolution filters to be + created for each layer. + """ model = self.model for i in range(layers): model.add(ZeroPadding2D((1, 1))) @@ -57,12 +98,25 @@ def ConvBlock(self, layers, filters): def FCBlock(self): + """ + Adds a fully connected layer of 4096 neurons to the model with a + Dropout of 0.5 + + Args: None + Returns: None + """ model = self.model model.add(Dense(4096, activation='relu')) model.add(Dropout(0.5)) def create(self): + """ + Creates the VGG16 network architecture and loads the pretrained weights. + + Args: None + Returns: None + """ model = self.model = Sequential() model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224))) @@ -82,11 +136,26 @@ def create(self): def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'): + """ + Takes the path to a directory, and generates batches of augmented/normalized data. Yields batches indefinitely, in an infinite loop. 
+ + See Keras documentation: https://keras.io/preprocessing/image/ + """ return gen.flow_from_directory(path, target_size=(224,224), class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) def ft(self, num): + """ + Replace the last layer of the model with a Dense (fully connected) layer of num neurons. + Will also lock the weights of all layers except the new layer so that we only learn + weights for the last layer in subsequent training. + + Args: + num (int) : Number of neurons in the Dense layer + Returns: + None + """ model = self.model model.pop() for layer in model.layers: layer.trainable=False @@ -94,29 +163,65 @@ def ft(self, num): self.compile() def finetune(self, batches): + """ + Modifies the original VGG16 network architecture and updates self.classes for new training data. + + Args: + batches : The batch iterator returned by flow_from_directory(), not the ImageDataGenerator itself. + See definition for get_batches(). + """ self.ft(batches.nb_class) - classes = list(iter(batches.class_indices)) + classes = list(iter(batches.class_indices)) # get a list of all the class labels + + # batches.class_indices is a dict with the class name as key and an index as value + # e.g. {'cats': 0, 'dogs': 1} + + # sort the class labels by index according to batches.class_indices and update self.classes for c in batches.class_indices: classes[batches.class_indices[c]] = c self.classes = classes def compile(self, lr=0.001): + """ + Configures the model for training. + See Keras documentation: https://keras.io/models/model/ + """ self.model.compile(optimizer=Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy']) def fit_data(self, trn, labels, val, val_labels, nb_epoch=1, batch_size=64): + """ + Trains the model for a fixed number of epochs (iterations on a dataset). 
+ See Keras documentation: https://keras.io/models/model/ + """ self.model.fit(trn, labels, nb_epoch=nb_epoch, validation_data=(val, val_labels), batch_size=batch_size) def fit(self, batches, val_batches, nb_epoch=1): + """ + Fits the model on data yielded batch-by-batch by a Python generator. + See Keras documentation: https://keras.io/models/model/ + """ self.model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=nb_epoch, validation_data=val_batches, nb_val_samples=val_batches.nb_sample) def test(self, path, batch_size=8): + """ + Predicts the classes using the trained model on data yielded batch-by-batch. + + Args: + path (string): Path to the target directory. It should contain one subdirectory + per class. + batch_size (int): The number of images to be considered in each batch. + + Returns: + test_batches, numpy array(s) of predictions for the test_batches. + + """ test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None) return test_batches, self.model.predict_generator(test_batches, test_batches.nb_sample)