From 086f7cb131f85061bee620b90522c19048658b88 Mon Sep 17 00:00:00 2001
From: Fabian Loosli
Date: Fri, 9 Aug 2019 13:49:50 +0200
Subject: [PATCH] Initial commit

CP_GAN implementation in Keras, after the paper in the README.md
---
 CP_GAN_models.py         | 309 +++++++++++++++++++++++++
 CP_GAN_train_and_eval.py | 470 +++++++++++++++++++++++++++++++++++++++
 Input_and_Utils.py       | 376 +++++++++++++++++++++++++++++++
 3 files changed, 1155 insertions(+)
 create mode 100644 CP_GAN_models.py
 create mode 100644 CP_GAN_train_and_eval.py
 create mode 100644 Input_and_Utils.py

diff --git a/CP_GAN_models.py b/CP_GAN_models.py
new file mode 100644
index 0000000..49dbc30
--- /dev/null
+++ b/CP_GAN_models.py
@@ -0,0 +1,309 @@
+from keras.models import Model
+from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dense, Lambda, AveragePooling2D, ReLU,\
+    LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.layers.core import Activation
+from keras.layers.merge import concatenate
+from keras import backend as K
+
+
+# Input shape of the new background
+INPUT_BACKGROUND = (72, 72, 3)
+# Input shape of the cropped image
+INPUT_CROP = (128, 128, 3)
+# Input shape of the discriminator
+INPUT_DISC = (72, 72, 3)
+# Number of filters used by the convolutions
+FILTERS = 64
+# Axis along which to concatenate and to normalize with batch norm
+AXIS = -1
+# Size of the padding border
+PADDING = 4
+
+
+# metrics
+def conf_metric(y_true, y_pred):
+    return K.mean(K.minimum(y_pred, 1 - y_pred))
+
+
+def min_metric(y_true, y_pred):
+    return K.mean(K.maximum((-1 * K.mean(y_pred, axis=[1, 2, 3]) + 0.2), 0))
+
+
+# The computed mask should not be smaller than 20% of the bbox
+def min_mask_loss(y_true, y_pred):
+    return K.mean(K.maximum((-1 * K.mean(y_pred, axis=[1, 2, 3]) + 0.2), 0))
+
+
+# The mask should make confident predictions, i.e. each pixel should be close to either 0 or 1
+def confidence_loss(y_true, y_pred):
+    return K.mean(K.minimum(y_pred, 1 - y_pred))
+
+
+def mask_loss(y_true, y_pred):
+    return confidence_loss(y_true, y_pred) + min_mask_loss(y_true, y_pred)
+
+
+# Cut-and-paste function, used as a custom Lambda layer
+# Creates a new image from a given image, its mask and a given background
+def cut_and_paste(x):
+
+    background = x[0]
+    crop = x[1]
+    mask = x[2]
+
+    mask_to_paste = mask * crop
+    mask_to_paste = K.spatial_2d_padding(mask_to_paste,
+                                         padding=((PADDING*2, 0), (PADDING, PADDING)),
+                                         data_format='channels_last')
+
+    prep_mask = K.spatial_2d_padding(mask,
+                                     padding=((PADDING*2, 0), (PADDING, PADDING)),
+                                     data_format='channels_last')
+
+    inverted_mask = 1 - prep_mask
+
+    cp_img = (background * inverted_mask) + mask_to_paste
+
+    return cp_img
+
+
+# Double convolution layer, the building block of the U-Net
+def double_conv_layer(inputs, filters):
+
+    conv = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
+    conv = BatchNormalization(axis=AXIS, momentum=0.5)(conv)
+    conv = ReLU()(conv)
+    conv = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(conv)
+    conv = BatchNormalization(axis=AXIS, momentum=0.5)(conv)
+    conv = ReLU()(conv)
+
+    return conv
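+
+
+# Illustrative sketch (not part of the model): the same compositing that the
+# cut_and_paste Lambda performs, written with NumPy so the padding arithmetic
+# is easy to check. A 64x64 mask padded with PADDING*2 = 8 rows on top and
+# PADDING = 4 columns on each side yields the 72x72 background resolution.
+# All array names and values here are made up for the example.
+def _numpy_cut_and_paste_sketch():
+    import numpy as np
+    background = np.zeros((72, 72, 3))
+    crop = np.ones((64, 64, 3))
+    mask = np.ones((64, 64, 1))
+
+    pad = ((PADDING * 2, 0), (PADDING, PADDING), (0, 0))
+    mask_to_paste = np.pad(mask * crop, pad, mode='constant')
+    prep_mask = np.pad(mask, pad, mode='constant')
+
+    # paste the masked crop onto the background where the mask is active
+    cp_img = background * (1 - prep_mask) + mask_to_paste
+    assert cp_img.shape == (72, 72, 3)
+    return cp_img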
+
+
+# Architecture of the generator, a U-Net
+# Input shapes:
+# background_img: 72*72*3
+# crop: 128*128*3
+# Output shapes:
+# new_img: 72*72*3
+# mask: 64*64*1
+def unet_gen_for_gan():
+
+    background = Input(shape=INPUT_BACKGROUND)
+    crop = Input(shape=INPUT_CROP)
+
+    conv_256 = double_conv_layer(inputs=crop, filters=FILTERS)
+    pool_128 = MaxPooling2D(pool_size=(2, 2))(conv_256)
+
+    conv_128 = double_conv_layer(inputs=pool_128, filters=2*FILTERS)
+    pool_64 = MaxPooling2D(pool_size=(2, 2))(conv_128)
+
+    conv_64 = double_conv_layer(inputs=pool_64, filters=4*FILTERS)
+    pool_32 = MaxPooling2D(pool_size=(2, 2))(conv_64)
+
+    conv_32 = double_conv_layer(inputs=pool_32, filters=8*FILTERS)
+    pool_16 = MaxPooling2D(pool_size=(2, 2))(conv_32)
+
+    conv_16 = double_conv_layer(inputs=pool_16, filters=16*FILTERS)
+    pool_8 = MaxPooling2D(pool_size=(2, 2))(conv_16)
+
+    conv_8 = double_conv_layer(inputs=pool_8, filters=32*FILTERS)
+
+    up_16 = concatenate([UpSampling2D(size=(2, 2))(conv_8), conv_16], axis=AXIS)
+    up_conv_16 = double_conv_layer(inputs=up_16, filters=16*FILTERS)
+
+    up_32 = concatenate([UpSampling2D(size=(2, 2))(up_conv_16), conv_32], axis=AXIS)
+    up_conv_32 = double_conv_layer(inputs=up_32, filters=8*FILTERS)
+
+    up_64 = concatenate([UpSampling2D(size=(2, 2))(up_conv_32), conv_64], axis=AXIS)
+    up_conv_64 = double_conv_layer(inputs=up_64, filters=4*FILTERS)
+
+    up_128 = concatenate([UpSampling2D(size=(2, 2))(up_conv_64), conv_128], axis=AXIS)
+    up_conv_128 = double_conv_layer(inputs=up_128, filters=2*FILTERS)
+
+    up_256 = concatenate([UpSampling2D(size=(2, 2))(up_conv_128), conv_256], axis=AXIS)
+    up_conv_256 = double_conv_layer(inputs=up_256, filters=FILTERS)
+
+    conv_final = Conv2D(filters=1, kernel_size=(2, 2), strides=(2, 2))(up_conv_256)
+    mask = Activation('sigmoid')(conv_final)
+
+    org_crop = AveragePooling2D(pool_size=(2, 2), padding='valid')(crop)
+
+    cut_paste_layer = Lambda(cut_and_paste, output_shape=None)
+    new_img = cut_paste_layer([background, org_crop, mask])
+
+    model = Model([background, crop], [new_img, mask], name='Gen')
+
+    model.summary()
+
+    return model
+
+
+# Architecture of the generator for supervised learning, U-Net
+# Input shape: 128*128*3
+# Output shape: 64*64*1
+def super_unet_gen():
+
+    crop = Input(shape=INPUT_CROP)
+
+    conv_256 = double_conv_layer(inputs=crop, filters=FILTERS)
+    pool_128 = MaxPooling2D(pool_size=(2, 2))(conv_256)
+
+    conv_128 = double_conv_layer(inputs=pool_128, filters=2 * FILTERS)
+    pool_64 = MaxPooling2D(pool_size=(2, 2))(conv_128)
+
+    conv_64 = double_conv_layer(inputs=pool_64, filters=4 * FILTERS)
+    pool_32 = MaxPooling2D(pool_size=(2, 2))(conv_64)
+
+    conv_32 = double_conv_layer(inputs=pool_32, filters=8 * FILTERS)
+    pool_16 = MaxPooling2D(pool_size=(2, 2))(conv_32)
+
+    conv_16 = double_conv_layer(inputs=pool_16, filters=16 * FILTERS)
+    pool_8 = MaxPooling2D(pool_size=(2, 2))(conv_16)
+
+    conv_8 = double_conv_layer(inputs=pool_8, filters=32 * FILTERS)
+
+    up_16 = concatenate([UpSampling2D(size=(2, 2))(conv_8), conv_16], axis=AXIS)
+    up_conv_16 = double_conv_layer(inputs=up_16, filters=16 * FILTERS)
+
+    up_32 = concatenate([UpSampling2D(size=(2, 2))(up_conv_16), conv_32], axis=AXIS)
+    up_conv_32 = double_conv_layer(inputs=up_32, filters=8 * FILTERS)
+
+    up_64 = concatenate([UpSampling2D(size=(2, 2))(up_conv_32), conv_64], axis=AXIS)
+    up_conv_64 = double_conv_layer(inputs=up_64, filters=4 * FILTERS)
+
+    up_128 = concatenate([UpSampling2D(size=(2, 2))(up_conv_64), conv_128], axis=AXIS)
+    up_conv_128 = double_conv_layer(inputs=up_128, filters=2 * FILTERS)
+
+    up_256 = concatenate([UpSampling2D(size=(2, 2))(up_conv_128), conv_256], axis=AXIS)
+    up_conv_256 = double_conv_layer(inputs=up_256, filters=FILTERS)
+
+    # kernel/stride 2 maps the 128x128 feature map to the 64x64 mask expected
+    # by the training and evaluation code (kernel/stride 4 would yield 32x32)
+    conv_final = Conv2D(filters=1, kernel_size=(2, 2), strides=(2, 2))(up_conv_256)
+    mask = Activation('sigmoid')(conv_final)
+
+    # the mask is output twice so the supervised training loop can attach two
+    # loss functions, matching super_paper_gen below
+    model = Model(crop, [mask, mask], name='Super_gen')
+
+    model.summary()
+
+    return model
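+
+
+# Illustrative sketch (not part of training): a quick shape check of the U-Net
+# generator on random inputs. Assumes a working Keras session; the array names
+# are made up for the example.
+def _unet_shape_check_sketch():
+    import numpy as np
+    gen = unet_gen_for_gan()
+    background = np.random.rand(1, 72, 72, 3)
+    crop = np.random.rand(1, 128, 128, 3)
+    new_img, mask = gen.predict([background, crop])
+    assert new_img.shape == (1, 72, 72, 3)
+    assert mask.shape == (1, 64, 64, 1)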
+
+
+# Discriminator architecture according to the paper
+# Input shape: 72*72*3
+# Output shape: scalar
+def paper_disc():
+
+    img = Input(shape=INPUT_DISC)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=3, strides=1, padding='valid')(img)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    lrelu_1 = LeakyReLU(alpha=0.2)(batch_norm_1)
+
+    conv_2 = Conv2D(filters=2*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    lrelu_2 = LeakyReLU(alpha=0.2)(batch_norm_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_2)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    lrelu_3 = LeakyReLU(alpha=0.2)(batch_norm_3)
+
+    conv_4 = Conv2D(filters=8*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_3)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    lrelu_4 = LeakyReLU(alpha=0.2)(batch_norm_4)
+
+    flatten = Flatten()(lrelu_4)
+
+    dense = Dense(units=1)(flatten)
+
+    valid = Activation('sigmoid')(dense)
+
+    model = Model(img, valid, name='Disc')
+
+    model.summary()
+
+    return model
+
+
+# Generator architecture according to the paper
+# Input shapes:
+# background_img: 72*72*3
+# crop: 128*128*3
+# Output shapes:
+# new_img: 72*72*3
+# mask: 64*64*1
+def paper_gen():
+
+    background = Input(shape=INPUT_BACKGROUND)
+    crop = Input(shape=INPUT_CROP)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=1, strides=2, padding='same')(crop)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    relu_1 = ReLU()(batch_norm_1)
+
+    conv_2 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=1, padding='same')(relu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    relu_2 = ReLU()(batch_norm_2)
+
+    up_1 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=1, padding='same')(up_1)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    relu_3 = ReLU()(batch_norm_3)
+
+    up_2 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_3)
+
+    conv_4 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_2)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    relu_4 = ReLU()(batch_norm_4)
+
+    conv_5 = Conv2D(filters=1, kernel_size=3, strides=2, padding='same')(relu_4)
+    mask = Activation('sigmoid')(conv_5)
+
+    org_crop = AveragePooling2D(pool_size=(2, 2), padding='valid')(crop)
+
+    cut_paste_layer = Lambda(cut_and_paste, output_shape=None)
+    new_img = cut_paste_layer([background, org_crop, mask])
+
+    model = Model([background, crop], [new_img, mask], name='Gen')
+
+    model.summary()
+
+    return model
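+
+
+# Illustrative sketch (not part of training): probing the discriminator with
+# random batches. In the LSGAN-style setup in CP_GAN_train_and_eval.py, real
+# crops are fitted toward 1 and composites toward 0 with an 'mse' loss; the
+# array names here are made up for the example.
+def _disc_probe_sketch():
+    import numpy as np
+    disc = paper_disc()
+    real_batch = np.random.rand(4, 72, 72, 3)
+    fake_batch = np.random.rand(4, 72, 72, 3)
+    print('real score:', disc.predict(real_batch).mean())
+    print('fake score:', disc.predict(fake_batch).mean())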
+
+
+# Generator architecture according to the paper, supervised
+# Input shape: 128*128*3
+# Output shape: 64*64*1
+def super_paper_gen():
+
+    crop = Input(shape=INPUT_CROP)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=1, strides=1, padding='same')(crop)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    relu_1 = ReLU()(batch_norm_1)
+
+    conv_2 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(relu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    relu_2 = ReLU()(batch_norm_2)
+
+    up_1 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_1)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    relu_3 = ReLU()(batch_norm_3)
+
+    up_2 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_3)
+
+    conv_4 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_2)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    relu_4 = ReLU()(batch_norm_4)
+
+    conv_5 = Conv2D(filters=1, kernel_size=3, strides=1, padding='same')(relu_4)
+    mask = Activation('sigmoid')(conv_5)
+
+    # the mask is output twice so that two loss functions (binary_crossentropy
+    # and mask_loss) can be attached during supervised training
+    model = Model([crop], [mask, mask], name='Paper_gen')
+
+    model.summary()
+
+    return model
diff --git a/CP_GAN_train_and_eval.py b/CP_GAN_train_and_eval.py
new file mode 100644
index 0000000..26657a8
--- /dev/null
+++ b/CP_GAN_train_and_eval.py
@@ -0,0 +1,470 @@
+import Input_and_Utils
+import os
+import tensorflow as tf
+import time
+import numpy as np
+
+from CP_GAN_models import *
+from keras.optimizers import Adam, SGD, RMSprop
+from keras.callbacks import CSVLogger
+from keras.losses import binary_crossentropy
+from keras import backend as K
+
+
+# Paths for the Discriminator and Generator weights of the original architecture
+DISC_MODEL_PATH = '../disc_weight.h5'
+GEN_MODEL_PATH = '../gen_weight.h5'
+# Path for the Generator weights of the U-Net architecture
+UNET_GEN_MODEL_PATH = '../unet_gen_weight.h5'
+# Paths for the U-Net Discriminator weights and the pseudo-supervised weights
+# (both are referenced below but were missing in this commit; the file names
+# here are placeholders)
+UNET_DISC_MODEL_PATH = '../unet_disc_weight.h5'
+PSEUDO_SUPERVISED_GEN_PATH = '../pseudo_super_weight.h5'
+# Path for the weights of the supervised network
+SUPERVISED_GEN_PATH = '../super_weight.h5'
+# Input shapes, careful!! the proportions have to be in check to work
+WIDTH_CROP = 128
+HEIGHT_CROP = 128
+WIDTH_ORG = 144  # WIDTH_CROP + 4*PADDING from CP_GAN_models
+HEIGHT_ORG = 144  # HEIGHT_CROP + 4*PADDING from CP_GAN_models
+WIDTH_REAL = 72  # Half the size of WIDTH_ORG
+HEIGHT_REAL = 72  # Half the size of HEIGHT_ORG
+CHANNELS = 3
+# Some parameters for fine-tuning and options for training
+BATCH_SIZE = 4
+DISC_ITER = 1
+GAN_ITER = 1
+CYCLES = 1002
+NOISE = False
+BLUR = False
+LEARNING_RATE = 0.0001
+# Available optimizers: 'adam', 'sgd', 'RMSprop'
+CPGAN_OPTIMIZER = 'adam'
+DISC_OPTIMIZER = 'adam'
+SUPER_OPTIMIZER = 'adam'
+PSEUDO_OPTIMIZER = 'adam'
+# Some options concerning modes and computational resources
+PRETRAINED = True
+MODEL = 'paper_cpgan'  # 'unet_cpgan', 'paper_cpgan' or 'supervised'
+SUPERVISED_MODEL = 'paper'  # 'paper' or 'unet'
+UNIT = 'CPU'  # 'GPU' or 'CPU'
+NUM_CORES = 4
+MODE = 'train'  # Mode 'train' or 'evaluate'
+# Category to determine which images to use
+CATEGORY = 'person'
+# CSV loggers for writing the training results to external CSV files
+GEN_LOGGER = CSVLogger(filename='../gen_logger.csv',
+                       separator=';',
+                       append=True)
+DISC_LOGGER = CSVLogger(filename='../disc_logger.csv',
+                        separator=';',
+                        append=True)
+SUPER_LOGGER = CSVLogger(filename='../super_logger.csv',
+                         separator=';',
+                         append=True)
+
+
+# Dictionary of available optimizers, selected via the constants at the top of the file
+# Adjust the learning rate at the top of the file
+def get_optim(optim):
+    return {
+        'sgd': SGD(lr=LEARNING_RATE, momentum=0, decay=0, nesterov=False),
+        'adam': Adam(lr=LEARNING_RATE, beta_1=0.9, beta_2=0.999, epsilon=0.0000001, decay=0, amsgrad=False),
+        'RMSprop': RMSprop(lr=LEARNING_RATE, rho=0.9, epsilon=None, decay=0.0)
+    }.get(optim, Adam(lr=LEARNING_RATE))
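+
+
+# Illustrative sketch (not part of the pipeline): the freeze pattern used by
+# Paper_CPGAN below. trainable is toggled around compile(), so the combined
+# GAN updates only the generator while the separately compiled discriminator
+# still trains on its own. The toy model names are made up for the example.
+def _freeze_pattern_sketch():
+    from keras.models import Model
+    from keras.layers import Input, Dense
+
+    inp = Input(shape=(8,))
+    disc = Model(inp, Dense(1, activation='sigmoid')(inp), name='toy_disc')
+
+    disc.trainable = False               # frozen inside the combined model
+    gan_in = Input(shape=(8,))
+    gan = Model(gan_in, disc(Dense(8)(gan_in)), name='toy_gan')
+    gan.compile(optimizer='adam', loss='mse')
+
+    disc.trainable = True                # trainable again when compiled alone
+    disc.compile(optimizer='adam', loss='mse')
+    return gan, disc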
+
+
+# CPGAN class to create a GAN with 'mse' as the adversarial loss function
+class Paper_CPGAN:
+
+    def __init__(self):
+
+        if MODEL == 'unet_cpgan':
+            self.model_gen = unet_gen_for_gan()
+            GEN_PATH = UNET_GEN_MODEL_PATH
+            DISC_PATH = UNET_DISC_MODEL_PATH
+        else:
+            self.model_gen = paper_gen()
+            GEN_PATH = GEN_MODEL_PATH
+            DISC_PATH = DISC_MODEL_PATH
+
+        # Load Discriminator
+        self.model_disc = paper_disc()
+        if os.path.isfile(DISC_PATH) and PRETRAINED:
+            print('loading weights for discriminator...')
+            self.model_disc.load_weights(DISC_PATH)
+
+        # Load Generator
+        if os.path.isfile(GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            self.model_gen.load_weights(GEN_PATH)
+
+        # Construct and compile GAN
+        org_crop = Input(shape=(WIDTH_CROP, HEIGHT_CROP, CHANNELS))
+        background = Input(shape=(WIDTH_REAL, HEIGHT_REAL, CHANNELS))
+
+        pred = self.model_gen([background, org_crop])
+        fake = pred[0]
+        mask = pred[1]
+
+        self.model_disc.trainable = False
+        self.model_gen.trainable = True
+        res_fake = self.model_disc(fake)
+
+        self.cp_gan = Model(inputs=[background, org_crop], outputs=[res_fake, mask], name='CPGAN')
+        self.cp_gan.compile(optimizer=get_optim(CPGAN_OPTIMIZER),
+                            loss={'Disc': 'mse', 'Gen': mask_loss},
+                            loss_weights={'Disc': 1.0, 'Gen': 1.0},
+                            metrics={'Disc': 'accuracy', 'Gen': [conf_metric, min_metric]}
+                            )
+
+        self.model_disc.trainable = True
+
+        # Compile Discriminator
+        self.model_disc.compile(optimizer=get_optim(DISC_OPTIMIZER),
+                                loss=['mse'],
+                                loss_weights=[1],
+                                metrics=['accuracy'])
+
+    # Train the CPGAN with 'mse' as the adversarial loss function
+    def train_paper_cpgan(self):
+
+        if MODEL == 'unet_cpgan':
+            GEN_PATH = UNET_GEN_MODEL_PATH
+            DISC_PATH = UNET_DISC_MODEL_PATH
+        else:
+            GEN_PATH = GEN_MODEL_PATH
+            DISC_PATH = DISC_MODEL_PATH
+
+        dataset = Input_and_Utils.get_train_dataset()
+        gan_train = 0
+        disc_train = 0
+
+        start_time = time.time()
+        print('Start training....')
+
+        for cycle in range(CYCLES):
+            print('Cycle %i | %i' % (cycle, CYCLES))
+
+            # Prepare the input data; request more images than BATCH_SIZE since
+            # unusable annotations are filtered out below
+            images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=6, category=CATEGORY,
+                                                              noise=NOISE, blur=BLUR)
+
+            org_images, crop_list, bboxes = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                               shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                               custom_size=True)
+
+            temp1, real_list, temp2 = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                         shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                         custom_size=True)
+
+            real_list = Input_and_Utils.resize_images(real_list, WIDTH_REAL, HEIGHT_REAL)
+
+            temp1, backgrounds, temp2 = Input_and_Utils.get_cropped_images(images=org_images,
+                                                                           shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                           random_position=True,
+                                                                           custom_size=True)
+
+            backgrounds = Input_and_Utils.resize_images(backgrounds, WIDTH_REAL, HEIGHT_REAL)
+
+            backgrounds = backgrounds[0:BATCH_SIZE]
+            crop_list = crop_list[0:BATCH_SIZE]
+
+            if len(backgrounds) < BATCH_SIZE or len(crop_list) < BATCH_SIZE:
+                continue
+
+            real = np.ones((BATCH_SIZE, 1))
+            real_list = np.array(real_list[0:BATCH_SIZE])
+
+            # dummy target; mask_loss ignores y_true, but the shape has to
+            # match the 4-D mask output of the generator
+            placeholder_mask = np.ones((BATCH_SIZE, 64, 64, 1))
+
+            backgrounds = np.array(backgrounds)
+            crop_list = np.array(crop_list)
+
+            fake = np.zeros((BATCH_SIZE, 1))
+
+            predictions = self.model_gen.predict([backgrounds, crop_list],
+                                                 verbose=0,
+                                                 steps=None)
+
+            if (cycle % 50) == 0:
+                Input_and_Utils.save_image((predictions[0][0] + 1) / 2, 'cycle %s' % cycle)
+
+            # Train GAN
+            print('Generator training.....')
+            self.cp_gan.fit(x=[backgrounds, crop_list],
+                            y=[real, placeholder_mask],
+                            batch_size=BATCH_SIZE, epochs=GAN_ITER,
+                            verbose=1, callbacks=[GEN_LOGGER])
+
+            gan_train += len(crop_list)
+            print('GAN trained on %s images' % gan_train)
+
+            # Train Discriminator
+            print('Discriminator training.....')
+
+            self.model_disc.fit(x=predictions[0], y=fake,
+                                batch_size=BATCH_SIZE, epochs=DISC_ITER,
+                                verbose=1, callbacks=[DISC_LOGGER])
+
+            self.model_disc.fit(real_list, y=real,
+                                batch_size=BATCH_SIZE, epochs=DISC_ITER,
+                                verbose=1, callbacks=[DISC_LOGGER])
+
+            disc_train += len(fake)
+            print('Discriminator trained on %s images' % disc_train)
+
+            if (cycle % 25) == 0:
+                self.model_disc.save_weights(DISC_PATH)
+                self.model_gen.save_weights(GEN_PATH)
+                print('save weights....')
+
+        self.model_disc.save_weights(DISC_PATH)
+        self.model_gen.save_weights(GEN_PATH)
+        print('save weights....')
+        print('training finished in %s' % (time.time() - start_time))
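+
+
+# Illustrative sketch (not part of training): what mask_loss from
+# CP_GAN_models rewards, recomputed with NumPy on a dummy mask. The confidence
+# term pushes every pixel toward 0 or 1; the minimum-size term penalizes masks
+# covering less than 20% of the crop. The values are made up for the example.
+def _mask_loss_sketch():
+    mask = np.full((1, 64, 64, 1), 0.5)                    # maximally unconfident mask
+    confidence = np.mean(np.minimum(mask, 1 - mask))       # -> 0.5, the worst case
+    min_size = np.mean(np.maximum(0.2 - np.mean(mask, axis=(1, 2, 3)), 0))  # -> 0, mask is large enough
+    return confidence + min_size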
+
+
+# Train the generator with ground truth masks
+def train_with_gt(model='paper'):
+
+    dataset = Input_and_Utils.get_train_dataset()
+
+    if model == 'paper':
+        model_gen = super_paper_gen()
+        if os.path.isfile(SUPERVISED_GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    elif model == 'unet':
+        model_gen = super_unet_gen()
+        if os.path.isfile(SUPERVISED_GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    else:
+        print('no model available')
+        exit()
+
+    model_gen.compile(optimizer=get_optim(SUPER_OPTIMIZER),
+                      loss=[binary_crossentropy, mask_loss],
+                      loss_weights=[1, 1],
+                      metrics=['accuracy'])
+
+    number_img = 0
+    start_time = time.time()
+    print('Start training....')
+
+    for cycle in range(CYCLES):
+        print('Cycle %i | %i' % (cycle, CYCLES))
+
+        images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=2*BATCH_SIZE, category=CATEGORY)
+        org_images, crop_list, bboxes = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                           shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                           custom_size=True)
+
+        gt_masks = []
+        for ann in anns:
+            for seg in ann:
+                if seg['iscrowd'] == 1 or seg['area'] < 1500 or seg['area'] > 17000:
+                    continue
+                gt_mask = dataset.annToMask(seg)
+                gt_masks.append(gt_mask)
+
+        gt = Input_and_Utils.get_cropped_eval(masks=gt_masks, anns=anns, shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP)
+        gt = Input_and_Utils.resize_images(gt, 64, 64)
+
+        if len(crop_list) == 0:
+            continue
+
+        if (cycle % 30) == 0:
+            Input_and_Utils.save_image(model_gen.predict(np.expand_dims(crop_list[0], axis=0))[0],
+                                       'super_mask %i' % cycle)
+            Input_and_Utils.save_image(gt[0], 'gt %i' % cycle)
+
+        Input_and_Utils.save_image(gt[0], 'sup')
+        Input_and_Utils.save_image((crop_list[0]+1)/2, 'sup2')
+        gt = gt[0:len(crop_list)]
+        crop_list = np.array(crop_list)
+        gt = np.array(gt)
+
+        model_gen.fit(x=crop_list, y=[gt, gt],
+                      batch_size=BATCH_SIZE, epochs=GAN_ITER,
+                      verbose=1, callbacks=[SUPER_LOGGER])
+
+        number_img += len(crop_list)
+        print('trained on %s images' % number_img)
+
+        if (cycle % 30) == 0:
+            model_gen.save_weights(SUPERVISED_GEN_PATH)
+            print('save weights....')
+
+    model_gen.save_weights(SUPERVISED_GEN_PATH)
+    print('save weights....')
+    print('training finished in %s' % (time.time() - start_time))
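+
+
+# Illustrative sketch (not part of training): how a single COCO annotation
+# becomes a binary ground-truth mask like the ones used above. Assumes a
+# loaded COCO dataset with at least one matching annotation; the variable
+# names are made up for the example.
+def _gt_mask_sketch(dataset):
+    ann_id = dataset.getAnnIds(catIds=dataset.getCatIds(catNms=[CATEGORY]))[0]
+    ann = dataset.loadAnns(ids=[ann_id])[0]
+    gt_mask = dataset.annToMask(ann)   # uint8 array, 1 inside the object
+    return gt_mask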
+
+
+# Evaluate the performance of a model by comparing the predicted masks with
+# the ground truth and computing the IoU
+def evaluate():
+
+    dataset = Input_and_Utils.get_val_dataset()
+
+    if MODEL == 'pseudo':
+        print('load pseudo....')
+        model_gen = super_unet_gen()
+        model_gen.load_weights(PSEUDO_SUPERVISED_GEN_PATH)
+
+    elif MODEL == 'supervised':
+        print('load supervised...')
+        model_gen = super_paper_gen()
+        model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    elif MODEL == 'unet_cpgan':
+        print('load UNET_GAN...')
+        model_disc = paper_disc()
+        model_disc.load_weights(UNET_DISC_MODEL_PATH)
+        model_gen = unet_gen_for_gan()
+        model_gen.load_weights(UNET_GEN_MODEL_PATH)
+
+    elif MODEL == 'paper_cpgan':
+        print('load CPGAN...')
+        model_disc = paper_disc()
+        model_disc.load_weights(DISC_MODEL_PATH)
+        model_gen = paper_gen()
+        model_gen.load_weights(GEN_MODEL_PATH)
+
+    else:
+        print('no such model for evaluation available')
+        exit()
+
+    images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=15, category=CATEGORY)
+    org_images, cropped_images, bboxes = Input_and_Utils.get_cropped_images(images, anns,
+                                                                            shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                            custom_size=True)
+
+    dt_masks = []
+    gt_masks = []
+
+    for ann in anns:
+        for seg in ann:
+            if seg['iscrowd'] == 1 or seg['area'] < 1500 or seg['area'] > 17000:
+                continue
+            gt_mask = dataset.annToMask(seg)
+            gt_masks.append(gt_mask)
+
+    gt_masks = Input_and_Utils.get_cropped_eval(gt_masks, anns, shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP)
+
+    gts = Input_and_Utils.resize_images(gt_masks, 64, 64)
+
+    if MODEL in ['unet_cpgan', 'paper_cpgan']:
+
+        temp1, real_list, temp2 = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                     shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                     custom_size=True)
+
+        real_list = Input_and_Utils.resize_images(real_list, WIDTH_REAL, HEIGHT_REAL)
+
+        temp1, backgrounds, temp2 = Input_and_Utils.get_cropped_images(images=org_images,
+                                                                       shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                       random_position=True,
+                                                                       custom_size=True)
+        backgrounds = Input_and_Utils.resize_images(backgrounds, WIDTH_REAL, HEIGHT_REAL)
+
+        res_real = 0
+        res_fake = 0
+        i = 0
+
+        for background, crop in zip(backgrounds, cropped_images):
+
+            pred = model_gen.predict([np.expand_dims(background, axis=0), np.expand_dims(crop, axis=0)])
+
+            dt_mask = np.squeeze(np.round(pred[1]), axis=3)
+            dt_masks.append(dt_mask)
+
+            res_real += model_disc.predict(np.expand_dims(real_list[i], axis=0))
+            res_fake += model_disc.predict(pred[0])
+
+            if i % 1 == 0:  # save every composite; raise the modulus to save fewer
+                Input_and_Utils.save_image((pred[0]+1)/2, 'cp_img %s' % i, eval_directory=True)
+
+            i += 1
+
+        print('real_avg: %s fake_avg: %s' % (res_real/len(backgrounds), res_fake/len(backgrounds)))
+
+    else:  # 'supervised', 'pseudo'
+
+        for crop in cropped_images:
+            pred = model_gen.predict(np.expand_dims(crop, axis=0))[0]
+            dt_masks.append(np.squeeze(pred, axis=3))
+
+    # computation of the intersection over union
+    avg = 0
+    i = 0
+    n = 0
+    crop = Input_and_Utils.resize_images(cropped_images, 64, 64)
+
+    for dt, gt in zip(dt_masks, gts):
+        dt = dt.astype(bool)
+        # drop the channel axis so dt and gt broadcast to the same shape
+        gt = np.squeeze(gt, axis=2).astype(bool)
+        if np.sum(gt) == 0:
+            continue
+        overlap = dt * gt
+        union = dt + gt
+        IoU = np.sum(overlap)/(np.sum(union))
+        avg += IoU
+        n += 1
+
+        if i % 1 == 0:  # save every sample; raise the modulus to save fewer
+            Input_and_Utils.save_image((cropped_images[i]+1)/2, 'org %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(((crop[i]+1)/2) * np.expand_dims(np.squeeze(dt, axis=0), axis=2),
+                                       'seg %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(dt, 'dt %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(gt, 'gt %i' % i, eval_directory=True)
+        i += 1
+
+    print('Average IoU:', avg / n)
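+
+
+# Illustrative sketch (not part of evaluation): the IoU computed above, on a
+# tiny boolean example. Overlap is the logical AND (via *), union the logical
+# OR (via +) of the two masks; the values are made up for the example.
+def _iou_sketch():
+    dt = np.array([[True, True], [False, False]])
+    gt = np.array([[True, False], [True, False]])
+    iou = np.sum(dt * gt) / np.sum(dt + gt)   # 1 overlapping of 3 united pixels
+    return iou                                # -> 1/3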
+
+
+if __name__ == '__main__':
+
+    if UNIT == 'GPU':
+        num_GPU = 1
+        num_CPU = 1
+    elif UNIT == 'CPU':
+        num_CPU = 1
+        num_GPU = 0
+    else:
+        num_CPU = 1
+        num_GPU = 0
+
+    config = tf.ConfigProto(intra_op_parallelism_threads=NUM_CORES,
+                            inter_op_parallelism_threads=NUM_CORES,
+                            allow_soft_placement=True,
+                            device_count={'CPU': num_CPU,
+                                          'GPU': num_GPU}
+                            )
+
+    session = tf.Session(config=config)
+    K.set_session(session)
+
+    # training mode
+    if MODE == 'train':
+
+        if MODEL == 'unet_cpgan':
+            cpgan = Paper_CPGAN()
+            cpgan.train_paper_cpgan()
+
+        elif MODEL == 'paper_cpgan':
+            cpgan = Paper_CPGAN()
+            cpgan.train_paper_cpgan()
+
+        elif MODEL == 'supervised':
+            train_with_gt(model=SUPERVISED_MODEL)
+
+        else:
+            print('no such model available')
+
+    # evaluation mode
+    elif MODE == 'evaluate':
+
+        evaluate()
+
+    else:
+        print('please select a valid mode')
diff --git a/Input_and_Utils.py b/Input_and_Utils.py
new file mode 100644
index 0000000..1847ff4
--- /dev/null
+++ b/Input_and_Utils.py
@@ -0,0 +1,376 @@
+import skimage.io
+import numpy as np
+import skimage
+
+from skimage.transform import resize
+from matplotlib import pyplot as plt
+from pycocotools.coco import COCO
+from scipy import ndimage
+
+
+# Directories to which the images are saved
+SAVE_DIRECTORY = '../Examples_Results'
+EVAL_DIRECTORY = '../Evaluation'
+# Dataset paths
+VAL_PATH = '../instances_val2017.json'
+TRAIN_PATH = '../instances_train2017.json'
+
+
+# Cut-and-paste function to cut a masked object out of an image and paste it at another position
+# org_image: original image used to acquire a new background patch
+# crop_image: cut-out crop for pasting
+# mask: computed mask of the detected object
+def cut_and_paste(org_image, crop_image, mask):
+
+    mask = np.squeeze(mask, axis=0)
+
+    crop_image = resize_images(crop_image, mask.shape[0], mask.shape[1])[0]
+
+    mask_to_paste = crop_image * np.expand_dims(mask, axis=2)
+    mask_to_paste = np.pad(mask_to_paste, ((8, 0), (4, 4), (0, 0)), mode='constant', constant_values=0)
+
+    inverted_mask = 1 - mask
+    inverted_mask = np.pad(inverted_mask, ((8, 0), (4, 4)), mode='constant', constant_values=0)
+
+    temp1, img_to_paste, temp2 = get_cropped_images([org_image], random_position=True,
+                                                    shape_0=mask_to_paste.shape[0],
+                                                    shape_1=mask_to_paste.shape[1],
+                                                    custom_size=True)
+
+    cp_image = (img_to_paste[0] * np.expand_dims(inverted_mask, axis=2)) + mask_to_paste
+
+    return cp_image
+
+
+# Resize images to the desired size with the skimage resize function
+# Returns a list of resized images
+def resize_images(images, dim_0, dim_1):
+
+    images_resized = []
+
+    for image in images:
+        if image.ndim == 4:
+            image = np.squeeze(image, axis=0)
+
+        if image.ndim == 3:
+            if image.shape[2] == 1:
+                img = skimage.transform.resize(image, (dim_0, dim_1, 1), mode='constant', anti_aliasing=True)
+
+            elif image.shape[2] == 3:
+                img = skimage.transform.resize(image, (dim_0, dim_1, 3), mode='constant', anti_aliasing=True)
+
+        else:
+            img = skimage.transform.resize(image, (dim_0, dim_1), mode='constant', anti_aliasing=True)
+            img = np.expand_dims(img, axis=2)
+
+        images_resized.append(img)
+
+    return images_resized
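+
+
+# Illustrative sketch (not part of the pipeline): resize_images always returns
+# images with an explicit channel dimension, which is what the models expect.
+# The shapes are made up for the example.
+def _resize_sketch():
+    rgb = np.zeros((100, 150, 3))
+    gray = np.zeros((100, 150))
+    out = resize_images([rgb, gray], 64, 64)
+    assert out[0].shape == (64, 64, 3)
+    assert out[1].shape == (64, 64, 1)   # grayscale gets an added channel axis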
+
+
+# Pad or crop an image to the desired size, depending on the input size
+# Returns a list of resized images
+def cut_to_size(images, desired_x, desired_y):
+
+    sized_images = []
+
+    for image in images:
+
+        corr_x = 0
+        corr_y = 0
+
+        if image.ndim == 3:
+
+            if desired_x > image.shape[0]:
+                pad_x = int((desired_x - image.shape[0]) * 0.5)
+                if 2 * pad_x + image.shape[0] != desired_x:
+                    corr_x += 1
+                image = np.pad(image, ((2*pad_x+corr_x, 0), (0, 0), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_x = int(image.shape[0]/2)
+                cut_x = int(desired_x/2)
+                if cut_x*2 != desired_x:
+                    corr_x += 1
+                image = image[start_x-cut_x-corr_x:start_x+cut_x+corr_x, :, :]
+
+            if desired_y > image.shape[1]:
+                pad_y = int((desired_y - image.shape[1]) * 0.5)
+                if 2 * pad_y + image.shape[1] != desired_y:
+                    corr_y += 1
+                image = np.pad(image, ((0, 0), (pad_y+corr_y, pad_y), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_y = int(image.shape[1] / 2)
+                cut_y = int(desired_y / 2)
+                if cut_y * 2 != desired_y:
+                    corr_y += 1
+                image = image[:, start_y-cut_y-corr_y:start_y+cut_y+corr_y, :]
+
+        else:
+
+            if desired_x > image.shape[0]:
+                pad_x = int((desired_x - image.shape[0]) * 0.5)
+                if 2 * pad_x + image.shape[0] != desired_x:
+                    corr_x += 1
+                image = np.pad(image, ((2*pad_x+corr_x, 0), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_x = int(image.shape[0] / 2)
+                cut_x = int(desired_x / 2)
+                if cut_x * 2 != desired_x:
+                    corr_x += 1
+                image = image[start_x - cut_x:start_x + cut_x + corr_x, :]
+
+            if desired_y > image.shape[1]:
+                pad_y = int((desired_y - image.shape[1]) * 0.5)
+                if 2 * pad_y + image.shape[1] != desired_y:
+                    corr_y += 1
+                image = np.pad(image, ((0, 0), (pad_y+corr_y, pad_y)), mode='constant', constant_values=0)
+            else:
+                start_y = int(image.shape[1] / 2)
+                cut_y = int(desired_y / 2)
+                if cut_y * 2 != desired_y:
+                    corr_y += 1
+                image = image[:, start_y - cut_y:start_y + cut_y + corr_y]
+
+        sized_images.append(image)
+
+    return sized_images
+
+
+# Crop images according to the given bounding boxes or arbitrary coordinates
+# Restricts the size of the bounding box and checks whether the annotation is crowded
+# Returns a list of the cropped parts of the input images, the original images and the bounding box coordinates
+def get_cropped_images(images, anns=None, pos_x=None, pos_y=None,
+                       shape_0=None, shape_1=None,
+                       random_position=True, custom_size=False):
+
+    org_images = []
+    cropped_images = []
+    bboxes = []
+
+    if (shape_0 is not None) and (shape_1 is not None) and (anns is None):
+        for image in images:
+
+            bbox = []
+            corr_x = 0
+            corr_y = 0
+
+            if random_position:
+
+                if image.shape[0] < shape_0:
+                    continue
+                else:
+                    pos_x = np.random.randint(shape_0/2, image.shape[0] - shape_0/2 + 1)
+
+                if image.shape[1] < shape_1:
+                    continue
+                else:
+                    pos_y = np.random.randint(shape_1/2, image.shape[1] - shape_1/2 + 1)
+
+                dim_0 = int(shape_0/2)
+                dim_1 = int(shape_1/2)
+
+                if dim_0 * 2 != shape_0:
+                    corr_x = 1
+
+                if dim_1 * 2 != shape_1:
+                    corr_y = 1
+
+                random_crop = image[pos_x-dim_0+corr_x:pos_x + dim_0,
+                                    pos_y-dim_1+corr_y:pos_y + dim_1,
+                                    ]
+
+            else:
+                if (pos_x is None) or (pos_y is None):
+                    print('not enough coordinates given: pos_x or pos_y missing')
+                    return images, cropped_images, bboxes
+
+                else:
+                    dim_0 = int(shape_0)
+                    dim_1 = int(shape_1)
+
+                    random_crop = image[pos_x:pos_x + dim_0,
+                                        pos_y:pos_y + dim_1,
+                                        ]
+
+                    if (pos_x - dim_0 < 0) or (pos_y - dim_1 < 0):
+                        random_crop = cut_to_size([random_crop], dim_0, dim_1)[0]
+
+            if random_crop.ndim == 2:
+                random_crop = np.stack((random_crop,)*3, axis=-1)
+
+            org_images.append(image)
+            cropped_images.append(random_crop)
+            bbox.extend([pos_x, pos_y, dim_0, dim_1])
+            bboxes.append(bbox)
+
+    else:
+        for i, seg in zip(range(len(images)), anns):
+            for ann in seg:
+                image = images[i]
+                if ann['iscrowd'] == 1 or ann['area'] < 1500 or ann['area'] > 17000 or image.ndim == 2:
+                    continue
+
+                else:
+                    bbox = ann['bbox']
+                    crop_y = int(bbox[0])
+                    crop_x = int(bbox[1])
+                    height = int(bbox[2]/2)
+                    width = int(bbox[3]/2)
+
+                    if custom_size:
+                        crop_width = int(shape_0/2)
+                        crop_height = int(shape_1/2)
+                    else:
+                        crop_height = int(bbox[2]/2)
+                        crop_width = int(bbox[3]/2)
+
+                    start_x = crop_x + width
+                    start_y = crop_y + height
+
+                    if start_x - crop_width < 0:
+                        crop_width = start_x
+                    if start_y - crop_height < 0:
+                        crop_height = start_y
+
+                    if start_x + crop_width > image.shape[0]:
+                        crop_width = image.shape[0] - start_x
+                    if start_y + crop_height > image.shape[1]:
+                        crop_height = image.shape[1] - start_y
+
+                    # skip degenerate images and oversized crops
+                    if image.ndim <= 1 or crop_width*2 > shape_0 or crop_height*2 > shape_1:
+                        continue
+
+                    elif len(image.shape) == 2:
+                        img_cropped = image[start_x - crop_width:start_x + crop_width,
+                                            start_y - crop_height:start_y + crop_height]
+                        img_cropped = np.expand_dims(img_cropped, axis=2)
+                    else:
+                        img_cropped = image[start_x - crop_width:start_x + crop_width,
+                                            start_y - crop_height:start_y + crop_height,
+                                            :]
+
+                    if img_cropped.shape[0] != shape_0 or img_cropped.shape[1] != shape_1:
+                        img_cropped = cut_to_size([img_cropped], shape_0, shape_1)[0]
+
+                    org_images.append(image)
+                    cropped_images.append(img_cropped)
+                    bboxes.append(bbox)
+
+    return org_images, cropped_images, bboxes
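+
+
+# Illustrative sketch (not part of the pipeline): pulling a random 144x144
+# patch out of a larger image with get_cropped_images, the way the training
+# loop collects background patches. The input array is made up for the example.
+def _random_crop_sketch():
+    image = np.random.rand(256, 256, 3)
+    _, crops, bboxes = get_cropped_images([image], shape_0=144, shape_1=144,
+                                          random_position=True, custom_size=True)
+    assert crops[0].shape == (144, 144, 3)
+    return crops, bboxes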
+
+
+# Crop function for evaluation
+# Used to crop the ground truth masks correctly
+# Returns the cropped ground truth masks
+def get_cropped_eval(masks, anns, shape_0, shape_1):
+
+    cropped_masks = []
+
+    i = 0
+    for seg in anns:
+        for ann in seg:
+            if ann['iscrowd'] == 1 or ann['area'] < 1500 or ann['area'] > 17000:
+                continue
+
+            else:
+                mask = masks[i]
+                bbox = ann['bbox']
+                crop_y = int(bbox[0])
+                crop_x = int(bbox[1])
+                height = int(bbox[2]/2)
+                width = int(bbox[3]/2)
+
+                crop_width = int(shape_0/2)
+                crop_height = int(shape_1/2)
+
+                start_x = crop_x + width
+                start_y = crop_y + height
+
+                if start_x - crop_width < 0:
+                    crop_width = start_x
+                if start_y - crop_height < 0:
+                    crop_height = start_y
+
+                if start_x + crop_width > mask.shape[0]:
+                    crop_width = mask.shape[0] - start_x
+                if start_y + crop_height > mask.shape[1]:
+                    crop_height = mask.shape[1] - start_y
+
+                if mask.ndim <= 1 or crop_width * 2 > shape_0 or crop_height * 2 > shape_1:
+                    # consume the mask anyway so the list stays aligned with the annotations
+                    i += 1
+                    continue
+
+                mask_cropped = mask[start_x - crop_width:start_x + crop_width,
+                                    start_y - crop_height:start_y + crop_height]
+
+                if mask_cropped.shape[0] != shape_0 or mask_cropped.shape[1] != shape_1:
+                    mask_cropped = cut_to_size([mask_cropped], shape_0, shape_1)[0]
+
+                cropped_masks.append(mask_cropped)
+                i += 1
+
+    return cropped_masks
+
+
+# Get all relevant images for training according to the category
+# Set the category at the top of the file
+def get_image_and_anns(dataset, batch_size, category, noise=False, blur=False):
+
+    images = []
+    image_anns = []
+
+    cat_ids = dataset.getCatIds(catNms=[category])
+    img_ids = dataset.getImgIds(catIds=cat_ids)
+
+    for i in range(batch_size):
+
+        img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]
+        # scale the pixel values to [-1, 1]
+        image = (skimage.io.imread(img['coco_url'], as_gray=False)/127.5) - 1
+
+        ann_ids = dataset.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
+        anns = dataset.loadAnns(ids=ann_ids)
+
+        if np.any(noise):
+            gauss = np.random.normal(0, 1, image.shape)
+            image = image + 0.5*gauss
+
+        if np.any(blur):
+            image = ndimage.gaussian_filter(image, sigma=2)
+
+        images.append(image)
+        image_anns.append(anns)
+
+    return images, image_anns
+
+
+# Save a picture to SAVE_DIRECTORY (or EVAL_DIRECTORY)
+# Set the paths at the top of the file
+def save_image(image, file_name='example', eval_directory=False):
+
+    if image.shape[0] == 1:
+        image = np.squeeze(image, axis=0)
+    if image.ndim == 3:
+        if image.shape[2] == 1:
+            image = np.squeeze(image, axis=2)
+
+    plt.imshow(image)
+    plt.axis('off')
+    if eval_directory:
+        plt.savefig(EVAL_DIRECTORY + '/' + file_name)
+    else:
+        plt.savefig(SAVE_DIRECTORY + '/' + file_name)
+    print('%s saved' % file_name)
+
+
+# Get usable data annotations from the val dataset of COCO, saved at VAL_PATH
+# Path variable at the top of the file
+def get_val_dataset():
+    val_coco = COCO(VAL_PATH)
+    return val_coco
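+
+
+# Illustrative sketch (not part of the pipeline): minimal end-to-end use of
+# the helpers in this module. Assumes the COCO annotation file at VAL_PATH
+# exists and that network access is available for the image download; the
+# 'person' category mirrors the training setup and is otherwise arbitrary.
+def _smoke_test_sketch():
+    dataset = get_val_dataset()
+    images, anns = get_image_and_anns(dataset, batch_size=1, category='person')
+    _, crops, _ = get_cropped_images(images, anns, shape_0=128, shape_1=128,
+                                     custom_size=True)
+    if crops:
+        save_image((crops[0] + 1) / 2, 'smoke_test')   # rescale from [-1, 1]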
+
+
+# Get usable data annotations from the train dataset of COCO, saved at TRAIN_PATH
+# Path variable at the top of the file
+def get_train_dataset():
+    train_coco = COCO(TRAIN_PATH)
+    return train_coco