From 086f7cb131f85061bee620b90522c19048658b88 Mon Sep 17 00:00:00 2001
From: Fabian Loosli
Date: Fri, 9 Aug 2019 13:49:50 +0200
Subject: [PATCH] Initial commit

CP_GAN implementation in Keras, after the paper in the README.md
---
 CP_GAN_models.py         | 309 +++++++++++++++++++++++++
 CP_GAN_train_and_eval.py | 470 +++++++++++++++++++++++++++++++++++++++
 Input_and_Utils.py       | 376 +++++++++++++++++++++++++++++++
 3 files changed, 1155 insertions(+)
 create mode 100644 CP_GAN_models.py
 create mode 100644 CP_GAN_train_and_eval.py
 create mode 100644 Input_and_Utils.py

diff --git a/CP_GAN_models.py b/CP_GAN_models.py
new file mode 100644
index 0000000..49dbc30
--- /dev/null
+++ b/CP_GAN_models.py
@@ -0,0 +1,309 @@
+from keras.models import Model
+from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dense, Lambda, AveragePooling2D, ReLU,\
+    LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.layers.core import Activation
+from keras.layers.merge import concatenate
+from keras import backend as K
+
+
+# Input shape of the new background
+INPUT_BACKGROUND = (72, 72, 3)
+# Input shape of the cropped image
+INPUT_CROP = (128, 128, 3)
+# Input shape of the discriminator
+INPUT_DISC = (72, 72, 3)
+# Number of filters used by the convolutions
+FILTERS = 64
+# Axis along which to concatenate and to normalize with batch norm
+AXIS = -1
+# Size of the padding border
+PADDING = 4
+
+
+# metrics
+def conf_metric(y_true, y_pred):
+    return K.mean(K.minimum(y_pred, 1 - y_pred))
+
+
+def min_metric(y_true, y_pred):
+    return K.mean(K.maximum((-1 * K.mean(y_pred, axis=[1, 2, 3]) + 0.2), 0))
+
+
+# The computed mask should not be smaller than 20% of the bbox
+def min_mask_loss(y_true, y_pred):
+    return K.mean(K.maximum((-1 * K.mean(y_pred, axis=[1, 2, 3]) + 0.2), 0))
+
+
+# The mask should make confident predictions, i.e. each pixel should be close to either 0 or 1
+def confidence_loss(y_true, y_pred):
+    return K.mean(K.minimum(y_pred, 1 - y_pred))
+
+
+def mask_loss(y_true, y_pred):
+    return confidence_loss(y_true, y_pred) + min_mask_loss(y_true, y_pred)
+
+
+# Cut-and-paste function, used as a custom Lambda layer
+# Creates a new image from a given image, its mask and a given background
+def cut_and_paste(x):
+
+    background = x[0]
+    crop = x[1]
+    mask = x[2]
+
+    mask_to_paste = mask * crop
+    mask_to_paste = K.spatial_2d_padding(mask_to_paste,
+                                         padding=((PADDING*2, 0), (PADDING, PADDING)),
+                                         data_format='channels_last')
+
+    prep_mask = K.spatial_2d_padding(mask,
+                                     padding=((PADDING*2, 0), (PADDING, PADDING)),
+                                     data_format='channels_last')
+
+    inverted_mask = 1 - prep_mask
+
+    cp_img = (background * inverted_mask) + mask_to_paste
+
+    return cp_img
+
+
+# Double convolution layer, the building block of the U-Net
+def double_conv_layer(inputs, filters):
+
+    conv = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
+    conv = BatchNormalization(axis=AXIS, momentum=0.5)(conv)
+    conv = ReLU()(conv)
+    conv = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(conv)
+    conv = BatchNormalization(axis=AXIS, momentum=0.5)(conv)
+    conv = ReLU()(conv)
+
+    return conv
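+
+
+# Illustrative sketch (not part of the model): the same compositing that the
+# cut_and_paste Lambda performs, written with NumPy so the padding arithmetic
+# is easy to check. A 64x64 mask padded with PADDING*2 = 8 rows on top and
+# PADDING = 4 columns on each side yields the 72x72 background resolution.
+# All array names and values here are made up for the example.
+def _numpy_cut_and_paste_sketch():
+    import numpy as np
+    background = np.zeros((72, 72, 3))
+    crop = np.ones((64, 64, 3))
+    mask = np.ones((64, 64, 1))
+
+    pad = ((PADDING * 2, 0), (PADDING, PADDING), (0, 0))
+    mask_to_paste = np.pad(mask * crop, pad, mode='constant')
+    prep_mask = np.pad(mask, pad, mode='constant')
+
+    # paste the masked crop onto the background where the mask is active
+    cp_img = background * (1 - prep_mask) + mask_to_paste
+    assert cp_img.shape == (72, 72, 3)
+    return cp_img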
+
+
+# Architecture of the generator, a U-Net
+# Input shapes:
+# background_img: 72*72*3
+# crop: 128*128*3
+# Output shapes:
+# new_img: 72*72*3
+# mask: 64*64*1
+def unet_gen_for_gan():
+
+    background = Input(shape=INPUT_BACKGROUND)
+    crop = Input(shape=INPUT_CROP)
+
+    conv_256 = double_conv_layer(inputs=crop, filters=FILTERS)
+    pool_128 = MaxPooling2D(pool_size=(2, 2))(conv_256)
+
+    conv_128 = double_conv_layer(inputs=pool_128, filters=2*FILTERS)
+    pool_64 = MaxPooling2D(pool_size=(2, 2))(conv_128)
+
+    conv_64 = double_conv_layer(inputs=pool_64, filters=4*FILTERS)
+    pool_32 = MaxPooling2D(pool_size=(2, 2))(conv_64)
+
+    conv_32 = double_conv_layer(inputs=pool_32, filters=8*FILTERS)
+    pool_16 = MaxPooling2D(pool_size=(2, 2))(conv_32)
+
+    conv_16 = double_conv_layer(inputs=pool_16, filters=16*FILTERS)
+    pool_8 = MaxPooling2D(pool_size=(2, 2))(conv_16)
+
+    conv_8 = double_conv_layer(inputs=pool_8, filters=32*FILTERS)
+
+    up_16 = concatenate([UpSampling2D(size=(2, 2))(conv_8), conv_16], axis=AXIS)
+    up_conv_16 = double_conv_layer(inputs=up_16, filters=16*FILTERS)
+
+    up_32 = concatenate([UpSampling2D(size=(2, 2))(up_conv_16), conv_32], axis=AXIS)
+    up_conv_32 = double_conv_layer(inputs=up_32, filters=8*FILTERS)
+
+    up_64 = concatenate([UpSampling2D(size=(2, 2))(up_conv_32), conv_64], axis=AXIS)
+    up_conv_64 = double_conv_layer(inputs=up_64, filters=4*FILTERS)
+
+    up_128 = concatenate([UpSampling2D(size=(2, 2))(up_conv_64), conv_128], axis=AXIS)
+    up_conv_128 = double_conv_layer(inputs=up_128, filters=2*FILTERS)
+
+    up_256 = concatenate([UpSampling2D(size=(2, 2))(up_conv_128), conv_256], axis=AXIS)
+    up_conv_256 = double_conv_layer(inputs=up_256, filters=FILTERS)
+
+    conv_final = Conv2D(filters=1, kernel_size=(2, 2), strides=(2, 2))(up_conv_256)
+    mask = Activation('sigmoid')(conv_final)
+
+    org_crop = AveragePooling2D(pool_size=(2, 2), padding='valid')(crop)
+
+    cut_paste_layer = Lambda(cut_and_paste, output_shape=None)
+    new_img = cut_paste_layer([background, org_crop, mask])
+
+    model = Model([background, crop], [new_img, mask], name='Gen')
+
+    model.summary()
+
+    return model
+
+
+# Architecture of the generator for supervised learning, U-Net
+# Input shape: 128*128*3
+# Output shape: 64*64*1
+def super_unet_gen():
+
+    crop = Input(shape=INPUT_CROP)
+
+    conv_256 = double_conv_layer(inputs=crop, filters=FILTERS)
+    pool_128 = MaxPooling2D(pool_size=(2, 2))(conv_256)
+
+    conv_128 = double_conv_layer(inputs=pool_128, filters=2 * FILTERS)
+    pool_64 = MaxPooling2D(pool_size=(2, 2))(conv_128)
+
+    conv_64 = double_conv_layer(inputs=pool_64, filters=4 * FILTERS)
+    pool_32 = MaxPooling2D(pool_size=(2, 2))(conv_64)
+
+    conv_32 = double_conv_layer(inputs=pool_32, filters=8 * FILTERS)
+    pool_16 = MaxPooling2D(pool_size=(2, 2))(conv_32)
+
+    conv_16 = double_conv_layer(inputs=pool_16, filters=16 * FILTERS)
+    pool_8 = MaxPooling2D(pool_size=(2, 2))(conv_16)
+
+    conv_8 = double_conv_layer(inputs=pool_8, filters=32 * FILTERS)
+
+    up_16 = concatenate([UpSampling2D(size=(2, 2))(conv_8), conv_16], axis=AXIS)
+    up_conv_16 = double_conv_layer(inputs=up_16, filters=16 * FILTERS)
+
+    up_32 = concatenate([UpSampling2D(size=(2, 2))(up_conv_16), conv_32], axis=AXIS)
+    up_conv_32 = double_conv_layer(inputs=up_32, filters=8 * FILTERS)
+
+    up_64 = concatenate([UpSampling2D(size=(2, 2))(up_conv_32), conv_64], axis=AXIS)
+    up_conv_64 = double_conv_layer(inputs=up_64, filters=4 * FILTERS)
+
+    up_128 = concatenate([UpSampling2D(size=(2, 2))(up_conv_64), conv_128], axis=AXIS)
+    up_conv_128 = double_conv_layer(inputs=up_128, filters=2 * FILTERS)
+
+    up_256 = concatenate([UpSampling2D(size=(2, 2))(up_conv_128), conv_256], axis=AXIS)
+    up_conv_256 = double_conv_layer(inputs=up_256, filters=FILTERS)
+
+    # kernel/stride 2 maps the 128x128 feature map to the 64x64 mask expected
+    # by the training and evaluation code (kernel/stride 4 would yield 32x32)
+    conv_final = Conv2D(filters=1, kernel_size=(2, 2), strides=(2, 2))(up_conv_256)
+    mask = Activation('sigmoid')(conv_final)
+
+    # the mask is output twice so the supervised training loop can attach two
+    # loss functions, matching super_paper_gen below
+    model = Model(crop, [mask, mask], name='Super_gen')
+
+    model.summary()
+
+    return model
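+
+
+# Illustrative sketch (not part of training): a quick shape check of the U-Net
+# generator on random inputs. Assumes a working Keras session; the array names
+# are made up for the example.
+def _unet_shape_check_sketch():
+    import numpy as np
+    gen = unet_gen_for_gan()
+    background = np.random.rand(1, 72, 72, 3)
+    crop = np.random.rand(1, 128, 128, 3)
+    new_img, mask = gen.predict([background, crop])
+    assert new_img.shape == (1, 72, 72, 3)
+    assert mask.shape == (1, 64, 64, 1)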
+
+
+# Discriminator architecture according to the paper
+# Input shape: 72*72*3
+# Output shape: scalar
+def paper_disc():
+
+    img = Input(shape=INPUT_DISC)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=3, strides=1, padding='valid')(img)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    lrelu_1 = LeakyReLU(alpha=0.2)(batch_norm_1)
+
+    conv_2 = Conv2D(filters=2*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    lrelu_2 = LeakyReLU(alpha=0.2)(batch_norm_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_2)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    lrelu_3 = LeakyReLU(alpha=0.2)(batch_norm_3)
+
+    conv_4 = Conv2D(filters=8*FILTERS, kernel_size=3, strides=2, padding='valid')(lrelu_3)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    lrelu_4 = LeakyReLU(alpha=0.2)(batch_norm_4)
+
+    flatten = Flatten()(lrelu_4)
+
+    dense = Dense(units=1)(flatten)
+
+    valid = Activation('sigmoid')(dense)
+
+    model = Model(img, valid, name='Disc')
+
+    model.summary()
+
+    return model
+
+
+# Generator architecture according to the paper
+# Input shapes:
+# background_img: 72*72*3
+# crop: 128*128*3
+# Output shapes:
+# new_img: 72*72*3
+# mask: 64*64*1
+def paper_gen():
+
+    background = Input(shape=INPUT_BACKGROUND)
+    crop = Input(shape=INPUT_CROP)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=1, strides=2, padding='same')(crop)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    relu_1 = ReLU()(batch_norm_1)
+
+    conv_2 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=1, padding='same')(relu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    relu_2 = ReLU()(batch_norm_2)
+
+    up_1 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=1, padding='same')(up_1)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    relu_3 = ReLU()(batch_norm_3)
+
+    up_2 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_3)
+
+    conv_4 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_2)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    relu_4 = ReLU()(batch_norm_4)
+
+    conv_5 = Conv2D(filters=1, kernel_size=3, strides=2, padding='same')(relu_4)
+    mask = Activation('sigmoid')(conv_5)
+
+    org_crop = AveragePooling2D(pool_size=(2, 2), padding='valid')(crop)
+
+    cut_paste_layer = Lambda(cut_and_paste, output_shape=None)
+    new_img = cut_paste_layer([background, org_crop, mask])
+
+    model = Model([background, crop], [new_img, mask], name='Gen')
+
+    model.summary()
+
+    return model
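+
+
+# Illustrative sketch (not part of training): probing the discriminator with
+# random batches. In the LSGAN-style setup in CP_GAN_train_and_eval.py, real
+# crops are fitted toward 1 and composites toward 0 with an 'mse' loss; the
+# array names here are made up for the example.
+def _disc_probe_sketch():
+    import numpy as np
+    disc = paper_disc()
+    real_batch = np.random.rand(4, 72, 72, 3)
+    fake_batch = np.random.rand(4, 72, 72, 3)
+    print('real score:', disc.predict(real_batch).mean())
+    print('fake score:', disc.predict(fake_batch).mean())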
+
+
+# Generator architecture according to the paper, supervised
+# Input shape: 128*128*3
+# Output shape: 64*64*1
+def super_paper_gen():
+
+    crop = Input(shape=INPUT_CROP)
+
+    conv_1 = Conv2D(filters=FILTERS, kernel_size=1, strides=1, padding='same')(crop)
+    batch_norm_1 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_1)
+    relu_1 = ReLU()(batch_norm_1)
+
+    conv_2 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(relu_1)
+    batch_norm_2 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_2)
+    relu_2 = ReLU()(batch_norm_2)
+
+    up_1 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_2)
+
+    conv_3 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_1)
+    batch_norm_3 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_3)
+    relu_3 = ReLU()(batch_norm_3)
+
+    up_2 = UpSampling2D(size=(2, 2), data_format='channels_last', interpolation='bilinear')(relu_3)
+
+    conv_4 = Conv2D(filters=4*FILTERS, kernel_size=3, strides=2, padding='same')(up_2)
+    batch_norm_4 = BatchNormalization(axis=AXIS, momentum=0.5)(conv_4)
+    relu_4 = ReLU()(batch_norm_4)
+
+    conv_5 = Conv2D(filters=1, kernel_size=3, strides=1, padding='same')(relu_4)
+    mask = Activation('sigmoid')(conv_5)
+
+    # the mask is output twice so that two loss functions (binary_crossentropy
+    # and mask_loss) can be attached during supervised training
+    model = Model([crop], [mask, mask], name='Paper_gen')
+
+    model.summary()
+
+    return model
diff --git a/CP_GAN_train_and_eval.py b/CP_GAN_train_and_eval.py
new file mode 100644
index 0000000..26657a8
--- /dev/null
+++ b/CP_GAN_train_and_eval.py
@@ -0,0 +1,470 @@
+import Input_and_Utils
+import os
+import tensorflow as tf
+import time
+import numpy as np
+
+from CP_GAN_models import *
+from keras.optimizers import Adam, SGD, RMSprop
+from keras.callbacks import CSVLogger
+from keras.losses import binary_crossentropy
+from keras import backend as K
+
+
+# Paths for the Discriminator and Generator weights of the original architecture
+DISC_MODEL_PATH = '../disc_weight.h5'
+GEN_MODEL_PATH = '../gen_weight.h5'
+# Path for the Generator weights of the U-Net architecture
+UNET_GEN_MODEL_PATH = '../unet_gen_weight.h5'
+# Paths for the U-Net Discriminator weights and the pseudo-supervised weights
+# (both are referenced below but were missing in this commit; the file names
+# here are placeholders)
+UNET_DISC_MODEL_PATH = '../unet_disc_weight.h5'
+PSEUDO_SUPERVISED_GEN_PATH = '../pseudo_super_weight.h5'
+# Path for the weights of the supervised network
+SUPERVISED_GEN_PATH = '../super_weight.h5'
+# Input shapes, careful!! the proportions have to be in check to work
+WIDTH_CROP = 128
+HEIGHT_CROP = 128
+WIDTH_ORG = 144  # WIDTH_CROP + 4*PADDING from CP_GAN_models
+HEIGHT_ORG = 144  # HEIGHT_CROP + 4*PADDING from CP_GAN_models
+WIDTH_REAL = 72  # Half the size of WIDTH_ORG
+HEIGHT_REAL = 72  # Half the size of HEIGHT_ORG
+CHANNELS = 3
+# Some parameters for fine-tuning and options for training
+BATCH_SIZE = 4
+DISC_ITER = 1
+GAN_ITER = 1
+CYCLES = 1002
+NOISE = False
+BLUR = False
+LEARNING_RATE = 0.0001
+# Available optimizers: 'adam', 'sgd', 'RMSprop'
+CPGAN_OPTIMIZER = 'adam'
+DISC_OPTIMIZER = 'adam'
+SUPER_OPTIMIZER = 'adam'
+PSEUDO_OPTIMIZER = 'adam'
+# Some options concerning modes and computational resources
+PRETRAINED = True
+MODEL = 'paper_cpgan'  # 'unet_cpgan', 'paper_cpgan' or 'supervised'
+SUPERVISED_MODEL = 'paper'  # 'paper' or 'unet'
+UNIT = 'CPU'  # 'GPU' or 'CPU'
+NUM_CORES = 4
+MODE = 'train'  # Mode 'train' or 'evaluate'
+# Category to determine which images to use
+CATEGORY = 'person'
+# CSV loggers for writing the training results to external CSV files
+GEN_LOGGER = CSVLogger(filename='../gen_logger.csv',
+                       separator=';',
+                       append=True)
+DISC_LOGGER = CSVLogger(filename='../disc_logger.csv',
+                        separator=';',
+                        append=True)
+SUPER_LOGGER = CSVLogger(filename='../super_logger.csv',
+                         separator=';',
+                         append=True)
+
+
+# Dictionary of available optimizers, selected via the constants at the top of the file
+# Adjust the learning rate at the top of the file
+def get_optim(optim):
+    return {
+        'sgd': SGD(lr=LEARNING_RATE, momentum=0, decay=0, nesterov=False),
+        'adam': Adam(lr=LEARNING_RATE, beta_1=0.9, beta_2=0.999, epsilon=0.0000001, decay=0, amsgrad=False),
+        'RMSprop': RMSprop(lr=LEARNING_RATE, rho=0.9, epsilon=None, decay=0.0)
+    }.get(optim, Adam(lr=LEARNING_RATE))
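+
+
+# Illustrative sketch (not part of the pipeline): the freeze pattern used by
+# Paper_CPGAN below. trainable is toggled around compile(), so the combined
+# GAN updates only the generator while the separately compiled discriminator
+# still trains on its own. The toy model names are made up for the example.
+def _freeze_pattern_sketch():
+    from keras.models import Model
+    from keras.layers import Input, Dense
+
+    inp = Input(shape=(8,))
+    disc = Model(inp, Dense(1, activation='sigmoid')(inp), name='toy_disc')
+
+    disc.trainable = False               # frozen inside the combined model
+    gan_in = Input(shape=(8,))
+    gan = Model(gan_in, disc(Dense(8)(gan_in)), name='toy_gan')
+    gan.compile(optimizer='adam', loss='mse')
+
+    disc.trainable = True                # trainable again when compiled alone
+    disc.compile(optimizer='adam', loss='mse')
+    return gan, disc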
+
+
+# CPGAN class to create a GAN with 'mse' as the adversarial loss function
+class Paper_CPGAN:
+
+    def __init__(self):
+
+        if MODEL == 'unet_cpgan':
+            self.model_gen = unet_gen_for_gan()
+            GEN_PATH = UNET_GEN_MODEL_PATH
+            DISC_PATH = UNET_DISC_MODEL_PATH
+        else:
+            self.model_gen = paper_gen()
+            GEN_PATH = GEN_MODEL_PATH
+            DISC_PATH = DISC_MODEL_PATH
+
+        # Load Discriminator
+        self.model_disc = paper_disc()
+        if os.path.isfile(DISC_PATH) and PRETRAINED:
+            print('loading weights for discriminator...')
+            self.model_disc.load_weights(DISC_PATH)
+
+        # Load Generator
+        if os.path.isfile(GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            self.model_gen.load_weights(GEN_PATH)
+
+        # Construct and compile GAN
+        org_crop = Input(shape=(WIDTH_CROP, HEIGHT_CROP, CHANNELS))
+        background = Input(shape=(WIDTH_REAL, HEIGHT_REAL, CHANNELS))
+
+        pred = self.model_gen([background, org_crop])
+        fake = pred[0]
+        mask = pred[1]
+
+        self.model_disc.trainable = False
+        self.model_gen.trainable = True
+        res_fake = self.model_disc(fake)
+
+        self.cp_gan = Model(inputs=[background, org_crop], outputs=[res_fake, mask], name='CPGAN')
+        self.cp_gan.compile(optimizer=get_optim(CPGAN_OPTIMIZER),
+                            loss={'Disc': 'mse', 'Gen': mask_loss},
+                            loss_weights={'Disc': 1.0, 'Gen': 1.0},
+                            metrics={'Disc': 'accuracy', 'Gen': [conf_metric, min_metric]}
+                            )
+
+        self.model_disc.trainable = True
+
+        # Compile Discriminator
+        self.model_disc.compile(optimizer=get_optim(DISC_OPTIMIZER),
+                                loss=['mse'],
+                                loss_weights=[1],
+                                metrics=['accuracy'])
+
+    # Train the CPGAN with 'mse' as the adversarial loss function
+    def train_paper_cpgan(self):
+
+        if MODEL == 'unet_cpgan':
+            GEN_PATH = UNET_GEN_MODEL_PATH
+            DISC_PATH = UNET_DISC_MODEL_PATH
+        else:
+            GEN_PATH = GEN_MODEL_PATH
+            DISC_PATH = DISC_MODEL_PATH
+
+        dataset = Input_and_Utils.get_train_dataset()
+        gan_train = 0
+        disc_train = 0
+
+        start_time = time.time()
+        print('Start training....')
+
+        for cycle in range(CYCLES):
+            print('Cycle %i | %i' % (cycle, CYCLES))
+
+            # Prepare the input data; request more images than BATCH_SIZE since
+            # unusable annotations are filtered out below
+            images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=6, category=CATEGORY,
+                                                              noise=NOISE, blur=BLUR)
+
+            org_images, crop_list, bboxes = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                               shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                               custom_size=True)
+
+            temp1, real_list, temp2 = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                         shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                         custom_size=True)
+
+            real_list = Input_and_Utils.resize_images(real_list, WIDTH_REAL, HEIGHT_REAL)
+
+            temp1, backgrounds, temp2 = Input_and_Utils.get_cropped_images(images=org_images,
+                                                                           shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                           random_position=True,
+                                                                           custom_size=True)
+
+            backgrounds = Input_and_Utils.resize_images(backgrounds, WIDTH_REAL, HEIGHT_REAL)
+
+            backgrounds = backgrounds[0:BATCH_SIZE]
+            crop_list = crop_list[0:BATCH_SIZE]
+
+            if len(backgrounds) < BATCH_SIZE or len(crop_list) < BATCH_SIZE:
+                continue
+
+            real = np.ones((BATCH_SIZE, 1))
+            real_list = np.array(real_list[0:BATCH_SIZE])
+
+            # dummy target; mask_loss ignores y_true, but the shape has to
+            # match the 4-D mask output of the generator
+            placeholder_mask = np.ones((BATCH_SIZE, 64, 64, 1))
+
+            backgrounds = np.array(backgrounds)
+            crop_list = np.array(crop_list)
+
+            fake = np.zeros((BATCH_SIZE, 1))
+
+            predictions = self.model_gen.predict([backgrounds, crop_list],
+                                                 verbose=0,
+                                                 steps=None)
+
+            if (cycle % 50) == 0:
+                Input_and_Utils.save_image((predictions[0][0] + 1) / 2, 'cycle %s' % cycle)
+
+            # Train GAN
+            print('Generator training.....')
+            self.cp_gan.fit(x=[backgrounds, crop_list],
+                            y=[real, placeholder_mask],
+                            batch_size=BATCH_SIZE, epochs=GAN_ITER,
+                            verbose=1, callbacks=[GEN_LOGGER])
+
+            gan_train += len(crop_list)
+            print('GAN trained on %s images' % gan_train)
+
+            # Train Discriminator
+            print('Discriminator training.....')
+
+            self.model_disc.fit(x=predictions[0], y=fake,
+                                batch_size=BATCH_SIZE, epochs=DISC_ITER,
+                                verbose=1, callbacks=[DISC_LOGGER])
+
+            self.model_disc.fit(real_list, y=real,
+                                batch_size=BATCH_SIZE, epochs=DISC_ITER,
+                                verbose=1, callbacks=[DISC_LOGGER])
+
+            disc_train += len(fake)
+            print('Discriminator trained on %s images' % disc_train)
+
+            if (cycle % 25) == 0:
+                self.model_disc.save_weights(DISC_PATH)
+                self.model_gen.save_weights(GEN_PATH)
+                print('save weights....')
+
+        self.model_disc.save_weights(DISC_PATH)
+        self.model_gen.save_weights(GEN_PATH)
+        print('save weights....')
+        print('training finished in %s' % (time.time() - start_time))
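+
+
+# Illustrative sketch (not part of training): what mask_loss from
+# CP_GAN_models rewards, recomputed with NumPy on a dummy mask. The confidence
+# term pushes every pixel toward 0 or 1; the minimum-size term penalizes masks
+# covering less than 20% of the crop. The values are made up for the example.
+def _mask_loss_sketch():
+    mask = np.full((1, 64, 64, 1), 0.5)                    # maximally unconfident mask
+    confidence = np.mean(np.minimum(mask, 1 - mask))       # -> 0.5, the worst case
+    min_size = np.mean(np.maximum(0.2 - np.mean(mask, axis=(1, 2, 3)), 0))  # -> 0, mask is large enough
+    return confidence + min_size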
+
+
+# Train the generator with ground truth masks
+def train_with_gt(model='paper'):
+
+    dataset = Input_and_Utils.get_train_dataset()
+
+    if model == 'paper':
+        model_gen = super_paper_gen()
+        if os.path.isfile(SUPERVISED_GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    elif model == 'unet':
+        model_gen = super_unet_gen()
+        if os.path.isfile(SUPERVISED_GEN_PATH) and PRETRAINED:
+            print('loading weights for generator...')
+            model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    else:
+        print('no model available')
+        exit()
+
+    model_gen.compile(optimizer=get_optim(SUPER_OPTIMIZER),
+                      loss=[binary_crossentropy, mask_loss],
+                      loss_weights=[1, 1],
+                      metrics=['accuracy'])
+
+    number_img = 0
+    start_time = time.time()
+    print('Start training....')
+
+    for cycle in range(CYCLES):
+        print('Cycle %i | %i' % (cycle, CYCLES))
+
+        images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=2*BATCH_SIZE, category=CATEGORY)
+        org_images, crop_list, bboxes = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                           shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                           custom_size=True)
+
+        gt_masks = []
+        for ann in anns:
+            for seg in ann:
+                if seg['iscrowd'] == 1 or seg['area'] < 1500 or seg['area'] > 17000:
+                    continue
+                gt_mask = dataset.annToMask(seg)
+                gt_masks.append(gt_mask)
+
+        gt = Input_and_Utils.get_cropped_eval(masks=gt_masks, anns=anns, shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP)
+        gt = Input_and_Utils.resize_images(gt, 64, 64)
+
+        if len(crop_list) == 0:
+            continue
+
+        if (cycle % 30) == 0:
+            Input_and_Utils.save_image(model_gen.predict(np.expand_dims(crop_list[0], axis=0))[0],
+                                       'super_mask %i' % cycle)
+            Input_and_Utils.save_image(gt[0], 'gt %i' % cycle)
+
+        Input_and_Utils.save_image(gt[0], 'sup')
+        Input_and_Utils.save_image((crop_list[0]+1)/2, 'sup2')
+        gt = gt[0:len(crop_list)]
+        crop_list = np.array(crop_list)
+        gt = np.array(gt)
+
+        model_gen.fit(x=crop_list, y=[gt, gt],
+                      batch_size=BATCH_SIZE, epochs=GAN_ITER,
+                      verbose=1, callbacks=[SUPER_LOGGER])
+
+        number_img += len(crop_list)
+        print('trained on %s images' % number_img)
+
+        if (cycle % 30) == 0:
+            model_gen.save_weights(SUPERVISED_GEN_PATH)
+            print('save weights....')
+
+    model_gen.save_weights(SUPERVISED_GEN_PATH)
+    print('save weights....')
+    print('training finished in %s' % (time.time() - start_time))
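+
+
+# Illustrative sketch (not part of training): how a single COCO annotation
+# becomes a binary ground-truth mask like the ones used above. Assumes a
+# loaded COCO dataset with at least one matching annotation; the variable
+# names are made up for the example.
+def _gt_mask_sketch(dataset):
+    ann_id = dataset.getAnnIds(catIds=dataset.getCatIds(catNms=[CATEGORY]))[0]
+    ann = dataset.loadAnns(ids=[ann_id])[0]
+    gt_mask = dataset.annToMask(ann)   # uint8 array, 1 inside the object
+    return gt_mask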
+
+
+# Evaluate the performance of a model by comparing the predicted masks with
+# the ground truth and computing the IoU
+def evaluate():
+
+    dataset = Input_and_Utils.get_val_dataset()
+
+    if MODEL == 'pseudo':
+        print('load pseudo....')
+        model_gen = super_unet_gen()
+        model_gen.load_weights(PSEUDO_SUPERVISED_GEN_PATH)
+
+    elif MODEL == 'supervised':
+        print('load supervised...')
+        model_gen = super_paper_gen()
+        model_gen.load_weights(SUPERVISED_GEN_PATH)
+
+    elif MODEL == 'unet_cpgan':
+        print('load UNET_GAN...')
+        model_disc = paper_disc()
+        model_disc.load_weights(UNET_DISC_MODEL_PATH)
+        model_gen = unet_gen_for_gan()
+        model_gen.load_weights(UNET_GEN_MODEL_PATH)
+
+    elif MODEL == 'paper_cpgan':
+        print('load CPGAN...')
+        model_disc = paper_disc()
+        model_disc.load_weights(DISC_MODEL_PATH)
+        model_gen = paper_gen()
+        model_gen.load_weights(GEN_MODEL_PATH)
+
+    else:
+        print('no such model for evaluation available')
+        exit()
+
+    images, anns = Input_and_Utils.get_image_and_anns(dataset=dataset, batch_size=15, category=CATEGORY)
+    org_images, cropped_images, bboxes = Input_and_Utils.get_cropped_images(images, anns,
+                                                                            shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP,
+                                                                            custom_size=True)
+
+    dt_masks = []
+    gt_masks = []
+
+    for ann in anns:
+        for seg in ann:
+            if seg['iscrowd'] == 1 or seg['area'] < 1500 or seg['area'] > 17000:
+                continue
+            gt_mask = dataset.annToMask(seg)
+            gt_masks.append(gt_mask)
+
+    gt_masks = Input_and_Utils.get_cropped_eval(gt_masks, anns, shape_0=WIDTH_CROP, shape_1=HEIGHT_CROP)
+
+    gts = Input_and_Utils.resize_images(gt_masks, 64, 64)
+
+    if MODEL in ['unet_cpgan', 'paper_cpgan']:
+
+        temp1, real_list, temp2 = Input_and_Utils.get_cropped_images(images=images, anns=anns,
+                                                                     shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                     custom_size=True)
+
+        real_list = Input_and_Utils.resize_images(real_list, WIDTH_REAL, HEIGHT_REAL)
+
+        temp1, backgrounds, temp2 = Input_and_Utils.get_cropped_images(images=org_images,
+                                                                       shape_0=WIDTH_ORG, shape_1=HEIGHT_ORG,
+                                                                       random_position=True,
+                                                                       custom_size=True)
+        backgrounds = Input_and_Utils.resize_images(backgrounds, WIDTH_REAL, HEIGHT_REAL)
+
+        res_real = 0
+        res_fake = 0
+        i = 0
+
+        for background, crop in zip(backgrounds, cropped_images):
+
+            pred = model_gen.predict([np.expand_dims(background, axis=0), np.expand_dims(crop, axis=0)])
+
+            dt_mask = np.squeeze(np.round(pred[1]), axis=3)
+            dt_masks.append(dt_mask)
+
+            res_real += model_disc.predict(np.expand_dims(real_list[i], axis=0))
+            res_fake += model_disc.predict(pred[0])
+
+            if i % 1 == 0:  # save every composite; raise the modulus to save fewer
+                Input_and_Utils.save_image((pred[0]+1)/2, 'cp_img %s' % i, eval_directory=True)
+
+            i += 1
+
+        print('real_avg: %s fake_avg: %s' % (res_real/len(backgrounds), res_fake/len(backgrounds)))
+
+    else:  # 'supervised', 'pseudo'
+
+        for crop in cropped_images:
+            pred = model_gen.predict(np.expand_dims(crop, axis=0))[0]
+            dt_masks.append(np.squeeze(pred, axis=3))
+
+    # computation of the intersection over union
+    avg = 0
+    i = 0
+    n = 0
+    crop = Input_and_Utils.resize_images(cropped_images, 64, 64)
+
+    for dt, gt in zip(dt_masks, gts):
+        dt = dt.astype(bool)
+        # drop the channel axis so dt and gt broadcast to the same shape
+        gt = np.squeeze(gt, axis=2).astype(bool)
+        if np.sum(gt) == 0:
+            continue
+        overlap = dt * gt
+        union = dt + gt
+        IoU = np.sum(overlap)/(np.sum(union))
+        avg += IoU
+        n += 1
+
+        if i % 1 == 0:  # save every sample; raise the modulus to save fewer
+            Input_and_Utils.save_image((cropped_images[i]+1)/2, 'org %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(((crop[i]+1)/2) * np.expand_dims(np.squeeze(dt, axis=0), axis=2),
+                                       'seg %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(dt, 'dt %i' % i, eval_directory=True)
+            Input_and_Utils.save_image(gt, 'gt %i' % i, eval_directory=True)
+        i += 1
+
+    print('Average IoU:', avg / n)
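+
+
+# Illustrative sketch (not part of evaluation): the IoU computed above, on a
+# tiny boolean example. Overlap is the logical AND (via *), union the logical
+# OR (via +) of the two masks; the values are made up for the example.
+def _iou_sketch():
+    dt = np.array([[True, True], [False, False]])
+    gt = np.array([[True, False], [True, False]])
+    iou = np.sum(dt * gt) / np.sum(dt + gt)   # 1 overlapping of 3 united pixels
+    return iou                                # -> 1/3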
+
+
+if __name__ == '__main__':
+
+    if UNIT == 'GPU':
+        num_GPU = 1
+        num_CPU = 1
+    elif UNIT == 'CPU':
+        num_CPU = 1
+        num_GPU = 0
+    else:
+        num_CPU = 1
+        num_GPU = 0
+
+    config = tf.ConfigProto(intra_op_parallelism_threads=NUM_CORES,
+                            inter_op_parallelism_threads=NUM_CORES,
+                            allow_soft_placement=True,
+                            device_count={'CPU': num_CPU,
+                                          'GPU': num_GPU}
+                            )
+
+    session = tf.Session(config=config)
+    K.set_session(session)
+
+    # training mode
+    if MODE == 'train':
+
+        if MODEL == 'unet_cpgan':
+            cpgan = Paper_CPGAN()
+            cpgan.train_paper_cpgan()
+
+        elif MODEL == 'paper_cpgan':
+            cpgan = Paper_CPGAN()
+            cpgan.train_paper_cpgan()
+
+        elif MODEL == 'supervised':
+            train_with_gt(model=SUPERVISED_MODEL)
+
+        else:
+            print('no such model available')
+
+    # evaluation mode
+    elif MODE == 'evaluate':
+
+        evaluate()
+
+    else:
+        print('please select a valid mode')
diff --git a/Input_and_Utils.py b/Input_and_Utils.py
new file mode 100644
index 0000000..1847ff4
--- /dev/null
+++ b/Input_and_Utils.py
@@ -0,0 +1,376 @@
+import skimage.io
+import numpy as np
+import skimage
+
+from skimage.transform import resize
+from matplotlib import pyplot as plt
+from pycocotools.coco import COCO
+from scipy import ndimage
+
+
+# Directories to which the images are saved
+SAVE_DIRECTORY = '../Examples_Results'
+EVAL_DIRECTORY = '../Evaluation'
+# Dataset paths
+VAL_PATH = '../instances_val2017.json'
+TRAIN_PATH = '../instances_train2017.json'
+
+
+# Cut-and-paste function to cut a masked object out of an image and paste it at another position
+# org_image: original image used to acquire a new background patch
+# crop_image: cut-out crop for pasting
+# mask: computed mask of the detected object
+def cut_and_paste(org_image, crop_image, mask):
+
+    mask = np.squeeze(mask, axis=0)
+
+    crop_image = resize_images(crop_image, mask.shape[0], mask.shape[1])[0]
+
+    mask_to_paste = crop_image * np.expand_dims(mask, axis=2)
+    mask_to_paste = np.pad(mask_to_paste, ((8, 0), (4, 4), (0, 0)), mode='constant', constant_values=0)
+
+    inverted_mask = 1 - mask
+    inverted_mask = np.pad(inverted_mask, ((8, 0), (4, 4)), mode='constant', constant_values=0)
+
+    temp1, img_to_paste, temp2 = get_cropped_images([org_image], random_position=True,
+                                                    shape_0=mask_to_paste.shape[0],
+                                                    shape_1=mask_to_paste.shape[1],
+                                                    custom_size=True)
+
+    cp_image = (img_to_paste[0] * np.expand_dims(inverted_mask, axis=2)) + mask_to_paste
+
+    return cp_image
+
+
+# Resize images to the desired size with the skimage resize function
+# Returns a list of resized images
+def resize_images(images, dim_0, dim_1):
+
+    images_resized = []
+
+    for image in images:
+        if image.ndim == 4:
+            image = np.squeeze(image, axis=0)
+
+        if image.ndim == 3:
+            if image.shape[2] == 1:
+                img = skimage.transform.resize(image, (dim_0, dim_1, 1), mode='constant', anti_aliasing=True)
+
+            elif image.shape[2] == 3:
+                img = skimage.transform.resize(image, (dim_0, dim_1, 3), mode='constant', anti_aliasing=True)
+
+        else:
+            img = skimage.transform.resize(image, (dim_0, dim_1), mode='constant', anti_aliasing=True)
+            img = np.expand_dims(img, axis=2)
+
+        images_resized.append(img)
+
+    return images_resized
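+
+
+# Illustrative sketch (not part of the pipeline): resize_images always returns
+# images with an explicit channel dimension, which is what the models expect.
+# The shapes are made up for the example.
+def _resize_sketch():
+    rgb = np.zeros((100, 150, 3))
+    gray = np.zeros((100, 150))
+    out = resize_images([rgb, gray], 64, 64)
+    assert out[0].shape == (64, 64, 3)
+    assert out[1].shape == (64, 64, 1)   # grayscale gets an added channel axis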
+
+
+# Pad or crop an image to the desired size, depending on the input size
+# Returns a list of resized images
+def cut_to_size(images, desired_x, desired_y):
+
+    sized_images = []
+
+    for image in images:
+
+        corr_x = 0
+        corr_y = 0
+
+        if image.ndim == 3:
+
+            if desired_x > image.shape[0]:
+                pad_x = int((desired_x - image.shape[0]) * 0.5)
+                if 2 * pad_x + image.shape[0] != desired_x:
+                    corr_x += 1
+                image = np.pad(image, ((2*pad_x+corr_x, 0), (0, 0), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_x = int(image.shape[0]/2)
+                cut_x = int(desired_x/2)
+                if cut_x*2 != desired_x:
+                    corr_x += 1
+                image = image[start_x-cut_x-corr_x:start_x+cut_x+corr_x, :, :]
+
+            if desired_y > image.shape[1]:
+                pad_y = int((desired_y - image.shape[1]) * 0.5)
+                if 2 * pad_y + image.shape[1] != desired_y:
+                    corr_y += 1
+                image = np.pad(image, ((0, 0), (pad_y+corr_y, pad_y), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_y = int(image.shape[1] / 2)
+                cut_y = int(desired_y / 2)
+                if cut_y * 2 != desired_y:
+                    corr_y += 1
+                image = image[:, start_y-cut_y-corr_y:start_y+cut_y+corr_y, :]
+
+        else:
+
+            if desired_x > image.shape[0]:
+                pad_x = int((desired_x - image.shape[0]) * 0.5)
+                if 2 * pad_x + image.shape[0] != desired_x:
+                    corr_x += 1
+                image = np.pad(image, ((2*pad_x+corr_x, 0), (0, 0)), mode='constant', constant_values=0)
+            else:
+                start_x = int(image.shape[0] / 2)
+                cut_x = int(desired_x / 2)
+                if cut_x * 2 != desired_x:
+                    corr_x += 1
+                image = image[start_x - cut_x:start_x + cut_x + corr_x, :]
+
+            if desired_y > image.shape[1]:
+                pad_y = int((desired_y - image.shape[1]) * 0.5)
+                if 2 * pad_y + image.shape[1] != desired_y:
+                    corr_y += 1
+                image = np.pad(image, ((0, 0), (pad_y+corr_y, pad_y)), mode='constant', constant_values=0)
+            else:
+                start_y = int(image.shape[1] / 2)
+                cut_y = int(desired_y / 2)
+                if cut_y * 2 != desired_y:
+                    corr_y += 1
+                image = image[:, start_y - cut_y:start_y + cut_y + corr_y]
+
+        sized_images.append(image)
+
+    return sized_images
+
+
+# Crop images according to the given bounding boxes or arbitrary coordinates
+# Restricts the size of the bounding box and checks whether the annotation is crowded
+# Returns a list of the cropped parts of the input images, the original images and the bounding box coordinates
+def get_cropped_images(images, anns=None, pos_x=None, pos_y=None,
+                       shape_0=None, shape_1=None,
+                       random_position=True, custom_size=False):
+
+    org_images = []
+    cropped_images = []
+    bboxes = []
+
+    if (shape_0 is not None) and (shape_1 is not None) and (anns is None):
+        for image in images:
+
+            bbox = []
+            corr_x = 0
+            corr_y = 0
+
+            if random_position:
+
+                if image.shape[0] < shape_0:
+                    continue
+                else:
+                    pos_x = np.random.randint(shape_0/2, image.shape[0] - shape_0/2 + 1)
+
+                if image.shape[1] < shape_1:
+                    continue
+                else:
+                    pos_y = np.random.randint(shape_1/2, image.shape[1] - shape_1/2 + 1)
+
+                dim_0 = int(shape_0/2)
+                dim_1 = int(shape_1/2)
+
+                if dim_0 * 2 != shape_0:
+                    corr_x = 1
+
+                if dim_1 * 2 != shape_1:
+                    corr_y = 1
+
+                random_crop = image[pos_x-dim_0+corr_x:pos_x + dim_0,
+                                    pos_y-dim_1+corr_y:pos_y + dim_1,
+                                    ]
+
+            else:
+                if (pos_x is None) or (pos_y is None):
+                    print('not enough coordinates given: pos_x or pos_y missing')
+                    return images, cropped_images, bboxes
+
+                else:
+                    dim_0 = int(shape_0)
+                    dim_1 = int(shape_1)
+
+                    random_crop = image[pos_x:pos_x + dim_0,
+                                        pos_y:pos_y + dim_1,
+                                        ]
+
+                    if (pos_x - dim_0 < 0) or (pos_y - dim_1 < 0):
+                        random_crop = cut_to_size([random_crop], dim_0, dim_1)[0]
+
+            if random_crop.ndim == 2:
+                random_crop = np.stack((random_crop,)*3, axis=-1)
+
+            org_images.append(image)
+            cropped_images.append(random_crop)
+            bbox.extend([pos_x, pos_y, dim_0, dim_1])
+            bboxes.append(bbox)
+
+    else:
+        for i, seg in zip(range(len(images)), anns):
+            for ann in seg:
+                image = images[i]
+                if ann['iscrowd'] == 1 or ann['area'] < 1500 or ann['area'] > 17000 or image.ndim == 2:
+                    continue
+
+                else:
+                    bbox = ann['bbox']
+                    crop_y = int(bbox[0])
+                    crop_x = int(bbox[1])
+                    height = int(bbox[2]/2)
+                    width = int(bbox[3]/2)
+
+                    if custom_size:
+                        crop_width = int(shape_0/2)
+                        crop_height = int(shape_1/2)
+                    else:
+                        crop_height = int(bbox[2]/2)
+                        crop_width = int(bbox[3]/2)
+
+                    start_x = crop_x + width
+                    start_y = crop_y + height
+
+                    if start_x - crop_width < 0:
+                        crop_width = start_x
+                    if start_y - crop_height < 0:
+                        crop_height = start_y
+
+                    if start_x + crop_width > image.shape[0]:
+                        crop_width = image.shape[0] - start_x
+                    if start_y + crop_height > image.shape[1]:
+                        crop_height = image.shape[1] - start_y
+
+                    # skip degenerate images and oversized crops
+                    if image.ndim <= 1 or crop_width*2 > shape_0 or crop_height*2 > shape_1:
+                        continue
+
+                    elif len(image.shape) == 2:
+                        img_cropped = image[start_x - crop_width:start_x + crop_width,
+                                            start_y - crop_height:start_y + crop_height]
+                        img_cropped = np.expand_dims(img_cropped, axis=2)
+                    else:
+                        img_cropped = image[start_x - crop_width:start_x + crop_width,
+                                            start_y - crop_height:start_y + crop_height,
+                                            :]
+
+                    if img_cropped.shape[0] != shape_0 or img_cropped.shape[1] != shape_1:
+                        img_cropped = cut_to_size([img_cropped], shape_0, shape_1)[0]
+
+                    org_images.append(image)
+                    cropped_images.append(img_cropped)
+                    bboxes.append(bbox)
+
+    return org_images, cropped_images, bboxes
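+
+
+# Illustrative sketch (not part of the pipeline): pulling a random 144x144
+# patch out of a larger image with get_cropped_images, the way the training
+# loop collects background patches. The input array is made up for the example.
+def _random_crop_sketch():
+    image = np.random.rand(256, 256, 3)
+    _, crops, bboxes = get_cropped_images([image], shape_0=144, shape_1=144,
+                                          random_position=True, custom_size=True)
+    assert crops[0].shape == (144, 144, 3)
+    return crops, bboxes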
+
+
+# Crop function for evaluation
+# Used to crop the ground truth masks correctly
+# Returns the cropped ground truth masks
+def get_cropped_eval(masks, anns, shape_0, shape_1):
+
+    cropped_masks = []
+
+    i = 0
+    for seg in anns:
+        for ann in seg:
+            if ann['iscrowd'] == 1 or ann['area'] < 1500 or ann['area'] > 17000:
+                continue
+
+            else:
+                mask = masks[i]
+                bbox = ann['bbox']
+                crop_y = int(bbox[0])
+                crop_x = int(bbox[1])
+                height = int(bbox[2]/2)
+                width = int(bbox[3]/2)
+
+                crop_width = int(shape_0/2)
+                crop_height = int(shape_1/2)
+
+                start_x = crop_x + width
+                start_y = crop_y + height
+
+                if start_x - crop_width < 0:
+                    crop_width = start_x
+                if start_y - crop_height < 0:
+                    crop_height = start_y
+
+                if start_x + crop_width > mask.shape[0]:
+                    crop_width = mask.shape[0] - start_x
+                if start_y + crop_height > mask.shape[1]:
+                    crop_height = mask.shape[1] - start_y
+
+                if mask.ndim <= 1 or crop_width * 2 > shape_0 or crop_height * 2 > shape_1:
+                    # consume the mask anyway so the list stays aligned with the annotations
+                    i += 1
+                    continue
+
+                mask_cropped = mask[start_x - crop_width:start_x + crop_width,
+                                    start_y - crop_height:start_y + crop_height]
+
+                if mask_cropped.shape[0] != shape_0 or mask_cropped.shape[1] != shape_1:
+                    mask_cropped = cut_to_size([mask_cropped], shape_0, shape_1)[0]
+
+                cropped_masks.append(mask_cropped)
+                i += 1
+
+    return cropped_masks
+
+
+# Get all relevant images for training according to the category
+# Set the category at the top of the file
+def get_image_and_anns(dataset, batch_size, category, noise=False, blur=False):
+
+    images = []
+    image_anns = []
+
+    cat_ids = dataset.getCatIds(catNms=[category])
+    img_ids = dataset.getImgIds(catIds=cat_ids)
+
+    for i in range(batch_size):
+
+        img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]
+        # scale the pixel values to [-1, 1]
+        image = (skimage.io.imread(img['coco_url'], as_gray=False)/127.5) - 1
+
+        ann_ids = dataset.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
+        anns = dataset.loadAnns(ids=ann_ids)
+
+        if np.any(noise):
+            gauss = np.random.normal(0, 1, image.shape)
+            image = image + 0.5*gauss
+
+        if np.any(blur):
+            image = ndimage.gaussian_filter(image, sigma=2)
+
+        images.append(image)
+        image_anns.append(anns)
+
+    return images, image_anns
+
+
+# Save a picture to SAVE_DIRECTORY (or EVAL_DIRECTORY)
+# Set the paths at the top of the file
+def save_image(image, file_name='example', eval_directory=False):
+
+    if image.shape[0] == 1:
+        image = np.squeeze(image, axis=0)
+    if image.ndim == 3:
+        if image.shape[2] == 1:
+            image = np.squeeze(image, axis=2)
+
+    plt.imshow(image)
+    plt.axis('off')
+    if eval_directory:
+        plt.savefig(EVAL_DIRECTORY + '/' + file_name)
+    else:
+        plt.savefig(SAVE_DIRECTORY + '/' + file_name)
+    print('%s saved' % file_name)
+
+
+# Get usable data annotations from the val dataset of COCO, saved at VAL_PATH
+# Path variable at the top of the file
+def get_val_dataset():
+    val_coco = COCO(VAL_PATH)
+    return val_coco
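+
+
+# Illustrative sketch (not part of the pipeline): minimal end-to-end use of
+# the helpers in this module. Assumes the COCO annotation file at VAL_PATH
+# exists and that network access is available for the image download; the
+# 'person' category mirrors the training setup and is otherwise arbitrary.
+def _smoke_test_sketch():
+    dataset = get_val_dataset()
+    images, anns = get_image_and_anns(dataset, batch_size=1, category='person')
+    _, crops, _ = get_cropped_images(images, anns, shape_0=128, shape_1=128,
+                                     custom_size=True)
+    if crops:
+        save_image((crops[0] + 1) / 2, 'smoke_test')   # rescale from [-1, 1]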
+
+
+# Get usable data annotations from the train dataset of COCO, saved at TRAIN_PATH
+# Path variable at the top of the file
+def get_train_dataset():
+    train_coco = COCO(TRAIN_PATH)
+    return train_coco