From b01c4385b442d39fa491652fd8c0c2fc282917ac Mon Sep 17 00:00:00 2001 From: Jonathan Date: Sat, 19 Jan 2019 14:33:52 +0100 Subject: [PATCH] Keras notebook --- notebooks/CLODSA_Keras.ipynb | 582 +++++++++++++++++++++++++++++++++++ 1 file changed, 582 insertions(+) create mode 100644 notebooks/CLODSA_Keras.ipynb diff --git a/notebooks/CLODSA_Keras.ipynb b/notebooks/CLODSA_Keras.ipynb new file mode 100644 index 0000000..62dd68e --- /dev/null +++ b/notebooks/CLODSA_Keras.ipynb @@ -0,0 +1,582 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/joheras/CLoDSA/blob/master/notebooks/CLODSA_Keras.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "lLTwDWw0ni8G" + }, + "source": [ + "# Online augmentation for Keras\n", + "\n", + "In this notebook, we illustrate how CLODSA can be employed for online augmentation in Keras using a subset of the [cats and dogs dataset](https://www.kaggle.com/c/dogs-vs-cats). Such a subset can be downloaded using the following command." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 4024 + }, + "colab_type": "code", + "id": "FbGwos-bni8K", + "outputId": "3151302c-730f-4c18-d751-3dd36c4ed51d" + }, + "outputs": [], + "source": [ + "!wget https://www.dropbox.com/s/dvqk6ukk3sli72t/cats_dogs.zip?dl=0 -O cats_dogs.zip\n", + "!unzip cats_dogs.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "FmaoZcYXni8Y" + }, + "source": [ + "We can check the amount of images in each one of the folders." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 126 + }, + "colab_type": "code", + "id": "yXoVkFkqni8a", + "outputId": "55518f50-f4a3-4273-e5a5-dec4c2bd51fe" + }, + "outputs": [], + "source": [ + "print(\"Number of images in cats folder\")\n", + "!ls cats_dogs/cats/ | wc -l\n", + "print(\"Number of images in dogs folder\")\n", + "!ls cats_dogs/dogs/ | wc -l" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LMaT8vnzni8i" + }, + "source": [ + "## Augmentation techniques\n", + "\n", + "We will use the following techniques for online data augmentation:\n", + "- Vertical and horizontal flip.\n", + "- Equalize histogram\n", + "- Median blur\n", + "- Salt and pepper noise\n", + "- Crops\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MwG_kg6Tni8k" + }, + "source": [ + "## Installing the necessary libraries\n", + "\n", + "In case that CLODSA is not installed in your system, the first task consists in installing it using ``pip``." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 617 + }, + "colab_type": "code", + "id": "z7qMC41Xni8k", + "outputId": "51dca5c0-72ec-4fe2-cdce-81a4fb5c5873" + }, + "outputs": [], + "source": [ + "!pip install clodsa" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EoxKQxZJni8q" + }, + "source": [ + "## Loading the necessary libraries\n", + "\n", + "The first step in the pipeline consists in loading the necessary libraries to apply the data augmentation techniques in CLODSA and to train a Keras model. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "colab_type": "code", + "id": "yzMR_9kBni8s", + "outputId": "16fe5d55-95d6-4329-ad41-a55aa19afdb3" + }, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "from clodsa.augmentors.augmentorFactory import createAugmentor\n", + "from clodsa.transformers.transformerFactory import transformerGenerator\n", + "from clodsa.techniques.techniqueFactory import createTechnique\n", + "from keras.optimizers import SGD\n", + "from clodsa.utils.minivgg import MiniVGGNet\n", + "import cv2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "85NJIbHRni8y" + }, + "source": [ + "## Creating the augmentor object\n", + "\n", + "As explained in the documentation of CLODSA, we need to specify some parameters for the augmentation process, and use them to create an augmentor object. \n", + "\n", + "_The kind of problem_. In this case, we are working in a classification problem" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "udjKKpIMni80" + }, + "outputs": [], + "source": [ + "PROBLEM = \"classification\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AYs7FXmFni86" + }, + "source": [ + "_The annotation mode_. The annotation mode is folders. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ROmfoBePni88" + }, + "outputs": [], + "source": [ + "ANNOTATION_MODE = \"folders\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "JFmLaCVzni9A" + }, + "source": [ + "_The input path_. The input path containing the images. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "N4qLXr7Hni9E" + }, + "outputs": [], + "source": [ + "INPUT_PATH = \"cats_dogs/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SsxyfUVVni9K" + }, + "source": [ + "_The generation mode_. In this case, linear, that is, all the augmentation techniques are applied to all the images of the original dataset. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "KXHMv0Djni9K" + }, + "outputs": [], + "source": [ + "GENERATION_MODE = \"linear\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "P03YdvTTni9Q" + }, + "source": [ + "_The output mode_. The generated images will be provided to train a Keras model. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "hFboWqIcni9Q" + }, + "outputs": [], + "source": [ + "OUTPUT_MODE = \"keras\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We must also fix other parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PARAMETERS = {\n", + " \"batchSize\": 32,\n", + " \"width\": 64,\n", + " \"height\": 64\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pG1harcAni9U" + }, + "source": [ + "Using the above information, we can create our augmentor object. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "OH7Xy3rIni9W" + }, + "outputs": [], + "source": [ + "augmentor = createAugmentor(PROBLEM,ANNOTATION_MODE,OUTPUT_MODE,GENERATION_MODE,INPUT_PATH,PARAMETERS)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3es15zWQni9a" + }, + "source": [ + "## Adding the augmentation techniques\n", + "\n", + "Now, we define the techniques that will be applied in our augmentation process and add them to our augmentor object. To illustrate the transformations, we will use the following image of the dataset. \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First of all, we must define a transformer generator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transformer = transformerGenerator(PROBLEM)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TyuArGXGni9o" + }, + "source": [ + "#### Vertical flip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CG1YIkc4ni9u" + }, + "outputs": [], + "source": [ + "vFlip = createTechnique(\"flip\",{\"flip\":0})\n", + "augmentor.addTransformer(transformer(vFlip))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6zpB7VSoni9-" + }, + "source": [ + "#### Horizontal flip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "q6GhHPJxni-A" + }, + "outputs": [], + "source": [ + "hFlip = createTechnique(\"flip\",{\"flip\":1})\n", + "augmentor.addTransformer(transformer(hFlip))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Z2s6Z_zJni-i" + }, + "source": [ + "#### Crop" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "cEuHL61rni-i" + }, + "outputs": [], + "source": [ + "crop = createTechnique(\"crop\",{\"percentage\":0.9,\"startFrom\": \"TOPLEFT\"})\n", + "augmentor.addTransformer(transformer(crop))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Equalize histogram**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "equalize = createTechnique(\"equalize_histogram\",{})\n", + "augmentor.addTransformer(transformer(equalize))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Median blur**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "median = createTechnique(\"median_blur\",{\"kernel\":3})\n", + "augmentor.addTransformer(transformer(median))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Salt and pepper**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "salt_and_pepper = createTechnique(\"salt_and_pepper\",{\"low\":0,\"high\":255})\n", + "augmentor.addTransformer(transformer(salt_and_pepper))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QTZLwyr_ni-4" + }, + "source": [ + "#### None\n", + "(to keep also the original image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "1XnKlw1Vni-4" + }, + "outputs": [], + "source": [ + "none = createTechnique(\"none\",{})\n", + "augmentor.addTransformer(transformer(none))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nInbOZQHni_A" + }, + "source": [ + "## Definition of the model\n", + "\n", + "Now we define the model using a version of the VGG network." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "SVuonlNLni_A" + }, + "outputs": [], + "source": [ + "opt = SGD(lr=0.05)\n", + "model = MiniVGGNet.build(width=PARAMETERS[\"width\"], height=PARAMETERS[\"height\"], depth=3, classes=2)\n", + "model.compile(loss=\"categorical_crossentropy\", optimizer=opt,metrics=[\"accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "hdW54yfhni_C" + }, + "source": [ + "## Training the model\n", + "\n", + "Finally, we can train the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "yR5dWTCDni_E" + }, + "outputs": [], + "source": [ + "H = model.fit_generator(augmentor.applyAugmentation(),\n", + " steps_per_epoch= PARAMETERS[\"batchSize\"],\n", + " epochs=100, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "CLODSA_Keras.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}