diff --git a/app.py b/app.py index 855ab97..f90b0e9 100644 --- a/app.py +++ b/app.py @@ -29,7 +29,8 @@ selected_structure = st.selectbox("Choose data structure to analyse", ("Structured data", "Unstructured, text data", - "Unstructured, audio data")) + "Unstructured, audio data", + "Unstructured, image data")) if selected_structure == "Structured data": @@ -41,12 +42,16 @@ from text_eda.text_data import * text_data_app() - elif selected_structure == "Unstructured, audio data": from audio_eda.audio_data import * audio_data_app() + elif selected_structure == "Unstructured, image data": + + from image_eda.image_data import * + image_data_app() + except KeyError: st.error("Please select a key value from the dropdown to continue.") diff --git a/helper_functions.py b/helper_functions.py index 34defe0..cd0d439 100644 --- a/helper_functions.py +++ b/helper_functions.py @@ -6,6 +6,12 @@ from PIL import Image import pandas as pd +import io +from PIL import Image +from pprint import pprint +from zipfile import ZipFile +from image_eda.augment import apply_augmentations + def set_bg_hack(main_bg): ''' A function to unpack an image from root folder and set as bg. 
def load_images():
    """Let the user upload an image dataset through the sidebar.

    Returns a list of (file_details, PIL.Image) tuples with the session's
    augmentations already applied, or None when nothing has been uploaded yet.
    """
    data = st.sidebar.file_uploader("Upload image dataset",
                                    type=['png', 'jpg', 'jpeg'],
                                    accept_multiple_files=True)
    if not data:
        return None  # nothing uploaded yet

    augmentations = get_augmentations()
    images = []
    for image_file in data:
        # The "None" key maps to None so a caption choice of 'None'
        # renders no caption in the grid view.
        file_details = {"None": None,
                        "File name": image_file.name,
                        "File type": image_file.type,
                        "File size": image_file.size}
        images.append((file_details, Image.open(image_file)))

    return apply_augmentations(images, augmentations)

def _get_default_augmentations() -> dict:
    """Return the augmentation settings in their 'nothing applied' state."""
    return {
        'resize': {'width': None, 'height': None},
        'grayscale': False,
        'contrast': {'value': None},
        'brightness': {'value': None},
        'sharpness': {'value': None},
        'color': {'value': None},
        'denoise': False,
    }

def get_augmentations() -> dict:
    """Fetch the augmentation settings from the Streamlit session state,
    initialising them to the defaults on first use."""
    if 'augmentations' not in st.session_state:
        st.session_state.augmentations = _get_default_augmentations()
    return st.session_state.augmentations

def update_augmentations(augmentations) -> None:
    """Persist modified augmentation settings back into the session state."""
    st.session_state.augmentations = augmentations

def _file_process_in_memory(images):
    """Convert PIL image objects into in-memory PNG byte buffers.

    NOTE: the source PIL images are closed afterwards, so callers must not
    reuse them after this call.
    """
    new_images = []
    for image_name, pil_image in images:
        file_object = io.BytesIO()
        pil_image.save(file_object, "PNG")
        pil_image.close()
        new_images.append((image_name, file_object))
    return new_images

def export(images):
    """Zip the (augmented) images fully in memory and offer a download button."""
    import os

    images = _file_process_in_memory(images)

    # Build the zip in memory; the ZipFile must be closed (end of the `with`)
    # before getvalue() so the central directory is written.
    zip_file_bytes_io = io.BytesIO()
    with ZipFile(zip_file_bytes_io, 'w') as zip_file:
        for image_name, bytes_stream in images:
            # Images are re-encoded as PNG, so drop the original extension to
            # avoid double extensions such as "photo.jpg.png".
            base, _ = os.path.splitext(image_name["File name"])
            zip_file.writestr(base + ".png", bytes_stream.getvalue())

    name = st.sidebar.text_input("File name", value="My augmented dataset")
    st.sidebar.download_button('Download Zip', zip_file_bytes_io.getvalue(),
                               file_name=f'{name}.zip')

def apply_augmentations(images, augmentations) -> list:
    """Apply every enabled augmentation to each (details, image) pair.

    Returns a new list of (details, image) tuples; disabled augmentations
    (None / False settings) are skipped.
    """
    new_images = []
    for details, image in images:
        new_image = image
        resize = augmentations["resize"]
        if resize["width"] is not None and resize["height"] is not None:
            # Image.ANTIALIAS was deprecated and removed in Pillow 10;
            # LANCZOS is the equivalent high-quality resampling filter.
            new_image = new_image.resize((resize["width"], resize["height"]),
                                         Image.LANCZOS)
        if augmentations["grayscale"]:
            new_image = new_image.convert('L')
        for key, enhancer in (("contrast", ImageEnhance.Contrast),
                              ("brightness", ImageEnhance.Brightness),
                              ("sharpness", ImageEnhance.Sharpness),
                              ("color", ImageEnhance.Color)):
            value = augmentations[key]["value"]
            if value is not None:
                new_image = enhancer(new_image).enhance(value)
        if augmentations["denoise"]:
            arr = np.array(new_image)
            if arr.ndim == 2:
                # Single-channel (grayscale) image.
                arr = cv2.fastNlMeansDenoising(arr)
            else:
                if arr.shape[2] == 4:
                    # cv2's colored denoiser expects 3 channels; drop alpha.
                    arr = np.array(new_image.convert('RGB'))
                arr = cv2.fastNlMeansDenoisingColored(arr)
            new_image = Image.fromarray(arr)
        new_images.append((details, new_image))
    return new_images
"""

A file with the high level image eda app functions

"""
import streamlit as st
from helper_functions import *
from image_eda.preparation import show_grid, show_sizes, show_histograms, show_channels, augmentations, export_images


def image_data_app():
    """Top-level Streamlit page for unsupervised image-data EDA."""

    # Intro text on the main panel.
    st.write("Welcome to the DQW for unsupervised image data. ",
             "Understanding your data is an important step ",
             "in AI model development. This app ",
             "offers visualisation of descriptive statistics of a ",
             "collection of images using a multitude of packages.")

    display_app_header(main_txt="Step 1",
                       sub_txt="Upload data",
                       is_sidebar=True)

    images = load_images()

    if images:
        display_app_header(main_txt="Step 2",
                           sub_txt="Choose what you want to see/do",
                           is_sidebar=True)

        # Map each sidebar menu entry to the page function that renders it.
        pages = {
            "Image grid": show_grid,
            "Image sizes": show_sizes,
            "Color histograms": show_histograms,
            "Color channels": show_channels,
            "Augmentations": augmentations,
        }
        choice = st.sidebar.radio("", tuple(pages))
        pages[choice](images)

        display_app_header(main_txt="Step 3",
                           sub_txt="Export augmented data",
                           is_sidebar=True)

        if st.sidebar.button("Export ⬇️"):
            export_images(images)

    # NOTE: a selectbox distinguishing unsupervised vs. supervised image data
    # (classification / segmentation / regression) was sketched here; only
    # unsupervised data is currently supported.
import streamlit as st
import numpy as np
import pandas as pd
from helper_functions import get_augmentations, update_augmentations, export

def show_sizes(images):
    """Bubble chart of how many images share each (width, height)."""
    image_sizes: np.ndarray = np.array([x[1].size for x in images]).astype(int)
    sizes = pd.DataFrame({
        "width": [x[0] for x in image_sizes],
        "height": [x[1] for x in image_sizes],
    })
    sizes = (sizes.groupby(['width', 'height']).size()
                  .reset_index().rename(columns={0: 'counts'}))
    st.vega_lite_chart(sizes, {
        'mark': {'type': 'circle', 'tooltip': True},
        'encoding': {
            'x': {'field': 'width', 'type': 'quantitative'},
            'y': {'field': 'height', 'type': 'quantitative'},
            'size': {'field': 'counts', 'type': 'quantitative'},
            'color': {'field': 'counts', 'type': 'quantitative'},
        },
    }, use_container_width=True)

def show_grid(images):
    """Render the images in a grid with a user-selectable caption."""
    images_per_row = st.slider("The number of images per row", step=1,
                               value=4, min_value=1, max_value=8)
    caption_type = st.selectbox('Select image caption',
                                ('None', 'File type', 'File name', 'File size'))
    st.subheader('A preview of the images is displayed below, please wait for data to be analysed :bar_chart:')
    n_rows = int(np.ceil(len(images) / images_per_row))
    for row_num in range(n_rows):
        cols = st.columns(images_per_row)
        start = row_num * images_per_row
        # zip stops at the shorter sequence, so a partial last row is safe.
        for col, (detail, image) in zip(cols, images[start:start + images_per_row]):
            col.image(image, use_column_width=True, caption=detail[caption_type])

def show_histograms(images):
    """Per-image pixel-intensity histogram, one colour series per channel."""
    for _, image in images:
        channels = image.split()
        # Single-channel images are treated as grayscale; extra channels
        # beyond RGB (e.g. alpha) are ignored by the zip below.
        colors = ['Gray'] if len(channels) == 1 else ['Red', 'Green', 'Blue']
        cols = st.columns(2)
        cols[0].image(image, use_column_width=True)
        colors_df = None
        for channel, color in zip(channels, colors):
            df = pd.DataFrame({'pixel': np.asarray(channel).ravel()})
            df = df.groupby('pixel').size().reset_index().rename(columns={0: 'counts'})
            df['color'] = color
            colors_df = df if colors_df is None else pd.concat([colors_df, df],
                                                               ignore_index=True)
        cols[1].vega_lite_chart(colors_df, {
            'title': 'Pixel intensity histogram for color channel(s)',
            'mark': {'type': 'bar', "cornerRadiusTopLeft": 3, "cornerRadiusTopRight": 3},
            'encoding': {
                'x': {'field': 'pixel', 'type': 'quantitative', 'title': 'Pixel value'},
                'y': {"field": "counts", 'type': 'quantitative', 'title': 'Number of pixels'},
                'color': {'field': 'color', "type": "nominal", "scale": {"domain": colors, "range": colors, "type": "ordinal"}}
            },
        }, use_container_width=True)

def show_channels(images):
    """Show each image next to its individual colour-channel planes."""
    for _, image in images:
        channels = image.split()
        colors = ['Gray'] if len(channels) == 1 else ['Red', 'Green', 'Blue']
        cols = st.columns(len(colors) + 1)
        cols[0].image(image, use_column_width=True)
        for col, channel, color in zip(cols[1:], channels, colors):
            col.image(np.asarray(channel), use_column_width=True,
                      caption=f'{color} channel')

def _augmentation_controls(images, settings, is_active, on_apply, on_revert,
                           success_msg):
    """Render the Apply / EDA / Revert button row shared by every augmentation.

    on_apply / on_revert mutate `settings`; is_active() decides whether the
    Revert button is offered (checked after Apply, as in the original flow).
    """
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        if st.button('Apply ▶️'):
            on_apply()
            update_augmentations(settings)
            st.success(success_msg)
    with col2:
        if st.button('EDA 📊'):
            show_eda(images)
    if is_active():
        with col3:
            if st.button('Revert ◀️'):
                on_revert()
                update_augmentations(settings)

def _toggle_controls(images, settings, key, success_msg):
    """Apply/revert controls for boolean augmentations (grayscale, denoise)."""
    def enable():
        settings[key] = True

    def disable():
        settings[key] = False

    # Truthy check also fixes the original denoise bug where
    # `settings["denoise"] is not None` was always True, so the Revert
    # button appeared even when denoising was off.
    _augmentation_controls(images, settings, lambda: bool(settings[key]),
                           enable, disable, success_msg)

# Slider-driven enhancements: menu label -> (settings key, slider label, default).
_ENHANCEMENTS = {
    "Contrast enhancement": ("contrast", 'Contrast level', 0.5),
    "Brightness enhancement": ("brightness", 'Brightness level', 1.5),
    "Sharpness enhancement": ("sharpness", 'Sharpness level', 2.),
    "Color enhancement": ("color", 'Color level', 2.),
}

def augmentations(images):
    """Sidebar-driven configuration of the augmentation pipeline."""
    augmentation = st.selectbox("Choose augmentation method",
                                ("Resize",
                                 "Grayscale",
                                 "Contrast enhancement",
                                 "Brightness enhancement",
                                 "Sharpness enhancement",
                                 "Color enhancement",
                                 "Denoise"))
    settings = get_augmentations()

    if augmentation == "Resize":
        new_width = st.number_input('Image width', min_value=32,
                                    max_value=2048, value=300)
        new_height = st.number_input('Image height', min_value=32,
                                     max_value=2048, value=200)

        def apply_resize():
            settings["resize"]["width"] = new_width
            settings["resize"]["height"] = new_height

        def revert_resize():
            settings["resize"]["width"] = None
            settings["resize"]["height"] = None

        _augmentation_controls(
            images, settings,
            lambda: (settings["resize"]["width"] is not None
                     and settings["resize"]["height"] is not None),
            apply_resize, revert_resize,
            "Successfully resized images")

    elif augmentation == "Grayscale":
        _toggle_controls(images, settings, "grayscale",
                         "Successfully converted images to grayscale")

    elif augmentation == "Denoise":
        _toggle_controls(images, settings, "denoise",
                         "Successfully denoised images")

    elif augmentation in _ENHANCEMENTS:
        key, label, default = _ENHANCEMENTS[augmentation]
        value = st.slider(label, min_value=0.1, max_value=5., value=default)

        def apply_enhancement():
            settings[key]["value"] = value

        def revert_enhancement():
            settings[key]["value"] = None

        _augmentation_controls(
            images, settings,
            lambda: settings[key]["value"] is not None,
            apply_enhancement, revert_enhancement,
            f"Successfully enhanced {key} of images")

def export_images(images):
    """Trigger the zip download; surface failures instead of dying silently."""
    try:
        export(images)
    except Exception as e:
        # Keep the console trace but also tell the user something went wrong.
        print(e)
        st.error("Could not export the images, please try again.")

def show_eda(images):
    """Render the full EDA suite (grid, sizes, histograms, channels)."""
    show_grid(images)
    show_sizes(images)
    show_histograms(images)
    show_channels(images)

def create_menu(menu_options=("Preparation", "Selection", "Description")):
    """Render one sidebar button per option; return the clicked option, or None.

    The default is a tuple rather than a list to avoid the shared
    mutable-default-argument pitfall.
    """
    sideb = st.sidebar
    sideb.write("Choose one of the options and get to know your data even better!")
    buttons = [sideb.button(option) for option in menu_options]
    for pressed, option in zip(buttons, menu_options):
        if pressed:
            return option
    return None
userid=st.sidebar.radio("Choose your user ID",("Grid view","Preparation","Selection","Description")) diff --git a/requirements.txt b/requirements.txt index 5825eb6..28723ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,5 @@ wordcloud==1.8.1 tqdm==4.54.1 audiomentations pydub -dtw \ No newline at end of file +dtw +textstat \ No newline at end of file