diff --git a/009_data_visualization.ipynb b/009_data_visualization.ipynb deleted file mode 100644 index 657dc31..0000000 --- a/009_data_visualization.ipynb +++ /dev/null @@ -1,768 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "009_data-visualization", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uB9UYPsHYFYY", - "colab_type": "text" - }, - "source": [ - "# Data Visualization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UVTHNHhEYSYj", - "colab_type": "text" - }, - "source": [ - "## Import Libraries" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NdWLfwAEcKJK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Import Library for Data Manipulation\n", - "import pandas as pd" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "7H6g1punZZcG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Import Libraries for Visualization\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mxHQEGqOYOoz", - "colab_type": "text" - }, - "source": [ - "## Import Data" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PeKg5U9hdpUb", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Import Data From an online link (GitHub)\n", - "df = pd.read_csv('https://raw.githubusercontent.com/dianrdn/data/master/suicide_germany2.csv', sep=',',)\n", - "df" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": 
"nkd6N72ZfbwG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Prints the Number of Rows and Columns\n", - "df.shape" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "fZUCcxhlfmJj", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Prints Information About a DataFrame\n", - "df.info()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UR51hXJMiYdh", - "colab_type": "text" - }, - "source": [ - "## Distribution Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nDgoq0uPi0mG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Set Graph Size\n", - "plt.rcParams['figure.figsize'] = (16, 8)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "v7pawK0DkDAS", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Visualize Distribution\n", - "sns.kdeplot(df['suicides_no'])\n", - "plt.xlabel('Number of Suicides')\n", - "plt.ylabel('Probability Density Function / The Likelihood of Outcome')\n", - "plt.title('Number of Suicide Distribution')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ArlZQ-4wiWmI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Visualize Histogram\n", - "sns.distplot(df['suicides_no'])\n", - "plt.xlabel('Number of Suicides')\n", - "plt.ylabel('Probability Density Function / The Likelihood of Outcome')\n", - "plt.title('Number of Suicide Distribution')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "air4PeB7jepA", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Set Number of Bins\n", - "sns.distplot(df['suicides_no'], bins=50)\n", - "plt.xlabel('Number of Suicides')\n", - "plt.ylabel('Probability Density Function / The Likelihood of Outcome')\n", - "plt.title('Number of 
Suicide Distribution')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uYHu8yy9iUFQ", - "colab_type": "text" - }, - "source": [ - "## Pair Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "s2yk7AIVkit3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Visualize Pair Plot\n", - "sns.pairplot(df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "H5goAcLJQPY3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Visualize Pair Plot with Colors\n", - "sns.pairplot(df, hue='sex')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s5vgGq_uodV7", - "colab_type": "text" - }, - "source": [ - "## Bar Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "TFRRadpuonXb", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Basic Bar Plot\n", - "sns.barplot(x='generation', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "vYGJ3fR-pVwO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw a Set of Vertical Bars with Different Colors based on Sex\n", - "sns.barplot(x='generation', y='suicides_no', hue='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "_kA0UTi8psxP", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Add Caps to the Error Bars\n", - "sns.barplot(x='generation', y='suicides_no', hue='sex', capsize=.2, data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "9_ue3cdArZCb", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.catplot(x='generation', y='suicides_no', hue='sex', capsize=.2, kind='bar', col='sex', data=df)" - ], - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aZ5Tc9mdIsNL", - "colab_type": "text" - }, - "source": [ - "## Line Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "QIZw3FHUu7Cw", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Basic Line Plot\n", - "sns.lineplot(x='year', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "CTwkTYiqI3hS", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Line Plot with Different Colors based on Age\n", - "sns.lineplot(x='year', y='suicides_no', hue='age', palette='hls', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "GZxwcdO4JGQf", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.relplot(x='year', y='suicides_no', hue='age', palette='hls', col='sex', kind='line', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sgDiwGwhiOcY", - "colab_type": "text" - }, - "source": [ - "## Scatter Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "CWmeNua9eh0b", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Basic Scatterplot\n", - "sns.scatterplot(x='year', y='suicides_no', data= df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "bTW9VX8BnORl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Show the Groups with Different Colors based on Age\n", - "sns.scatterplot(x='year', y='suicides_no', hue='age', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "HBtNkfmtba1S", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Show the Grouping Variable by Marker Style based on Sex\n", - 
"sns.scatterplot(x='year', y='suicides_no', hue='age', style='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "S3QSBdcl0bD1", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Apply Different Color Palette\n", - "sns.scatterplot(x='year', y='suicides_no', hue='age', style='sex', palette='hls', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "f_cw4Uev0890", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Set Points Size Proportional to Age\n", - "sns.scatterplot(x='year', y='suicides_no', hue='age', style='sex', palette='hls', size='age', sizes=(20, 200), data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "RDuZHLm8hSPc", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.relplot(x='year', y='suicides_no', hue='age', style='sex', palette='hls', size='age', sizes=(20, 200), col='sex', kind='scatter', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "d6q-igQyhRAl", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Age\n", - "sns.relplot(x='year', y='suicides_no', hue='age', style='sex', palette='hls', size='age', sizes=(20, 200), col='age', kind='scatter', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HIholpMmUFJX", - "colab_type": "text" - }, - "source": [ - "## Regression Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "DWy9ENw5SZ07", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Regression Plot\n", - "sns.lmplot(x='year', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": 
"GehNRIfRUN-p", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Multiple Regression Plot\n", - "sns.lmplot(x='year', y='suicides_no', hue='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h-NwWpBqmw4j", - "colab_type": "text" - }, - "source": [ - "## Joint Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "W0Zf7gIzmklC", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Joint Plot\n", - "sns.jointplot(x='year', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "QnEei5YBvmtg", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Regression Plot\n", - "sns.jointplot(x='year', y='gdp_for_year', data=df, kind='reg', truncate=False)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "RtHcfMLemzk4", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Hexagon Plot\n", - "sns.jointplot(x='year', y='gdp_for_year', kind='hex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "VmTsnY2snm6Q", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Kernel Density Estimation Plot\n", - "sns.jointplot(x='year', y='gdp_for_year', kind='kde', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KDZ8gc7hrflE", - "colab_type": "text" - }, - "source": [ - "## Box Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4nRCjxPFr4VF", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Box Plot\n", - "sns.boxplot(x='year', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ocLH80PRsV3h", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw a 
Set of Box Plots with Different Colors based on Sex\n", - "sns.boxplot(x='year', y='suicides_no', hue='sex', palette='hls', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "wv7PtgNls8nQ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.catplot(x='year', y='suicides_no', hue='sex', palette='hls', kind='box', col='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "aF3KFmT_tzAS" - }, - "source": [ - "## Boxen Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "6qLvT6o1tzAX", - "colab": {} - }, - "source": [ - "# Draw Boxen Plot\n", - "sns.boxenplot(x='year', y='suicides_no', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "8I0QtrE3tzAd", - "colab": {} - }, - "source": [ - "# Draw a Set of Boxen Plots with Different Colors based on Sex\n", - "sns.boxenplot(x='year', y='suicides_no', hue='sex', palette='hls', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "BemZqPGatzAg", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.catplot(x='year', y='suicides_no', hue='sex', palette='hls', kind='boxen', col='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sUPwtvc_N_Q2", - "colab_type": "text" - }, - "source": [ - "## Violin Plot" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "KOaRR8ghJ3MS", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Basic Violin Plot\n", - "sns.violinplot(x='year', y='suicides_no', split=True, data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - 
"cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "cYC2IohgOFaU", - "colab": {} - }, - "source": [ - "# Draw Split Violin Plot with Different Colors based on Sex\n", - "sns.violinplot(x='year', y='suicides_no', hue='sex', palette='hls', split=True, data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "e5nJ-xSlOqyg", - "colab": {} - }, - "source": [ - "# Make Different Plot based on Attribute : Sex\n", - "sns.catplot(x='year', y='suicides_no', hue='sex', palette='hls', kind='violin', col='sex', data=df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zFLxgT3CSL_W", - "colab_type": "text" - }, - "source": [ - "## Correlation Map" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "pTWT7M4Zk7U5", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Select Only Numeric Attributes\n", - "df_num = df.select_dtypes(include=['float64', 'int64'])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "x2gewwl9OqkM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Draw Correlation Map\n", - "sns.clustermap(df_num.corr(), center=0, cmap='vlag', linewidths=.75)" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file