Pinned Loading
-
Injury-severity-classification
Injury-severity-classification (Public): Data set and code for the injury severity classification project
Jupyter Notebook
-
-
MATH6373-Deep-Learning
MATH6373-Deep-Learning (Public): Forked from dustinak80/Deep_Learning
Neural Networks and Deep Learning
Jupyter Notebook 1
-
MATH6350-Data-Mining
MATH6350-Data-Mining (Public): All homework from the class on statistical learning and data mining
HTML
-
MATH6380-Python-for-beginners
MATH6380-Python-for-beginners (Public): All things related to programming for data analytics
Rich Text Format
-
Reading large csv file from Github
Reading large csv file from Github 1{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Simple import large file.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyP7yl2BE+frCzsGndibSg8u"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.7"},"toc":{"base_numbering":1,"nav_menu":{},"number_sections":true,"sideBar":true,"skip_h1_title":false,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":true,"toc_window_display":false}},"cells":[{"cell_type":"markdown","metadata":{"colab_type":"text","id":"qFsT_lEWEwZ7"},"source":["# Reading a large csv/excel file from **GitHub**\n","\n","---\n","\n","\n"]},{"cell_type":"code","metadata":{"colab_type":"code","id":"XdGRPTsvLaXP","colab":{}},"source":["from io import StringIO\n","from io import BytesIO\n","from zipfile import ZipFile\n","import urllib.request\n","import pandas as pd "],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"M1_tGqITEtGf"},"source":["# Import url\n"]},{"cell_type":"code","metadata":{"colab_type":"code","id":"c2C-Nfg-kxmg","colab":{}},"source":["link1=\"https://github.com/duonghung86/Vehicle-crash-analysis/raw/master/final%20data.zip\"\n","url = urllib.request.urlopen(link1)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"DE9o_qxoE-FF"},"source":["## Read file from url as bytes object\n","\n"]},{"cell_type":"code","metadata":{"colab_type":"code","id":"NR_lbJHzj3I7","colab":{}},"source":["with ZipFile(BytesIO(url.read())) as my_zip_file:\n"," for contained_file in my_zip_file.namelist():\n"," fzip=my_zip_file.open(contained_file)\n"," 
data=fzip.read()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","executionInfo":{"elapsed":2188,"status":"ok","timestamp":1589122588152,"user":{"displayName":"Duong Hung","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ggfgs4FqFN7Yq3pQXoVHeQcc9w9kDV_fAGoQ3Nx4nk=s64","userId":"01608720095171318435"},"user_tz":300},"id":"cgXDjldoIkio","outputId":"8102198a-30b7-47a6-9981-926324117b8d","colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["# example of the data file\n","data[:100]"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["b'Injury Severity,Crash_Month,Crash_Day,Crash_Minute,Crash_AM/PM,Crash_Hour,Unit_Nbr,Prsn_Age,Toll_Roa'"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"colab_type":"text","id":"GWFlYwrOFo_P"},"source":["# Convert the bytes object to pandas dataframe object\n"]},{"cell_type":"code","metadata":{"colab_type":"code","executionInfo":{"elapsed":7267,"status":"ok","timestamp":1589122593243,"user":{"displayName":"Duong Hung","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Ggfgs4FqFN7Yq3pQXoVHeQcc9w9kDV_fAGoQ3Nx4nk=s64","userId":"01608720095171318435"},"user_tz":300},"id":"QWIVXl4JkMso","outputId":"794aaf45-a1dd-49eb-fb63-6d0ecf98c932","colab":{"base_uri":"https://localhost:8080/","height":372}},"source":["# Convert bytes data to string data\n","s=str(data,'utf-8')\n","\n","data = StringIO(s) \n","\n","# convert it to pandas DataFrame as normal csv file\n","df=pd.read_csv(data)\n","\n","df.tail()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>Injury Severity</th>\n"," <th>Crash_Month</th>\n"," <th>Crash_Day</th>\n"," <th>Crash_Minute</th>\n"," <th>Crash_AM/PM</th>\n"," <th>Crash_Hour</th>\n"," <th>Unit_Nbr</th>\n"," <th>Prsn_Age</th>\n"," <th>Toll_Road_Fl</th>\n"," <th>Crash_Speed_Limit</th>\n"," <th>Road_Constr_Zone_Fl</th>\n"," <th>Road_Constr_Zone_Wrkr_Fl</th>\n"," <th>At_Intrsct_Fl</th>\n"," <th>Latitude</th>\n"," <th>Longitude</th>\n"," <th>Day_of_Week_MON</th>\n"," <th>Day_of_Week_SAT</th>\n"," <th>Day_of_Week_SUN</th>\n"," <th>Day_of_Week_THU</th>\n"," <th>Day_of_Week_TUE</th>\n"," <th>Day_of_Week_WED</th>\n"," <th>Prsn_Type_ID_DRIVER OF MOTORCYCLE TYPE VEHICLE</th>\n"," <th>Prsn_Type_ID_PASSENGER/OCCUPANT</th>\n"," <th>Prsn_Type_ID_PASSENGER/OCCUPANT ON MOTORCYCLE TYPE VEHICLE</th>\n"," <th>Prsn_Type_ID_PEDALCYCLIST</th>\n"," <th>Prsn_Type_ID_PEDESTRIAN</th>\n"," <th>Prsn_Type_ID_UNKNOWN</th>\n"," <th>Prsn_Occpnt_Pos_ID_FRONT CENTER</th>\n"," <th>Prsn_Occpnt_Pos_ID_FRONT LEFT</th>\n"," <th>Prsn_Occpnt_Pos_ID_FRONT RIGHT</th>\n"," <th>Prsn_Occpnt_Pos_ID_OTHER (EXPLAIN IN NARRATIVE)</th>\n"," <th>Prsn_Occpnt_Pos_ID_OTHER IN VEHICLE</th>\n"," <th>Prsn_Occpnt_Pos_ID_OUTSIDE VEHICLE</th>\n"," <th>Prsn_Occpnt_Pos_ID_PASSENGER IN BUS</th>\n"," <th>Prsn_Occpnt_Pos_ID_PEDESTRIAN, PEDALCYCLIST, OR MOTORIZED CONVEYANCE</th>\n"," <th>Prsn_Occpnt_Pos_ID_SECOND SEAT CENTER</th>\n"," <th>Prsn_Occpnt_Pos_ID_SECOND SEAT LEFT</th>\n"," <th>Prsn_Occpnt_Pos_ID_SECOND SEAT RIGHT</th>\n"," <th>Prsn_Occpnt_Pos_ID_THIRD SEAT CENTER</th>\n"," <th>Prsn_Occpnt_Pos_ID_UNKNOWN</th>\n"," <th>...</th>\n"," <th>Light_Cond_ID_DAYLIGHT</th>\n"," <th>Light_Cond_ID_DUSK</th>\n"," <th>Light_Cond_ID_UNKNOWN</th>\n"," <th>Road_Algn_ID_CURVE, HILLCREST</th>\n"," <th>Road_Algn_ID_CURVE, LEVEL</th>\n"," <th>Road_Algn_ID_STRAIGHT, GRADE</th>\n"," <th>Road_Algn_ID_STRAIGHT, HILLCREST</th>\n"," <th>Road_Algn_ID_STRAIGHT, LEVEL</th>\n"," <th>Road_Algn_ID_UNKNOWN</th>\n"," <th>Surf_Cond_ID_ICE</th>\n"," 
<th>Surf_Cond_ID_SAND, MUD, DIRT</th>\n"," <th>Surf_Cond_ID_SLUSH</th>\n"," <th>Surf_Cond_ID_SNOW</th>\n"," <th>Surf_Cond_ID_STANDING WATER</th>\n"," <th>Surf_Cond_ID_UNKNOWN</th>\n"," <th>Surf_Cond_ID_WET</th>\n"," <th>Traffic_Cntl_ID_CENTER STRIPE/DIVIDER</th>\n"," <th>Traffic_Cntl_ID_CROSSWALK</th>\n"," <th>Traffic_Cntl_ID_FLAGMAN</th>\n"," <th>Traffic_Cntl_ID_FLASHING RED LIGHT</th>\n"," <th>Traffic_Cntl_ID_FLASHING YELLOW LIGHT</th>\n"," <th>Traffic_Cntl_ID_INOPERATIVE (EXPLAIN IN NARRATIVE)</th>\n"," <th>Traffic_Cntl_ID_MARKED LANES</th>\n"," <th>Traffic_Cntl_ID_NO PASSING ZONE</th>\n"," <th>Traffic_Cntl_ID_NONE</th>\n"," <th>Traffic_Cntl_ID_OFFICER</th>\n"," <th>Traffic_Cntl_ID_OTHER (EXPLAIN IN NARRATIVE)</th>\n"," <th>Traffic_Cntl_ID_RR GATE/SIGNAL</th>\n"," <th>Traffic_Cntl_ID_SIGNAL LIGHT</th>\n"," <th>Traffic_Cntl_ID_SIGNAL LIGHT WITH RED LIGHT RUNNING CAMERA</th>\n"," <th>Traffic_Cntl_ID_STOP SIGN</th>\n"," <th>Traffic_Cntl_ID_WARNING SIGN</th>\n"," <th>Traffic_Cntl_ID_YIELD SIGN</th>\n"," <th>Unit_Desc_ID_MOTORIZED CONVEYANCE</th>\n"," <th>Unit_Desc_ID_NON-CONTACT</th>\n"," <th>Unit_Desc_ID_OTHER (EXPLAIN IN NARRATIVE)</th>\n"," <th>Unit_Desc_ID_PEDALCYCLIST</th>\n"," <th>Unit_Desc_ID_PEDESTRIAN</th>\n"," <th>Unit_Desc_ID_TOWED/PUSHED/TRAILER</th>\n"," <th>Unit_Desc_ID_TRAIN</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>488844</th>\n"," <td>NOT INJURED</td>\n"," <td>11</td>\n"," <td>29</td>\n"," <td>13</td>\n"," <td>0</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>68</td>\n"," <td>0</td>\n"," <td>70</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>28.954878</td>\n"," <td>-97.987513</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>488845</th>\n"," <td>NOT INJURED</td>\n"," <td>11</td>\n"," <td>25</td>\n"," <td>6</td>\n"," <td>0</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>44</td>\n"," <td>0</td>\n"," <td>55</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>32.756880</td>\n"," <td>-94.354907</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>488846</th>\n"," <td>POSSIBLE INJURY</td>\n"," <td>11</td>\n"," <td>25</td>\n"," <td>6</td>\n"," <td>0</td>\n"," <td>11</td>\n"," <td>2</td>\n"," <td>57</td>\n"," <td>0</td>\n"," <td>55</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>32.756880</td>\n"," <td>-94.354907</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>488847</th>\n"," <td>NOT INJURED</td>\n"," <td>9</td>\n"," <td>3</td>\n"," <td>15</td>\n"," <td>1</td>\n"," <td>4</td>\n"," <td>1</td>\n"," <td>16</td>\n"," <td>0</td>\n"," <td>40</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>31.279209</td>\n"," <td>-94.579816</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>488848</th>\n"," <td>NOT INJURED</td>\n"," <td>9</td>\n"," <td>3</td>\n"," <td>15</td>\n"," <td>1</td>\n"," <td>4</td>\n"," <td>2</td>\n"," <td>16</td>\n"," <td>0</td>\n"," <td>40</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>31.279209</td>\n"," <td>-94.579816</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 134 columns</p>\n","</div>"],"text/plain":[" Injury Severity ... Unit_Desc_ID_TRAIN\n","488844 NOT INJURED ... 0\n","488845 NOT INJURED ... 0\n","488846 POSSIBLE INJURY ... 0\n","488847 NOT INJURED ... 0\n","488848 NOT INJURED ... 0\n","\n","[5 rows x 134 columns]"]},"metadata":{"tags":[]},"execution_count":5}]}]}
Something went wrong, please refresh the page to try again.
If the problem persists, check the GitHub status page or contact support.
If the problem persists, check the GitHub status page or contact support.