[added]Impute functions for weight data

neoconstantine7 · Apr 4, 2014 · fe85b25 · fe85b25
1 parent 39d4ffa
commit fe85b25
Show file tree

Hide file tree

Showing 27 changed files with 515,846 additions and 0 deletions.
diff --git a/DataScienceIntro/DataWrangling/csv_with_describe.py b/DataScienceIntro/DataWrangling/csv_with_describe.py
@@ -0,0 +1,12 @@
+#!/usr/bin/python
+
+import pandas
+import pprint
+def add_full_name(path_to_csv, path_to_new_csv):
+    """Print describe() statistics for the CSV file at path_to_csv.
+    No 'nameFull' column is built and path_to_new_csv is unused so far."""
+    df = pandas.read_csv(path_to_csv)
+    print(df.describe())
+print("hi")
+# Describe test.csv; the second path is handed over as the output name.
+add_full_name("test.csv", "out.csv")
diff --git a/DataScienceIntro/DataWrangling/dw_impute_08.py b/DataScienceIntro/DataWrangling/dw_impute_08.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+from pandas import *
+import numpy
+
+def imputation(filename):
+    """Load a CSV and fill missing 'weight' values with the column mean.
+
+    Exercise notes: pandas objects have a 'fillna(value)' method that replaces
+    every NA in a dataframe or series with the given value, used like this:
+        dataframe['column'] = dataframe['column'].fillna(value)
+    The average is computed with numpy.mean over the 'weight' column of the
+    Lahman baseball data set.
+
+    filename -- path of the CSV file to read; it must have a 'weight' column.
+    Returns the dataframe with NAs in 'weight' replaced by the mean weight.
+    """
+    # read_csv is in scope via the file's 'from pandas import *'; the bare
+    # name 'pandas' is not reliably bound by that star import.
+    baseball = read_csv(filename)
+    wmean = numpy.mean(baseball['weight'])
+
+    baseball['weight'] = baseball['weight'].fillna(wmean)
+    return baseball
+
+
+
+
+
+values = imputation("../files/Master.csv")
+# Print summary statistics of the imputed dataframe.
+print(values.describe())