Skip to content

Commit

Permalink
[added]Impute functions for weight data
Browse files Browse the repository at this point in the history
  • Loading branch information
Pete authored and Pete committed Apr 4, 2014
1 parent 39d4ffa commit fe85b25
Show file tree
Hide file tree
Showing 27 changed files with 515,846 additions and 0 deletions.
12 changes: 12 additions & 0 deletions DataScienceIntro/DataWrangling/csv_with_describe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/python

import pandas
import pprint
def add_full_name(path_to_csv, path_to_new_csv):
    """Print summary statistics for the CSV file at *path_to_csv*.

    Despite its name, this function does not currently add a full-name
    column or write any output file; it only loads the CSV and prints the
    result of ``DataFrame.describe()``.

    Args:
        path_to_csv: Path of the CSV file to load with ``pandas.read_csv``.
        path_to_new_csv: Currently unused. Kept so existing callers that
            pass two arguments continue to work.

    Returns:
        None. The summary is written to standard output.
    """
    df = pandas.read_csv(path_to_csv)
    # TODO: build a 'nameFull' column as df['nameFirst'] + " " + df['nameLast']
    # (presumably to be saved to path_to_new_csv -- confirm intent).

    # print() with a single argument behaves identically on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(df.describe())
    # Debug marker retained so the script's output is unchanged.
    print("hi")

# Script entry: describe test.csv; out.csv is passed through as the
# (currently unused) destination path.
add_full_name(path_to_csv="test.csv", path_to_new_csv="out.csv")
32 changes: 32 additions & 0 deletions DataScienceIntro/DataWrangling/dw_impute_08.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/python

from pandas import *
import numpy

def imputation(filename):
    """Load a CSV and fill missing 'weight' values with the column mean.

    The file is read into a pandas DataFrame, the mean of the 'weight'
    column is computed over the non-missing entries, and every missing
    entry in that column is replaced with that mean via ``fillna``.

    Args:
        filename: Path of the CSV file to load. It must contain a
            'weight' column (the original exercise targets the Lahman
            baseball data set).

    Returns:
        The loaded DataFrame with NAs in its 'weight' column replaced by
        the average weight. All other columns are left as read.

    Raises:
        KeyError: If the CSV has no 'weight' column.
    """
    # Import locally: the file header only does ``from pandas import *``,
    # which does not bind the name ``pandas`` on pandas versions that
    # define ``__all__``.
    import pandas

    baseball = pandas.read_csv(filename)
    weights = baseball['weight']

    # skipna=True makes it explicit that missing weights are excluded from
    # the average; otherwise the mean itself would be NaN and the fill
    # below would be a no-op.
    mean_weight = weights.mean(skipna=True)

    baseball['weight'] = weights.fillna(mean_weight)
    return baseball





# Script entry: impute missing weights in the master table, then print
# summary statistics for the resulting frame.
values = imputation("../files/Master.csv")

# print() with one argument works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
print(values.describe())
Loading

0 comments on commit fe85b25

Please sign in to comment.