forked from prabhjotSL/Udacity
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[added]Impute functions for weight data
- Loading branch information
Pete
authored and
Pete
committed
Apr 4, 2014
1 parent
39d4ffa
commit fe85b25
Showing
27 changed files
with
515,846 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/usr/bin/python | ||
|
||
import pandas | ||
import pprint | ||
def add_full_name(path_to_csv, path_to_new_csv):
    """Load a CSV file and print summary statistics for it.

    Despite its name, this function does not yet build a ``nameFull``
    column and does not write anything to ``path_to_new_csv``; that
    parameter is accepted but currently unused.

    Args:
        path_to_csv: Path of the CSV file read with ``pandas.read_csv``.
        path_to_new_csv: Currently unused.

    Returns:
        None. The ``describe()`` summary and a marker line are printed to
        standard output.
    """
    df = pandas.read_csv(path_to_csv)
    # TODO: the full-name step is still disabled; the intended line was
    #   df['nameFull'] = df['nameFirst'] + " " + df['nameLast']
    # A parenthesised single-argument print behaves identically as a
    # Python 2 print statement and as the Python 3 print function.
    print(df.describe())
    print("hi")
|
||
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    add_full_name("test.csv", "out.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/usr/bin/python | ||
|
||
from pandas import * | ||
import numpy | ||
|
||
def imputation(filename):
    """Read a CSV file and fill missing 'weight' values with the column mean.

    The file is loaded with ``pandas.read_csv``; every missing entry in its
    'weight' column is replaced by the average of the non-missing weights
    (for example, the 'weight' column of the Lahman baseball data set).

    Args:
        filename: Path of the CSV file to load. It must contain a 'weight'
            column.

    Returns:
        The loaded DataFrame with the 'weight' column imputed.

    Raises:
        KeyError: If the file has no 'weight' column.
    """
    # Imported locally: the file-level ``from pandas import *`` does not
    # reliably bind the module name ``pandas`` itself.
    import pandas

    baseball = pandas.read_csv(filename)

    # Series.mean() skips missing values by default, so the average is taken
    # over the known weights only.
    mean_weight = baseball['weight'].mean()

    baseball['weight'] = baseball['weight'].fillna(mean_weight)
    return baseball
|
||
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    values = imputation("../files/Master.csv")

    # Parenthesised so the line works as a Python 2 print statement and as
    # the Python 3 print function.
    print(values.describe())
Oops, something went wrong.