Skip to content

Commit

Permalink
Added kaggle_geomean.py
Browse files Browse the repository at this point in the history
  • Loading branch information
lenguyenthedat committed Nov 23, 2015
1 parent 37bcdd4 commit 6b94b3b
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ For more information: http://mlwave.com/kaggle-ensembling-guide/
parsing: ./samples/method3.csv
wrote to ./samples/kaggle_avg.csv

$ python kaggle_geomean.py "./samples/method*.csv" "./samples/kaggle_geomean.csv"
parsing: ./samples/method1.csv
parsing: ./samples/method2.csv
parsing: ./samples/method3.csv
wrote to ./samples/kaggle_geomean.csv

## Result:

==> ./samples/method1.csv <==
Expand Down Expand Up @@ -79,4 +85,12 @@ For more information: http://mlwave.com/kaggle-ensembling-guide/
2,0
3,9
4,2
5,3
5,3

==> ./samples/kaggle_geomean.csv <==
ImageId,Label
1,1.587401
2,0.000000
3,7.862224
4,3.301927
5,3.000000
31 changes: 31 additions & 0 deletions kaggle_geomean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import division
from collections import defaultdict
from glob import glob
import sys
import math

# Command-line arguments: the input glob pattern, then the output CSV path.
glob_files, loc_outfile = sys.argv[1], sys.argv[2]

def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
    """Write the row-wise geometric mean of several prediction CSV files.

    Every file matched by ``glob_files`` is read as one header line followed
    by ``id,value`` rows.  The data rows of each file are sorted as text, and
    values sharing the same sorted position and id are multiplied together
    across files.  The n-th root of each product (n = number of matched
    files) is written to ``loc_outfile`` below the header line of the first
    matched file.

    Args:
        glob_files: Glob pattern selecting the input CSV files.
        loc_outfile: Path of the CSV file to create or overwrite.
        method: Accepted for interface compatibility; not used by the
            computation.
        weights: Accepted for interface compatibility; not used by the
            computation.

    Raises:
        IndexError: If a matched file is empty, or a data row has fewer than
            two comma-separated fields.
        ValueError: If a value cannot be parsed by ``float``, or a product is
            negative and the root is fractional (``math.pow`` domain error).
    """
    # Every product starts at 1.0.  The previous code used a product of 0 as
    # its "not seen yet" marker, so a product that had legitimately reached
    # zero was reset to 1 by the next file and the zero prediction was lost.
    products = defaultdict(lambda: 1.0)
    paths = glob(glob_files)

    # Text mode, so the str writes below also work on Python 3.
    with open(loc_outfile, "w") as outfile:
        for file_index, path in enumerate(paths):
            print("parsing: %s" % path)
            with open(path) as infile:
                lines = infile.readlines()

            header = lines[0]
            if file_index == 0:
                outfile.write(header)

            # Sort the data rows as text so that files listing the same ids
            # in a different order line up by position.  Blank lines are
            # dropped first: they would sort to the front, shift every
            # position, and have no value column to parse.
            data_lines = sorted(line for line in lines[1:] if line.strip())
            for position, line in enumerate(data_lines, 1):
                row = line.strip().split(",")
                products[(position, row[0])] *= float(row[1])

        if products:
            # A non-empty accumulator implies at least one matched file.
            # 1.0 keeps this a true division even without the file-level
            # ``from __future__ import division``.
            root = 1.0 / len(paths)
            for position, row_id in sorted(products):
                geomean = math.pow(products[(position, row_id)], root)
                outfile.write("%s,%f\n" % (row_id, geomean))

    print("wrote to %s" % loc_outfile)

# Script entry: combine the files named on the command line.
kaggle_bag(glob_files=glob_files, loc_outfile=loc_outfile)
6 changes: 6 additions & 0 deletions samples/kaggle_geomean.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ImageId,Label
1,1.587401
2,0.000000
3,7.862224
4,3.301927
5,3.000000

0 comments on commit 6b94b3b

Please sign in to comment.