Using autopep8 (https://github.com/hhatto/autopep8) on all py files.
Some lines were changed manually (those where the automated fix might have changed the logic).

Have not fixed E501 (line too long); these are the only remaining pep8 failures.

Check current pep8 compliance of all py files in current directory:
for f in *.py; do pep8 $f; done

Run autopep8 on py files in current directory:
for f in *.py; do autopep8 -i $f; done
hayd committed Jan 3, 2013
1 parent c934e02 commit 66fc98f
Showing 167 changed files with 5,438 additions and 4,562 deletions.
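Per the commit message, E501 (line too long) is the only warning class left unfixed. A quick way to confirm nothing else remains is to re-run the check with E501 excluded; this sketch assumes pep8's standard --ignore flag, and it prints nothing when everything else is clean:

for f in *.py; do pep8 --ignore=E501 $f; done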
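The loops in the commit message cover one directory at a time, while the commit itself touches 167 files across the tree. Assuming a version of autopep8 that supports recursive mode, the whole tree can be fixed in place in a single invocation:

autopep8 --in-place --recursive .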
1 change: 0 additions & 1 deletion bench/bench_dense_to_sparse.py
@@ -12,4 +12,3 @@
     this_rng = rng2[:-i]
     data[100:] = np.nan
     series[i] = SparseSeries(data, index=this_rng)
-
7 changes: 7 additions & 0 deletions bench/bench_get_put_value.py
@@ -4,39 +4,46 @@
 N = 1000
 K = 50
 
+
 def _random_index(howmany):
     return Index([rands(10) for _ in xrange(howmany)])
 
 df = DataFrame(np.random.randn(N, K), index=_random_index(N),
                columns=_random_index(K))
 
+
 def get1():
     for col in df.columns:
         for row in df.index:
             _ = df[col][row]
 
+
 def get2():
     for col in df.columns:
         for row in df.index:
             _ = df.get_value(row, col)
 
+
 def put1():
     for col in df.columns:
         for row in df.index:
             df[col][row] = 0
 
+
 def put2():
     for col in df.columns:
         for row in df.index:
             df.set_value(row, col, 0)
 
+
 def resize1():
     buf = DataFrame()
     for col in df.columns:
         for row in df.index:
             buf = buf.set_value(row, col, 5.)
     return buf
 
+
 def resize2():
     from collections import defaultdict
 
9 changes: 6 additions & 3 deletions bench/bench_groupby.py
@@ -12,16 +12,19 @@
 random.shuffle(foo)
 random.shuffle(foo2)
 
-df = DataFrame({'A' : foo,
-                'B' : foo2,
-                'C' : np.random.randn(n * k)})
+df = DataFrame({'A': foo,
+                'B': foo2,
+                'C': np.random.randn(n * k)})
 
 import pandas._sandbox as sbx
+
+
 def f():
     table = sbx.StringHashTable(len(df))
     ret = table.factorize(df['A'])
     return ret
 
+
 def g():
     table = sbx.PyObjectHashTable(len(df))
     ret = table.factorize(df['A'])
38 changes: 21 additions & 17 deletions bench/bench_join_panel.py
@@ -1,49 +1,54 @@
 # reasonably efficient
 
+
 def create_panels_append(cls, panels):
     """ return an append list of panels """
-    panels = [ a for a in panels if a is not None ]
+    panels = [a for a in panels if a is not None]
     # corner cases
     if len(panels) == 0:
         return None
     elif len(panels) == 1:
         return panels[0]
     elif len(panels) == 2 and panels[0] == panels[1]:
         return panels[0]
-    #import pdb; pdb.set_trace()
+    # import pdb; pdb.set_trace()
     # create a joint index for the axis
+
     def joint_index_for_axis(panels, axis):
         s = set()
         for p in panels:
-            s.update(list(getattr(p,axis)))
+            s.update(list(getattr(p, axis)))
         return sorted(list(s))
+
     def reindex_on_axis(panels, axis, axis_reindex):
         new_axis = joint_index_for_axis(panels, axis)
-        new_panels = [ p.reindex(**{ axis_reindex : new_axis, 'copy' : False}) for p in panels ]
+        new_panels = [p.reindex(**{axis_reindex: new_axis,
+                                   'copy': False}) for p in panels]
         return new_panels, new_axis
-    # create the joint major index, don't reindex the sub-panels - we are appending
+    # create the joint major index, don't reindex the sub-panels - we are
+    # appending
     major = joint_index_for_axis(panels, 'major_axis')
     # reindex on minor axis
     panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
     # reindex on items
     panels, items = reindex_on_axis(panels, 'items', 'items')
     # concatenate values
     try:
-        values = np.concatenate([ p.values for p in panels ],axis=1)
+        values = np.concatenate([p.values for p in panels], axis=1)
     except (Exception), detail:
-        raise Exception("cannot append values that don't match dimensions! -> [%s] %s" % (','.join([ "%s" % p for p in panels ]),str(detail)))
-    #pm('append - create_panel')
-    p = Panel(values, items = items, major_axis = major, minor_axis = minor )
-    #pm('append - done')
+        raise Exception("cannot append values that don't match dimensions! -> [%s] %s" % (','.join(["%s" % p for p in panels]), str(detail)))
+    # pm('append - create_panel')
+    p = Panel(values, items=items, major_axis=major,
+              minor_axis=minor)
+    # pm('append - done')
     return p
 
 
-
-# does the job but inefficient (better to handle like you read a table in pytables...e.g create a LongPanel then convert to Wide)
-
+# does the job but inefficient (better to handle like you read a table in
+# pytables...e.g create a LongPanel then convert to Wide)
 def create_panels_join(cls, panels):
     """ given an array of panels, create a single panel """
-    panels = [ a for a in panels if a is not None ]
+    panels = [a for a in panels if a is not None]
     # corner cases
     if len(panels) == 0:
         return None
@@ -62,16 +67,15 @@ def create_panels_join(cls, panels):
             for minor_i, minor_index in panel.minor_axis.indexMap.items():
                 for major_i, major_index in panel.major_axis.indexMap.items():
                     try:
-                        d[(minor_i,major_i,item)] = values[item_index,major_index,minor_index]
+                        d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index]
                     except:
                         pass
     # stack the values
     minor = sorted(list(minor))
     major = sorted(list(major))
     items = sorted(list(items))
     # create the 3d stack (items x columns x indices)
-    data = np.dstack([ np.asarray([ np.asarray([ d.get((minor_i,major_i,item),np.nan) for item in items ]) for major_i in major ]).transpose() for minor_i in minor ])
+    data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) for item in items]) for major_i in major]).transpose() for minor_i in minor])
     # construct the panel
     return Panel(data, items, major, minor)
 add_class_method(Panel, create_panels_join, 'join_many')
-
11 changes: 10 additions & 1 deletion bench/bench_khash_dict.py
@@ -16,12 +16,15 @@
 pid = os.getpid()
 proc = psutil.Process(pid)
 
+
 def object_test_data(n):
     pass
 
+
 def string_test_data(n):
     return np.array([rands(10) for _ in xrange(n)], dtype='O')
 
+
 def int_test_data(n):
     return np.arange(n, dtype='i8')
 
@@ -30,17 +33,21 @@ def int_test_data(n):
 #----------------------------------------------------------------------
 # Benchmark 1: map_locations
 
+
 def map_locations_python_object():
     arr = string_test_data(N)
     return _timeit(lambda: lib.map_indices_object(arr))
 
+
 def map_locations_khash_object():
     arr = string_test_data(N)
+
     def f():
         table = sbx.PyObjectHashTable(len(arr))
         table.map_locations(arr)
     return _timeit(f)
 
+
 def _timeit(f, iterations=10):
     start = time.time()
     for _ in xrange(iterations):
@@ -51,17 +58,20 @@ def _timeit(f, iterations=10):
 #----------------------------------------------------------------------
 # Benchmark 2: lookup_locations
 
+
 def lookup_python(values):
     table = lib.map_indices_object(values)
     return _timeit(lambda: lib.merge_indexer_object(values, table))
 
+
 def lookup_khash(values):
     table = sbx.PyObjectHashTable(len(values))
     table.map_locations(values)
     locs = table.lookup_locations(values)
     # elapsed = _timeit(lambda: table.lookup_locations2(values))
     return table
 
+
 def leak(values):
     for _ in xrange(100):
         print proc.get_memory_info()
@@ -75,4 +85,3 @@ def leak(values):
 
 #----------------------------------------------------------------------
 # Benchmark 4: factorize
-
15 changes: 9 additions & 6 deletions bench/bench_merge.py
@@ -5,6 +5,7 @@
 N = 10000
 ngroups = 10
 
+
 def get_test_data(ngroups=100, n=N):
     unique_groups = range(ngroups)
     arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)
@@ -38,10 +39,10 @@ def get_test_data(ngroups=100, n=N):
 key = np.tile(indices[:8000], 10)
 key2 = np.tile(indices2[:8000], 10)
 
-left = DataFrame({'key' : key, 'key2':key2,
-                  'value' : np.random.randn(80000)})
-right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
-                   'value2' : np.random.randn(8000)})
+left = DataFrame({'key': key, 'key2': key2,
+                  'value': np.random.randn(80000)})
+right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
+                   'value2': np.random.randn(8000)})
 
 right2 = right.append(right, ignore_index=True)
 
@@ -78,7 +79,8 @@ def get_test_data(ngroups=100, n=N):
 
 all_results = all_results.div(all_results['pandas'], axis=0)
 
-all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
+all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
+                                 'base::merge']]
 
 sort_results = DataFrame.from_items([('pandas', results['sort']),
                                      ('R', r_results['base::merge'])])
@@ -102,4 +104,5 @@ def get_test_data(ngroups=100, n=N):
 
 all_results = presults.join(r_results)
 all_results = all_results.div(all_results['pandas'], axis=0)
-all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', 'base::merge']]
+all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr',
+                                 'base::merge']]
22 changes: 12 additions & 10 deletions bench/bench_merge_sqlite.py
@@ -13,10 +13,10 @@
 key = np.tile(indices[:8000], 10)
 key2 = np.tile(indices2[:8000], 10)
 
-left = DataFrame({'key' : key, 'key2':key2,
-                  'value' : np.random.randn(80000)})
-right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
-                   'value2' : np.random.randn(8000)})
+left = DataFrame({'key': key, 'key2': key2,
+                  'value': np.random.randn(80000)})
+right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:],
+                   'value2': np.random.randn(8000)})
 
 # right2 = right.append(right, ignore_index=True)
 # right = right2
@@ -30,8 +30,10 @@
 create_sql_indexes = True
 
 conn = sqlite3.connect(':memory:')
-conn.execute('create table left( key varchar(10), key2 varchar(10), value int);')
-conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);')
+conn.execute(
+    'create table left( key varchar(10), key2 varchar(10), value int);')
+conn.execute(
+    'create table right( key varchar(10), key2 varchar(10), value2 int);')
 conn.executemany('insert into left values (?, ?, ?)',
                  zip(key, key2, left['value']))
 conn.executemany('insert into right values (?, ?, ?)',
@@ -43,7 +45,7 @@
     conn.execute('create index right_ix on right(key, key2)')
 
 
-join_methods = ['inner', 'left outer', 'left'] # others not supported
+join_methods = ['inner', 'left outer', 'left']  # others not supported
 sql_results = DataFrame(index=join_methods, columns=[False])
 niter = 5
 for sort in [False]:
@@ -61,8 +63,8 @@
 
         if sort:
             sql = '%s order by key, key2' % sql
-        f = lambda: list(conn.execute(sql)) # list fetches results
-        g = lambda: conn.execute(sql) # list fetches results
+        f = lambda: list(conn.execute(sql))  # list fetches results
+        g = lambda: conn.execute(sql)  # list fetches results
        gc.disable()
         start = time.time()
         # for _ in xrange(niter):
@@ -74,7 +76,7 @@
         conn.commit()
 
         sql_results[sort][join_method] = elapsed
-sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
+sql_results.columns = ['sqlite3']  # ['dont_sort', 'sort']
 sql_results.index = ['inner', 'outer', 'left']
 
 sql = """select *
28 changes: 14 additions & 14 deletions bench/bench_sparse.py
@@ -11,13 +11,13 @@
 arr1 = np.arange(N)
 index = Index(np.arange(N))
 
-off = N//10
-arr1[off : 2 * off] = np.NaN
-arr1[4*off: 5 * off] = np.NaN
-arr1[8*off: 9 * off] = np.NaN
+off = N // 10
+arr1[off: 2 * off] = np.NaN
+arr1[4 * off: 5 * off] = np.NaN
+arr1[8 * off: 9 * off] = np.NaN
 
 arr2 = np.arange(N)
-arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
+arr2[3 * off // 2: 2 * off + off // 2] = np.NaN
 arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN
 
 s1 = SparseSeries(arr1, index=index)
@@ -38,6 +38,7 @@
 
 sdf = dm.to_sparse()
 
+
 def new_data_like(sdf):
     new_data = {}
     for col, series in sdf.iteritems():
@@ -52,22 +53,22 @@ def new_data_like(sdf):
 # for col, ser in dm.iteritems():
 #     data[col] = SparseSeries(ser)
 
-dwp = Panel.fromDict({'foo' : dm})
+dwp = Panel.fromDict({'foo': dm})
 # sdf = SparseDataFrame(data)
 
 
 lp = stack_sparse_frame(sdf)
 
 
-swp = SparsePanel({'A' : sdf})
-swp = SparsePanel({'A' : sdf,
-                   'B' : sdf,
-                   'C' : sdf,
-                   'D' : sdf})
+swp = SparsePanel({'A': sdf})
+swp = SparsePanel({'A': sdf,
+                   'B': sdf,
+                   'C': sdf,
+                   'D': sdf})
 
 y = sdf
-x = SparsePanel({'x1' : sdf + new_data_like(sdf) / 10,
-                 'x2' : sdf + new_data_like(sdf) / 10})
+x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10,
+                 'x2': sdf + new_data_like(sdf) / 10})
 
 dense_y = sdf
 dense_x = x.to_dense()
@@ -89,4 +90,3 @@ def new_data_like(sdf):
 reload(face)
 
 # model = face.ols(y=y, x=x)
-