forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathio_roundtrip.py
116 lines (90 loc) · 2.78 KB
/
io_roundtrip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from __future__ import print_function
import time
import os
import numpy as np
import la
import pandas
from pandas.compat import range
from pandas import datetools, DateRange
def timeit(f, iterations):
    """Return the total time, in seconds, taken to call ``f()`` repeatedly.

    Parameters
    ----------
    f : callable
        Zero-argument callable to benchmark.
    iterations : int
        Number of times ``f`` is invoked.

    Returns
    -------
    float
        Elapsed time for all ``iterations`` calls combined (not per call).
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() when the
    # interpreter provides it and only fall back to clock() on old versions.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()
    for _ in range(iterations):
        f()
    return timer() - start
def _remove_if_exists(path):
    """Delete ``path``, ignoring the error raised if it cannot be removed."""
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup: typically the file just does not exist yet.
        pass


def rountrip_archive(N, K=50, iterations=10, tmp_dir='/Users/wesm/tmp'):
    """Benchmark saving and reloading a DataFrame via HDF5 and via pickle.

    Builds an ``N`` x ``K`` frame of random values (plus one string column),
    times ``iterations`` round trips for each storage format and prints the
    average number of seconds per round trip.

    Parameters
    ----------
    N : int
        Number of rows in the generated data.
    K : int, default 50
        Number of random-valued columns in the generated data.
    iterations : int, default 10
        Number of round trips timed per format; must be non-zero because the
        total time is divided by it.
    tmp_dir : str, default '/Users/wesm/tmp'
        Directory in which the scratch files are created and removed.
    """
    # Create data
    arr = np.random.randn(N, K)
    # lar = la.larry(arr)
    dma = pandas.DataFrame(arr,
                           DateRange('1/1/2000', periods=N,
                                     offset=datetools.Minute()))
    # Add a non-numeric column so the frame holds mixed dtypes.
    dma[201] = 'bar'

    # filenames
    filename_numpy = os.path.join(tmp_dir, 'numpy.npz')
    filename_larry = os.path.join(tmp_dir, 'archive.hdf5')
    filename_pandas = os.path.join(tmp_dir, 'pandas_tmp')
    scratch_files = (filename_numpy, filename_larry, filename_pandas)

    # Delete old files so every run starts from a clean slate.
    for path in scratch_files:
        _remove_if_exists(path)

    try:
        # Time a round trip save and load
        # NOTE: the NumPy and larry benchmarks are currently disabled.
        # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr)
        # numpy_time = timeit(numpy_f, iterations) / iterations

        # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar)
        # larry_time = timeit(larry_f, iterations) / iterations

        pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
        pandas_time = timeit(pandas_f, iterations) / iterations
        print('pandas (HDF5) %7.4f seconds' % pandas_time)

        # The pickle timing must exercise the pickle round trip; it used to
        # call the HDF5 round trip a second time by mistake.
        pickle_f = lambda: pandas_roundtrip_pickle(filename_pandas, dma, dma)
        pickle_time = timeit(pickle_f, iterations) / iterations
        print('pandas (pickle) %7.4f seconds' % pickle_time)

        # print('Numpy (npz) %7.4f seconds' % numpy_time)
        # print('larry (HDF5) %7.4f seconds' % larry_time)
    finally:
        # Delete old files, even when one of the benchmarks raised.
        for path in scratch_files:
            _remove_if_exists(path)
def numpy_roundtrip(filename, arr1, arr2):
    """Save two arrays to an ``.npz`` archive and read both back.

    Parameters
    ----------
    filename : str
        Destination path. ``np.savez`` appends ``.npz`` when a string path
        lacks that suffix, so the same adjusted path is used for loading.
    arr1, arr2 : array-like
        Arrays stored under the keys ``'arr1'`` and ``'arr2'``.
    """
    np.savez(filename, arr1=arr1, arr2=arr2)

    # Mirror np.savez's suffix handling so the reload finds the file.
    load_path = filename
    if isinstance(filename, str) and not filename.endswith('.npz'):
        load_path = filename + '.npz'

    # Close the archive when done so repeated benchmark iterations do not
    # accumulate open file handles.
    with np.load(load_path) as npz:
        # Indexing is what actually reads each array from disk; the loaded
        # values are discarded because only the I/O cost matters here.
        npz['arr1']
        npz['arr2']
def larry_roundtrip(filename, lar1, lar2):
    """Write two larry objects to an ``la.IO`` archive, then read them back.

    The objects are stored under the keys ``'lar1'`` and ``'lar2'``; the
    values read back are discarded.
    """
    archive = la.IO(filename)

    # Store both objects first ...
    for key, value in (('lar1', lar1), ('lar2', lar2)):
        archive[key] = value

    # ... then fetch them again in the same order.
    for key in ('lar1', 'lar2'):
        archive[key]
def pandas_roundtrip(filename, dma1, dma2):
    """Write two frames to an HDF5 store and read both back.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file backing the store.
    dma1, dma2 : DataFrame
        Frames stored under the keys ``'dma1'`` and ``'dma2'``.
    """
    # Local import, as in the original (presumably so this module can be
    # imported without PyTables installed -- TODO confirm).
    from pandas.io.pytables import HDFStore

    store = HDFStore(filename)
    try:
        store['dma1'] = dma1
        store['dma2'] = dma2
        # Read both frames back; the results are discarded because only the
        # I/O cost is of interest.
        store['dma1']
        store['dma2']
    finally:
        # Always release the file handle: this function runs once per
        # benchmark iteration and previously left every store open.
        store.close()
def pandas_roundtrip_pickle(filename, dma1, dma2):
    """Pickle each frame to ``filename`` and immediately load it back.

    Both frames use the same path, so after the call the file holds ``dma2``.

    Parameters
    ----------
    filename : str
        Path of the pickle file (overwritten by each save).
    dma1, dma2 : DataFrame
        Frames to round-trip, in that order.
    """
    # DataFrame.save / DataFrame.load are the legacy spellings and are absent
    # from current pandas; prefer to_pickle / read_pickle when available and
    # fall back to the legacy methods otherwise.
    read_pickle = getattr(pandas, 'read_pickle', None)
    for frame in (dma1, dma2):
        if hasattr(frame, 'to_pickle'):
            frame.to_pickle(filename)
        else:
            frame.save(filename)

        # The loaded frame is discarded; only the I/O cost matters.
        if read_pickle is not None:
            read_pickle(filename)
        else:
            pandas.DataFrame.load(filename)
if __name__ == '__main__':
    # Script entry point: benchmark a 10000 x 200 random array with the
    # default number of iterations.
    rountrip_archive(N=10000, K=200)