-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path4_combine_mmap.py
executable file
·44 lines (35 loc) · 1004 Bytes
/
4_combine_mmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import sys
import time
import os
from multiprocessing import Pool
import argparse
# Destination of the combined array; the containing directory must already exist.
OUTPUT_PATH = 'data_spec_fc/totalall.mmap'


def _build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(description="Scale dataset stored as compress numpy files")
    # The output file is allocated with exactly this many rows, so it must be
    # at least the combined row count of all inputs. The default is tied to
    # one particular dataset (an earlier revision used 351348424).
    parser.add_argument('--total-rows', type=int, default=342959816)
    parser.add_argument('--cols', type=int, default=94*39)
    parser.add_argument('fnames', nargs='*')
    return parser


def _copy_rows(dest, rows, offset):
    """Write ``rows`` into ``dest`` starting at row ``offset``.

    Returns the offset just past the last row written. Raises ValueError
    when the rows would not fit, instead of failing with an IndexError
    part-way through a file.
    """
    end = offset + len(rows)
    if end > len(dest):
        raise ValueError(
            "input needs %d rows but the output only has %d; "
            "increase --total-rows" % (end, len(dest)))
    # One block assignment replaces the per-row Python loop.
    dest[offset:end] = rows
    return end


def _load_x(fname):
    """Load the ``'x'`` entry of ``fname`` as a float32 array."""
    data = np.load(fname)
    try:
        # astype() copies, so the result stays valid after the file is closed.
        return data['x'].astype(np.float32)
    finally:
        # .npz archives keep a file handle open until closed; plain arrays
        # returned for .npy inputs have no close() method.
        close = getattr(data, 'close', None)
        if close is not None:
            close()


def main(argv=None):
    """Concatenate the ``'x'`` arrays of the given files into one memmap.

    ``argv`` is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    The output is a float32 array of shape ``(total_rows, cols)`` written to
    ``OUTPUT_PATH``; files are appended in the order given.
    """
    args = _build_parser().parse_args(argv)
    shp = (args.total_rows, args.cols)
    print(shp)
    # mode='w+' creates (or overwrites) the file at its full final size.
    arr = np.memmap(OUTPUT_PATH, dtype=np.float32, mode='w+', shape=shp)
    fnames = args.fnames
    print("There are %d files." % (len(fnames)))
    t1 = time.time()
    j = 0
    for fname in fnames:
        print(fname)
        j = _copy_rows(arr, _load_x(fname), j)
    print("Total number: ", j)
    if j < args.total_rows:
        # Rows that were never written remain zero-filled in the output.
        print("Warning: only %d of %d allocated rows were filled"
              % (j, args.total_rows), file=sys.stderr)
    # Push pending writes to disk before dropping the mapping.
    arr.flush()
    del arr
    print("Took %f to make files" % (time.time() - t1))


if __name__ == "__main__":
    main()