Skip to content

Commit

Permalink
Add construction speed logging
Browse files Browse the repository at this point in the history
  • Loading branch information
Yury committed Jan 17, 2023
1 parent dd266bc commit d35f428
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 32 deletions.
26 changes: 19 additions & 7 deletions tests/python/git_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,29 @@
# NOTE(review): this listing comes from a rendered diff with no +/- markers and
# with indentation stripped; where two near-identical lines are adjacent, they
# look like the removed and the added version of the same line - confirm
# against the repository before relying on either.

# Copy the speed-test script to a second path so the copy can still be run
# after other commits are checked out (see the inline remark below).
speedtest_copy_path = os.path.join("tests", "python", "speedtest2.py")
shutil.copyfile(speedtest_src_path, speedtest_copy_path) # the file has to be outside of git

# Collect the commits to benchmark, starting from a release tag.
# NOTE(review): the two assignments differ only in the tag (v0.6.0 vs v0.6.2);
# presumably the first is the pre-change line and the second its replacement.
commits = list(Repository('.', from_tag="v0.6.0").traverse_commits())
commits = list(Repository('.', from_tag="v0.6.2").traverse_commits())
# Print an index, hash and single-line message for every commit found.
print("Found commits:")
for idx, commit in enumerate(commits):
# Line breaks in the commit message are replaced by spaces.
name = commit.msg.replace('\n', ' ').replace('\r', ' ')
print(idx, commit.hash, name)

# Main pass: check out, install and (further below) benchmark each commit.
for commit in commits:
# NOTE(review): the second assignment additionally replaces "," with ";" -
# presumably so the name cannot break the comma-separated log line that the
# speed-test script writes; confirm.
name = commit.msg.replace('\n', ' ').replace('\r', ' ')
name = commit.msg.replace('\n', ' ').replace('\r', ' ').replace(",", ";")
print("\nProcessing", commit.hash, name)

# Remove any existing "build" directory before switching commits.
if os.path.exists("build"):
shutil.rmtree("build")
os.system(f"git checkout {commit.hash}")

# Checking we have actually switched the branch:
# The hash of the last commit returned by traversing the repository is
# compared with the requested hash; on a mismatch the commit is skipped.
# NOTE(review): assumes the last traversed commit is the checked-out one - TODO confirm.
current_commit=list(Repository('.').traverse_commits())[-1]
if current_commit.hash != commit.hash:
# The message is printed four times, presumably to stand out in long output.
print("git checkout failed!!!!")
print("git checkout failed!!!!")
print("git checkout failed!!!!")
print("git checkout failed!!!!")
continue

# Install the checked-out revision into the current Python environment and
# report the exit status returned by os.system.
print("\n\n--------------------\n\n")
ret = os.system("python -m pip install .")
print("Install result:", ret)
Expand All @@ -33,8 +43,10 @@
# NOTE(review): the condition guarding this message is in the collapsed part of
# the diff and is not visible here; presumably it tests the install result.
print("build failed!!!!")
continue

# Run the copied speed-test script for this commit. -n passes a run name, and
# -d / -t are read by the speed-test script as dimension and thread count.
# NOTE(review): this listing mixes removed and added diff lines. The uncommented
# invocations that pass only "{name}" appear to be the pre-change versions whose
# replacements are the commented-out copies at the bottom; the two invocations
# prefixed with the first four characters of the commit hash appear to be the
# ones that remain active - confirm against the repository.
os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 1')
os.system(f'python {speedtest_copy_path} -n "{name}" -d 64 -t 1')
os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 1')
os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 24')
os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 24')
# os.system(f'python {speedtest_copy_path} -n "{hash[:4]}_{name}" -d 32 -t 1')
os.system(f'python {speedtest_copy_path} -n "{commit.hash[:4]}_{name}" -d 16 -t 1')
os.system(f'python {speedtest_copy_path} -n "{commit.hash[:4]}_{name}" -d 16 -t 64')
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 64 -t 1')
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 1')
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 4 -t 24')
# os.system(f'python {speedtest_copy_path} -n "{name}" -d 128 -t 24')
53 changes: 28 additions & 25 deletions tests/python/speedtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,50 +13,53 @@
# NOTE(review): this listing comes from a rendered diff with no +/- markers and
# with indentation stripped; where two near-identical lines are adjacent, they
# look like the removed and the added version of the same line - confirm
# against the repository before relying on either.

# Benchmark parameters taken from the parsed command-line arguments
# (the argument parser itself is outside this excerpt).
dim = int(args.d)
name = args.n
threads=int(args.t)
# NOTE(review): the first assignment scales the element count inversely with
# dim; the second fixes it at 400000 and presumably replaces the first.
num_elements = 1000000 * 4//dim
num_elements = 400000

# Generating sample data
# The seed is fixed, so every run draws the same (num_elements, dim) float32 array.
np.random.seed(1)
data = np.float32(np.random.random((num_elements, dim)))


# NOTE(review): the index file path appears once active and once commented out;
# presumably the commented-out form is the post-change state.
index_path=f'speed_index{dim}.bin'
# index_path=f'speed_index{dim}.bin'
# Declaring index
p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip

# NOTE(review): the "build only if no saved index exists" guard appears once
# active and once commented out; presumably the guard was removed.
if not os.path.isfile(index_path) :
# if not os.path.isfile(index_path) :

# NOTE(review): the two calls differ only in ef_construction (100 vs 60).
p.init_index(max_elements=num_elements, ef_construction=100, M=16)
p.init_index(max_elements=num_elements, ef_construction=60, M=16)

# Controlling the recall by setting ef:
# higher ef leads to better accuracy, but slower search
p.set_ef(10)
# Controlling the recall by setting ef:
# higher ef leads to better accuracy, but slower search
p.set_ef(10)

# Set number of threads used during batch search/construction
# By default using all available cores
p.set_num_threads(12)
# Set number of threads used during batch search/construction
# By default using all available cores
p.set_num_threads(64)
# Wall-clock time of inserting the whole data set is kept as construction_time
# and written to the log line at the end.
t0=time.time()
p.add_items(data)
construction_time=time.time()-t0
# Serializing and deleting the index:

# NOTE(review): this untimed add_items call looks like the pre-change line that
# the timed call above replaced.
p.add_items(data)

# Serializing and deleting the index:

# NOTE(review): saving the index appears once active and once commented out.
print("Saving index to '%s'" % index_path)
p.save_index(index_path)
# print("Saving index to '%s'" % index_path)
# p.save_index(index_path)
# Switch to the thread count requested on the command line for the query phase.
p.set_num_threads(threads)
# Per-query-batch wall-clock timings, in seconds.
times=[]
# NOTE(review): old and new versions of the pause, the ef value and the loop
# headers are interleaved below (sleep 10 / ef 100 / 3x10 iterations versus
# sleep 1 / ef 15 / 1x3 iterations); loop nesting cannot be read from this
# listing because indentation is missing.
time.sleep(10)
p.set_ef(100)
for _ in range(3):
p.load_index(index_path)
for _ in range(10):
time.sleep(1)
p.set_ef(15)
for _ in range(1):
# p.load_index(index_path)
for _ in range(3):
t0=time.time()
# NOTE(review): the query over the full data set looks like the pre-change
# line; the version below queries only the first 5000*threads rows. In this
# listing the slice follows t0, so it would fall inside the timed span - confirm.
labels, distances = p.knn_query(data, k=1)
qdata=data[:5000*threads]
labels, distances = p.knn_query(qdata, k=1)
tt=time.time()-t0
times.append(tt)
print(f"{tt} seconds")
# NOTE(review): this labelled summary string looks like the pre-change format;
# the plain comma-separated one further down presumably replaces it.
str_out=f"mean time:{np.mean(times)}, median time:{np.median(times)}, std time {np.std(times)} {name}"
# Recall is the fraction of query rows whose returned label equals the row's
# own position in qdata.
# NOTE(review): assumes add_items labelled the rows 0..n-1 in order - TODO confirm.
recall=np.sum(labels.reshape(-1)==np.arange(len(qdata)))/len(qdata)
print(f"{tt} seconds, recall= {recall}")

# One comma-separated result line: mean, median and std of the query times,
# construction time, recall, run name. recall here is the value from the most
# recent query.
str_out=f"{np.mean(times)}, {np.median(times)}, {np.std(times)}, {construction_time}, {recall}, {name}"
print(str_out)
# The line is appended to a log file named after dim and the thread count.
# NOTE(review): the two "with" headers differ only in the file prefix
# ("log_" vs "log2_"); presumably the second replaces the first.
with open (f"log_{dim}_t{threads}.txt","a") as f:
with open (f"log2_{dim}_t{threads}.txt","a") as f:
f.write(str_out+"\n")
f.flush()

0 comments on commit d35f428

Please sign in to comment.