From f6d87cb27ab56ef012ba2ae2ad386db8b1a4c902 Mon Sep 17 00:00:00 2001 From: verges Date: Wed, 11 May 2022 00:10:12 +0200 Subject: [PATCH 1/4] bloom --- examples/bloom_filters.py | 44 +++++++++++++++++++++++++++++++++++ examples/graph_isomorphism.py | 0 2 files changed, 44 insertions(+) create mode 100644 examples/bloom_filters.py create mode 100644 examples/graph_isomorphism.py diff --git a/examples/bloom_filters.py b/examples/bloom_filters.py new file mode 100644 index 00000000..caa10499 --- /dev/null +++ b/examples/bloom_filters.py @@ -0,0 +1,44 @@ +# The following two lines are only needed because of this repository organization +import sys, os + +sys.path.insert(1, os.path.realpath(os.path.pardir)) + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.data as data + +# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io +import torchmetrics + +from torchhd import functional +from torchhd import embeddings +from torchhd import structures + +D = 10000 +num_hash_fun = 100 +dens = num_hash_fun/D +N = 10000 +simul = 10 + +selection = torch.full((D,N), -1) + +for i in selection: + perm = torch.randperm(D)[:num_hash_fun] + i[perm] = 1 + +thr_limit = torch.tensor([20]*30 + [30]*20 + [40]*15 + [50]*15 + [60]*20) + +f_range = torch.range(0,5000,50) +current_rbf = torch.zeros(num_hash_fun, 3) + +for i in f_range: + rbf_tpr = torch.zeros(simul) + rbf_fpr = torch.zeros(simul) + for j in range(simul): + rand_perm = torch.randperm(N) + ind = rand_perm[:int(i)] + ll_ind = rand_perm[int(i):] + l_ind = torch.full(N, -1) + l_ind[ind] = 1 + diff --git a/examples/graph_isomorphism.py b/examples/graph_isomorphism.py new file mode 100644 index 00000000..e69de29b From c4e2617a61f4f4a51d519339b8ef7f60ba7f0ec3 Mon Sep 17 00:00:00 2001 From: pereverges Date: Wed, 11 May 2022 02:59:16 +0200 Subject: [PATCH 2/4] bloom filter advancing --- examples/bloom_filters.py | 73 +++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/examples/bloom_filters.py b/examples/bloom_filters.py index caa10499..91eeceaf 100644 --- a/examples/bloom_filters.py +++ b/examples/bloom_filters.py @@ -1,19 +1,13 @@ # The following two lines are only needed because of this repository organization -import sys, os - +import os +import sys +import copy sys.path.insert(1, os.path.realpath(os.path.pardir)) - import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.data as data - +from scipy.stats import binom # Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io -import torchmetrics - +import numpy as np from torchhd import functional -from torchhd import embeddings -from torchhd import structures D = 10000 num_hash_fun = 100 @@ -21,24 +15,65 @@ N = 10000 simul = 10 -selection = torch.full((D,N), -1) +hd = torch.full((D,N), -1) -for i in selection: +for i in hd: perm = torch.randperm(D)[:num_hash_fun] i[perm] = 1 thr_limit = torch.tensor([20]*30 + [30]*20 + [40]*15 + [50]*15 + [60]*20) -f_range = torch.range(0,5000,50) +f_range = torch.range(50,5050,50) current_rbf = torch.zeros(num_hash_fun, 3) -for i in f_range: +for i in range(len(f_range)): + F = f_range[i] rbf_tpr = torch.zeros(simul) rbf_fpr = torch.zeros(simul) for j in range(simul): rand_perm = torch.randperm(N) - ind = rand_perm[:int(i)] - ll_ind = rand_perm[int(i):] - l_ind = torch.full(N, -1) - l_ind[ind] = 1 + ind = rand_perm[:int(F)] + ll_ind = rand_perm[int(F):] + # l_ind = torch.full((N,1), 1) + rbf = functional.multiset(hd[ind,:]) + + positive = torch.tensor(1.0) + negative = torch.tensor(-1.0) + rbf = torch.where(rbf > -F, positive, negative) + rbf_ind = ((rbf != -1).nonzero()) + + puncture = int(D*(0.001*(len(rbf_ind)/D))) + p_ind = torch.randperm(N)[:puncture] + + rbf[p_ind] = -1 + dp = functional.bind(hd, rbf) + rbf_tpr[j] = (sum(dp[ind,1] != 1)/F.item()).item() + rbf_fpr[j] = ((sum(dp[ind,1] != 1)/(N-F.item())).item()) + + print(rbf_tpr) + print(rbf_fpr) + + current_rbf[int(F),0] = torch.mean(rbf_tpr) + current_rbf[int(F),1] = torch.mean(rbf_fpr) + current_rbf[int(F),2] = (current_rbf[int(i),0]+(1-current_rbf[int(F),1]))/2 + + p1 = dens + pdf_bins = binom.pmf(range(0,int(F)+1), int(F), p1) + thr = list(range(0,thr_limit[int(i)])) + p0_bf = torch.zeros(len(thr)) + + for k in thr: + p0_bf[k] = sum(pdf_bins[0:thr[k]+1]) + + print(pdf_bins) + bins2 = list(range(num_hash_fun+1)) + pdf_bins2 = torch.zeros(len(thr), len(bins2)) + for k in range(len(thr)): + print(k) + print(F*num_hash_fun) + print(F*num_hash_fun) + print((F*num_hash_fun - sum((D*list(range(k)))*pdf_bins[:k]))) + print(sum((D*list(range(k)))*pdf_bins[:k])) + print(F*num_hash_fun) + break From 68b793453dae1b7403f03061b2984f55adb69fa8 Mon Sep 17 00:00:00 2001 From: pereverges Date: Wed, 11 May 2022 21:53:08 +0200 Subject: [PATCH 3/4] bloom filter first finished version (need to check if the output is correct) --- examples/bloom_filters.py | 58 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/examples/bloom_filters.py b/examples/bloom_filters.py index 91eeceaf..f2d65889 100644 --- a/examples/bloom_filters.py +++ b/examples/bloom_filters.py @@ -8,13 +8,18 @@ # Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io import numpy as np from torchhd import functional +import numpy as np +import warnings +warnings.filterwarnings("ignore") D = 10000 num_hash_fun = 100 dens = num_hash_fun/D N = 10000 simul = 10 - +BEST_ABF = [] +CURRENT_BF = [] +BEST_BF = [] hd = torch.full((D,N), -1) for i in hd: @@ -50,12 +55,10 @@ rbf_tpr[j] = (sum(dp[ind,1] != 1)/F.item()).item() rbf_fpr[j] = ((sum(dp[ind,1] != 1)/(N-F.item())).item()) - print(rbf_tpr) - print(rbf_fpr) - - current_rbf[int(F),0] = torch.mean(rbf_tpr) - current_rbf[int(F),1] = torch.mean(rbf_fpr) - current_rbf[int(F),2] = (current_rbf[int(i),0]+(1-current_rbf[int(F),1]))/2 + print(i) + current_rbf[i,0] = torch.mean(rbf_tpr) + current_rbf[i,1] = torch.mean(rbf_fpr) + current_rbf[i,2] = (current_rbf[int(i),0]+(1-current_rbf[i,1]))/2 p1 = dens pdf_bins = binom.pmf(range(0,int(F)+1), int(F), p1) @@ -65,15 +68,38 @@ for k in thr: p0_bf[k] = sum(pdf_bins[0:thr[k]+1]) - print(pdf_bins) - bins2 = list(range(num_hash_fun+1)) + bins2 = range(num_hash_fun+1) pdf_bins2 = torch.zeros(len(thr), len(bins2)) + pdf_bins3 = torch.zeros(len(thr), len(bins2)) + for k in range(len(thr)): + l1 = [D*(x) for x in list(range(0,k+1))] + exp = (F*num_hash_fun - sum([l1[i]*pdf_bins[:k+1][i] for i in range(len(l1))])) / (F*num_hash_fun) + pdf_bins2[k] = torch.tensor(binom.pmf(bins2, num_hash_fun, exp)) + pdf_bins3[k] = torch.tensor(binom.pmf(bins2, num_hash_fun, (1-p0_bf[k]))) + + tnfp = [ [] for _ in range(len(thr)) ] + for j in range(len(thr)): + for k in range(num_hash_fun): + b1 = sum(pdf_bins2[j][k:]) + b2 = sum(pdf_bins3[j][k:]) + tnfp[j].append([b1, b2, (b1+(1-b2))/2]) + + THR = torch.zeros(len(thr), 5) for k in range(len(thr)): - print(k) - print(F*num_hash_fun) - print(F*num_hash_fun) - print((F*num_hash_fun - sum((D*list(range(k)))*pdf_bins[:k]))) - print(sum((D*list(range(k)))*pdf_bins[:k])) - print(F*num_hash_fun) + col0 = [row[0] for row in tnfp[k]] + index = [i for i, v in enumerate(col0) if v >= 0.9] + val = [row[2] for row in tnfp[k]] + ind = len([val[i] for i in index]) + THR[k][0] = ind-1 + THR[k][1:4] = torch.tensor(tnfp[k][ind-1]) - break + THR[:,4] = torch.tensor(thr) + ind = torch.max(THR[:,3]) + BEST_ABF.append(THR[int(ind),:]) + CURRENT_BF.append(tnfp[0][-1]) + one_n_opt = round((D/int(F))*np.log(2)) + if one_n_opt == 0: + one_n_opt = 1 + fpr = pow(1-np.exp(-(one_n_opt*int(F)/D)), one_n_opt) + BEST_BF.append([one_n_opt, 1, fpr, (1+(1-fpr))/2]) + print(BEST_BF) From ffc30c95d718440094a855efbe2f94c4e2bc3e6c Mon Sep 17 00:00:00 2001 From: pereverges Date: Fri, 13 May 2022 02:27:40 +0200 Subject: [PATCH 4/4] bloom filter first finished version (output checked) --- examples/bloom_filters.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/examples/bloom_filters.py b/examples/bloom_filters.py index f2d65889..393d3aca 100644 --- a/examples/bloom_filters.py +++ b/examples/bloom_filters.py @@ -16,11 +16,12 @@ num_hash_fun = 100 dens = num_hash_fun/D N = 10000 -simul = 10 +simul = 100 BEST_ABF = [] CURRENT_BF = [] BEST_BF = [] -hd = torch.full((D,N), -1) +hd = torch.full((D,N), 0, dtype=torch.long) + for i in hd: perm = torch.randperm(D)[:num_hash_fun] @@ -28,7 +29,7 @@ thr_limit = torch.tensor([20]*30 + [30]*20 + [40]*15 + [50]*15 + [60]*20) -f_range = torch.range(50,5050,50) +f_range = torch.range(50,5000,50) current_rbf = torch.zeros(num_hash_fun, 3) for i in range(len(f_range)): @@ -40,22 +41,22 @@ ind = rand_perm[:int(F)] ll_ind = rand_perm[int(F):] # l_ind = torch.full((N,1), 1) - rbf = functional.multiset(hd[ind,:]) - + rbf = torch.sum(hd[ind,:], dim=0) positive = torch.tensor(1.0) - negative = torch.tensor(-1.0) - rbf = torch.where(rbf > -F, positive, negative) - rbf_ind = ((rbf != -1).nonzero()) + negative = torch.tensor(0.0) + rbf = torch.where(rbf > 0, positive, negative) + rbf_ind = ((rbf != 0).nonzero()) - puncture = int(D*(0.001*(len(rbf_ind)/D))) + puncture = int(D*(0.001*(len(rbf_ind)/D)))+1 p_ind = torch.randperm(N)[:puncture] rbf[p_ind] = -1 - dp = functional.bind(hd, rbf) - rbf_tpr[j] = (sum(dp[ind,1] != 1)/F.item()).item() - rbf_fpr[j] = ((sum(dp[ind,1] != 1)/(N-F.item())).item()) - print(i) + dp = torch.matmul(hd, rbf.long()) + + rbf_tpr[j] = (sum(dp[ind] == num_hash_fun)/F.item()).item() + rbf_fpr[j] = ((sum(dp[ll_ind] == num_hash_fun)/(N-F.item())).item()) + current_rbf[i,0] = torch.mean(rbf_tpr) current_rbf[i,1] = torch.mean(rbf_fpr) current_rbf[i,2] = (current_rbf[int(i),0]+(1-current_rbf[i,1]))/2 @@ -102,4 +103,7 @@ one_n_opt = 1 fpr = pow(1-np.exp(-(one_n_opt*int(F)/D)), one_n_opt) BEST_BF.append([one_n_opt, 1, fpr, (1+(1-fpr))/2]) - print(BEST_BF) +print(BEST_BF) +print(BEST_ABF) +print(CURRENT_BF) +print(current_rbf)