From 1a93a9c361de2944ab85df7e7e3e8c099bb4ecc7 Mon Sep 17 00:00:00 2001 From: Jeff Calder Date: Wed, 22 May 2024 13:12:38 -0500 Subject: [PATCH] Updates --- docs/weightmatrix.html | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/weightmatrix.html b/docs/weightmatrix.html index f894a76..939188a 100644 --- a/docs/weightmatrix.html +++ b/docs/weightmatrix.html @@ -403,26 +403,31 @@

Weight Matrices

knn_ind = [] eps = 1e-15 for i in range(n): - A = u.get_nns_by_item(i, k, include_distances=True, search_k=-1) - knn_ind.append(A[0]) + #Get extra neighbors, in case there are mistakes + A = u.get_nns_by_item(i, min(2*k,n), include_distances=True) + ind = np.array(A[0]) #knn_dist.append(A[1]) #These distances are floating point (32-bit) precision #The code below computes them more accurately if similarity == 'euclidean': - dist = np.linalg.norm(X[i,:] - X[A[0],:],axis=1) + dist = np.linalg.norm(X[i,:] - X[ind,:],axis=1) elif similarity == 'angular': vi = X[i,:]/np.maximum(np.linalg.norm(X[i,:]),eps) - vj = X[A[0],:]/np.maximum(np.linalg.norm(X[A[0],:],axis=1)[:,None],eps) + vj = X[ind,:]/np.maximum(np.linalg.norm(X[ind,:],axis=1)[:,None],eps) dist = np.linalg.norm(vi-vj,axis=1) elif similarity == 'manhattan': - dist = np.linalg.norm(X[i,:] - X[A[0],:],axis=1,ord=1) + dist = np.linalg.norm(X[i,:] - X[ind,:],axis=1,ord=1) elif similarity == 'hamming': dist = A[1] #hamming is integer-valued, so no need to compute in double precision elif similarity == 'dot': - dist = np.sum(X[i,:]*X[A[0],:],axis=1) + dist = np.sum(X[i,:]*X[ind,:],axis=1) else: dist = A[1] + ind_sort = np.argsort(dist)[:k] + ind = ind[ind_sort] + dist = dist[ind_sort] #print(np.max(np.absolute(dist - np.array(A[1])))) + knn_ind.append(ind) knn_dist.append(dist) @@ -1171,26 +1176,31 @@

Returns

knn_ind = [] eps = 1e-15 for i in range(n): - A = u.get_nns_by_item(i, k, include_distances=True, search_k=-1) - knn_ind.append(A[0]) + #Get extra neighbors, in case there are mistakes + A = u.get_nns_by_item(i, min(2*k,n), include_distances=True) + ind = np.array(A[0]) #knn_dist.append(A[1]) #These distances are floating point (32-bit) precision #The code below computes them more accurately if similarity == 'euclidean': - dist = np.linalg.norm(X[i,:] - X[A[0],:],axis=1) + dist = np.linalg.norm(X[i,:] - X[ind,:],axis=1) elif similarity == 'angular': vi = X[i,:]/np.maximum(np.linalg.norm(X[i,:]),eps) - vj = X[A[0],:]/np.maximum(np.linalg.norm(X[A[0],:],axis=1)[:,None],eps) + vj = X[ind,:]/np.maximum(np.linalg.norm(X[ind,:],axis=1)[:,None],eps) dist = np.linalg.norm(vi-vj,axis=1) elif similarity == 'manhattan': - dist = np.linalg.norm(X[i,:] - X[A[0],:],axis=1,ord=1) + dist = np.linalg.norm(X[i,:] - X[ind,:],axis=1,ord=1) elif similarity == 'hamming': dist = A[1] #hamming is integer-valued, so no need to compute in double precision elif similarity == 'dot': - dist = np.sum(X[i,:]*X[A[0],:],axis=1) + dist = np.sum(X[i,:]*X[ind,:],axis=1) else: dist = A[1] + ind_sort = np.argsort(dist)[:k] + ind = ind[ind_sort] + dist = dist[ind_sort] #print(np.max(np.absolute(dist - np.array(A[1])))) + knn_ind.append(ind) knn_dist.append(dist)