-
Notifications
You must be signed in to change notification settings - Fork 280
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add a beautiful Heap in ./tool,add KNN in /KNN,and the test code in /…
…test
- Loading branch information
Showing
18 changed files
with
324 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,11 @@ | ||
RECENT TODO List: | ||
== | ||
|
||
1.spectral clustering | ||
-- | ||
learning | ||
|
||
2.Decision Tree | ||
1.Decision Tree | ||
-- | ||
add Pruning | ||
|
||
3.k-nearest | ||
-- | ||
kd tree is required of course | ||
|
||
4.Naive Bayesian Model | ||
2.Naive Bayesian Model | ||
-- | ||
to do |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
""" | ||
""" | ||
|
||
import numpy as np | ||
import scipy as sp | ||
import pylab as py | ||
from .knn import KNNC | ||
from .kd import KDTree | ||
|
||
# Names re-exported as the public interface of this package.
__all__ = ['KNNC', 'KDTree']
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
from __future__ import division | ||
import numpy as np | ||
import scipy as sp | ||
from operator import itemgetter | ||
from scipy.spatial.distance import euclidean | ||
from dml.tool import Heap | ||
class KDNode:
    """One node of a k-d tree.

    The short attribute names are kept because the search code (and any
    external caller) reads them directly.
    """

    def __init__(self, x, y, l):
        self.x = x               # coordinates of the point stored in this node
        self.y = y               # label attached to that point (may be None)
        self.l = l               # index of the axis this node splits on
        self.F = None            # parent node; None for the root
        self.Lc = None           # left child: points whose x[l] is < self.x[l]
        self.Rc = None           # right child: points whose x[l] is >= self.x[l]
        self.distsToNode = None  # not used by KDTree; kept for compatibility


class KDTree:
    """k-d tree over the columns of ``X`` with a best-first k-NN search.

    ``X`` is laid out as (n_coordinates, n_points): every column is one
    point, and the split axis cycles over the rows.
    """

    def __init__(self, X, y=None, dist=euclidean):
        """Build the tree.

        X    -- 2-D array, one point per column.
        y    -- optional labels, one per column of ``X``; when omitted every
                node carries the label ``None``.
        dist -- callable ``dist(u, v)`` returning the distance between two
                points.
        """
        X = np.asarray(X)
        self.X = X
        self.k = X.shape[0]  # number of coordinates per point
        self.y = y
        self.dist = dist
        self.P = self.maketree(X, y, 0)
        # An empty data set yields no root at all.
        if self.P is not None:
            self.P.F = None

    def maketree(self, data, y, deep):
        """Recursively build the subtree holding the points in ``data``.

        data -- 2-D array, one point per column.
        y    -- labels aligned with the columns of ``data``, or None.
        deep -- depth of the subtree root; selects the split axis.

        Returns the root KDNode of the subtree, or None when ``data`` is
        empty.
        """
        data = np.asarray(data)
        if data.size == 0:
            return None
        if y is not None:
            y = np.asarray(y).ravel()

        case = data.shape[1]
        p = case // 2          # median position after sorting
        l = deep % self.k      # split axis for this depth

        # Stable sort of the points along the split axis. Labels are permuted
        # alongside instead of being stacked into the coordinate matrix, so
        # they keep their own dtype.
        order = np.argsort(data[l, :], kind='mergesort')
        data = data[:, order]
        if y is not None:
            y = y[order]

        coords = data[l, :]
        v = coords[p]
        rP = KDNode(data[:, p].copy(), None if y is None else y[p], l)

        if case > 1:
            left = coords < v
            right = coords >= v
            right[p] = False   # the pivot lives in this node, not in a child

            rP.Lc = self.maketree(
                data[:, left], None if y is None else y[left], deep + 1)
            if rP.Lc is not None:
                rP.Lc.F = rP
            rP.Rc = self.maketree(
                data[:, right], None if y is None else y[right], deep + 1)
            if rP.Rc is not None:
                rP.Rc.F = rP
        return rP

    def search_knn(self, P, x, k, maxiter=200):
        """Collect the (up to) ``k`` tree nodes nearest to ``x``.

        P       -- root node of the (sub)tree to search.
        x       -- query point; flattened to a 1-D float vector.
        k       -- number of neighbours wanted.
        maxiter -- accepted for backward compatibility; currently unused.

        Returns a Heap whose entries ``heap[1:counter + 1]`` are KDNodes and
        whose ``heap[1]`` is the farthest of the kept neighbours.
        """
        # Float copy so that projecting onto a split plane below never
        # truncates a coordinate when the query was given as integers.
        x = np.asarray(x, dtype=float).ravel()

        def pf_compare(a, b):
            return self.dist(x, a.x) < self.dist(x, b.x)

        def ans_compare(a, b):
            return self.dist(x, a.x) > self.dist(x, b.x)

        pf_seq = Heap(compare=pf_compare)   # candidates, closest first
        ans = Heap(k, compare=ans_compare)  # best k so far, farthest on top
        if P is None or (k is not None and k <= 0):
            return ans

        pf_seq.insert(P)
        while pf_seq.counter > 0:
            t = pf_seq.heap[1]
            pf_seq.delete(1)
            now = t.F
            if ans.counter == k and now is not None:
                # Distance from x to the parent's splitting plane: if even
                # that exceeds the current worst neighbour, nothing in this
                # subtree can improve the answer.
                q = x.copy()
                q[now.l] = now.x[now.l]
                if self.dist(q, x) > self.dist(ans.heap[1].x, x):
                    continue
            self.to_leaf(t, x, pf_seq, ans)
        return ans

    def to_leaf(self, P, x, pf_seq, ans):
        """Walk from ``P`` down to the leaf region that contains ``x``.

        Every visited node is offered to ``ans``; at each step the child that
        is not followed is queued on ``pf_seq`` for later inspection.

        Returns ``(last visited node, pf_seq, ans)``; the node is None when
        ``P`` is None.
        """
        tp = P
        if tp is None:
            return tp, pf_seq, ans
        while True:
            ans.insert(tp)
            if tp.x[tp.l] > x[tp.l]:
                near, far = tp.Lc, tp.Rc
            else:
                near, far = tp.Rc, tp.Lc
            if far is not None:
                pf_seq.insert(far)
            if near is None:
                return tp, pf_seq, ans
            tp = near
|
||
|
||
|
||
|
||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#coding:utf-8 | ||
import numpy as np | ||
import scipy as sp | ||
from scipy.spatial.distance import cdist | ||
from scipy.spatial.distance import euclidean | ||
from dml.KNN.kd import KDTree | ||
|
||
#import pylab as py | ||
class KNNC:
    """k-nearest-neighbour classifier backed by a KDTree."""

    def __init__(self, X, K, labels=None, dist=euclidean):
        '''
        X is a N*M matrix where M is the number of cases (one per column).
        K is the default number of neighbours used by the queries.
        labels holds one label per case and is used by the predictions;
            when omitted every case gets the label 0.
        dist is the similarity measurement. It is called as dist(u, v),
            so it must be a function such as the ones found in
            scipy.spatial.distance (euclidean, cityblock, chebyshev, ...),
            not the name of a metric.
        '''
        self.X = np.array(X)
        # "is None" on purpose: comparing an array with == is elementwise.
        if labels is None:
            labels = np.zeros(self.X.shape[1])
        self.labels = np.array(labels)
        self.K = K
        self.dist = dist
        self.KDTrees = KDTree(self.X, self.labels, self.dist)

    def predict(self, x, k):
        '''
        Return the most frequent label among the k cases nearest to x.
        Returns 0 when the search yields no neighbour.
        '''
        ans = self.KDTrees.search_knn(self.KDTrees.P, x, k)
        votes = {}
        best_count = 0
        best_label = 0
        # Slot 0 of the heap list is a placeholder; real entries start at 1.
        for node in ans.heap[1:ans.counter + 1]:
            label = node.y
            votes[label] = votes.get(label, 0) + 1
            if votes[label] > best_count:
                best_count = votes[label]
                best_label = label
        return best_label

    def for_point(self, test_x, k=None):
        '''
        Return the coordinates of the k cases nearest to test_x, in the
        order they are stored in the result heap. k defaults to self.K.
        '''
        if k is None:
            k = self.K
        ans = self.KDTrees.search_knn(self.KDTrees.P, np.array(test_x), k)
        return [node.x for node in ans.heap[1:ans.counter + 1]]

    def pred(self, test_x, k=None):
        '''
        test_x is a N*TM matrix holding TM test cases (one per column);
        a single flat vector is treated as one test case.
        k overrides the default number of neighbours.
        Returns the list of predicted labels, one per test case.
        '''
        if k is None:
            k = self.K
        test_case = np.array(test_x)
        if test_case.ndim == 1:
            test_case = test_case.reshape(-1, 1)
        return [self.predict(test_case[:, i], k)
                for i in range(test_case.shape[1])]
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from __future__ import division | ||
import numpy as np | ||
import scipy as sp | ||
def heap_judge(a, b):
    """Default ordering: ``a`` outranks ``b`` when it is larger (max-heap)."""
    return a > b


class Heap:
    """Binary heap kept in a 1-based list, optionally bounded to K items.

    ``heap[0]`` is a placeholder so that the children of slot ``i`` are
    ``2*i`` and ``2*i + 1``; the live elements are ``heap[1:counter + 1]``
    and ``heap[1]`` is the top.
    """

    def __init__(self, K=None, compare=heap_judge):
        '''
        'K' is the parameter to restrict the length of Heap
            !!! when K is confirmed, the Min heap contains the Max K elements
                while the Max heap contains the Min K elements
        'compare' is the compare function: compare(a, b) returns True when
            a must sit above b. The default gives a Max heap.
        '''
        self.K = K
        self.compare = compare
        self.heap = ['#']
        self.counter = 0

    def insert(self, a):
        """Add ``a``; on a full bounded heap it may replace the top instead.

        When the heap already holds K elements, ``a`` replaces the top only
        if it does not outrank it; otherwise ``a`` is dropped. A heap with
        K == 0 never stores anything.
        """
        if self.K is None or self.counter < self.K:
            self.heap.append(a)
            self.counter += 1
            self.up(self.counter)
        elif self.counter > 0 and not self.compare(a, self.heap[1]):
            self.heap[1] = a
            self.down(1)

    def up(self, index):
        """Sift the element at ``index`` towards the top while it outranks
        its parent."""
        while index > 1:
            parent = index // 2
            if not self.compare(self.heap[index], self.heap[parent]):
                break
            self.heap[index], self.heap[parent] = \
                self.heap[parent], self.heap[index]
            index = parent

    def down(self, index):
        """Sift the element at ``index`` towards the leaves until it
        outranks its preferred child."""
        while 2 * index <= self.counter:
            child = 2 * index
            # Prefer the left child only when it outranks the right one.
            if child < self.counter and not self.compare(
                    self.heap[child], self.heap[child + 1]):
                child += 1
            if self.compare(self.heap[index], self.heap[child]):
                break
            self.heap[index], self.heap[child] = \
                self.heap[child], self.heap[index]
            index = child

    def delete(self, index):
        """Remove the element stored at 1-based position ``index``.

        Raises IndexError when ``index`` is not a live slot (this also
        protects the placeholder at position 0).
        """
        if not 1 <= index <= self.counter:
            raise IndexError("heap index out of range")
        last = self.heap.pop()
        self.counter -= 1
        if index <= self.counter:
            self.heap[index] = last
            # The former last element may belong below OR above this slot,
            # so restore the invariant in both directions.
            self.down(index)
            self.up(index)

    def delete_ele(self, a):
        """Remove the first element equal to ``a``.

        Returns the position it occupied, or None when it is not present.
        """
        try:
            # Start at 1 so the placeholder in slot 0 is never matched.
            t = self.heap.index(a, 1)
        except ValueError:
            return None
        self.delete(t)
        return t
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from __future__ import division | ||
import numpy as np | ||
import scipy as sp | ||
from dml.KNN.kd import KDTree | ||
from dml.KNN import KNNC | ||
# Smoke test: six 2-D training points stored one per column, each labelled
# with its own first coordinate.
X = np.array([[2, 5, 9, 4, 8, 7], [3, 4, 6, 7, 1, 2]])
y = np.array([2, 5, 9, 4, 8, 7])
knn = KNNC(X, 1, y)
# print() with a single argument behaves the same on Python 2 and Python 3.
# The query is passed as a flat vector because current SciPy releases of
# euclidean() reject inputs that are not 1-D.
print(knn.for_point([2, 2]))
Oops, something went wrong.