Skip to content

Commit 3a63f13

Browse files
committed
SVM
1 parent d81f593 commit 3a63f13

File tree

5 files changed

+49
-3
lines changed

5 files changed

+49
-3
lines changed

K-Menas/K-Menas.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import numpy as np
2+
from matplotlib import pyplot as plt
3+
from scipy import io as spio
4+
5+
6+
def KMeans():
7+
data = spio.loadmat("data.mat")
8+
X = data['X']
9+
K = 3 # 总类数
10+
initial_centroids = np.array([[3,3],[6,2],[8,5]]) # 初始化类中心
11+
idx = findClosestCentroids(X,initial_centroids) # 找到每条数据属于哪个类
12+
13+
centroids = computerCentroids(X,idx,K) # 重新计算类中心
14+
print centroids
15+
16+
# 找到每条数据距离哪个类中心最近
17+
def findClosestCentroids(X,initial_centroids):
18+
m = X.shape[0] # 数据条数
19+
K = initial_centroids.shape[0] # 类的总数
20+
dis = np.zeros((m,K)) # 存储计算每个点分别到K个类的距离
21+
idx = np.zeros((m,1)) # 要返回的每条数据属于哪个类
22+
23+
'''计算每个点到每个类中心的距离'''
24+
for i in range(m):
25+
for j in range(K):
26+
dis[i,j] = np.dot((X[i,:]-initial_centroids[j,:]).reshape(1,-1),(X[i,:]-initial_centroids[j,:]).reshape(-1,1))
27+
28+
'''返回dis每一行的最小值对应的列号,即为对应的类别'''
29+
idx = np.array(np.where(dis[0,:] == np.min(dis, axis=1)[0]))
30+
for i in np.arange(1, m):
31+
t = np.array(np.where(dis[i,:] == np.min(dis, axis=1)[i]))
32+
idx = np.vstack((idx,t))
33+
return idx
34+
35+
36+
# 计算类中心
37+
def computerCentroids(X,idx,K):
38+
n = X.shape[1]
39+
centroids = np.zeros((K,n))
40+
for i in range(K):
41+
centroids[i,:] = np.mean(X[np.array(np.where(idx==i)),:], axis=0).reshape(1,-1) # axis=0为每一列
42+
return centroids
43+
44+
if __name__ == "__main__":
45+
KMeans()
46+

K-Menas/bird.mat

44.5 KB
Binary file not shown.

K-Menas/bird.png

32.3 KB
Loading

K-Menas/data.mat

4.67 KB
Binary file not shown.

readme.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -644,13 +644,13 @@ def predict(Theta1,Theta2,X):
644644
![\cos t({h_\theta }(x),y) = \left\{ {\begin{array}{c} { - \log ({h_\theta }(x))} \\ { - \log (1 - {h_\theta }(x))} \end{array} \begin{array}{c} {y = 1} \\ {y = 0} \end{array} } \right.](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Ccos%20t%28%7Bh_%5Ctheta%20%7D%28x%29%2Cy%29%20%3D%20%5Cleft%5C%7B%20%7B%5Cbegin%7Barray%7D%7Bc%7D%20%20%20%20%7B%20-%20%5Clog%20%28%7Bh_%5Ctheta%20%7D%28x%29%29%7D%20%5C%5C%20%20%20%20%7B%20-%20%5Clog%20%281%20-%20%7Bh_%5Ctheta%20%7D%28x%29%29%7D%20%20%5Cend%7Barray%7D%20%5Cbegin%7Barray%7D%7Bc%7D%20%20%20%20%7By%20%3D%201%7D%20%5C%5C%20%20%20%20%7By%20%3D%200%7D%20%20%5Cend%7Barray%7D%20%7D%20%5Cright.)
645645
其中:![{h_\theta }({\text{z}}) = \frac{1}{{1 + {e^{ - z}}}}](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7Bh_%5Ctheta%20%7D%28%7B%5Ctext%7Bz%7D%7D%29%20%3D%20%5Cfrac%7B1%7D%7B%7B1%20%2B%20%7Be%5E%7B%20-%20z%7D%7D%7D%7D)![z = {\theta ^T}x](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=z%20%3D%20%7B%5Ctheta%20%5ET%7Dx)
646646
- 如图所示,如果`y=1``cost`代价函数如图所示
647-
![enter description here][24]
647+
![enter description here][24]
648648
我们想让![{\theta ^T}x > > 0](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7B%5Ctheta%20%5ET%7Dx%20%3E%20%20%3E%200),即`z>>0`,这样的话`cost`代价函数才会趋于最小(这是我们想要的),所以用途中**红色**的函数![\cos {t_1}(z)](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Ccos%20%7Bt_1%7D%28z%29)代替逻辑回归中的cost
649649
-`y=0`时同样,用![\cos {t_0}(z)](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Ccos%20%7Bt_0%7D%28z%29)代替
650650
![enter description here][25]
651651
- 最终得到的代价函数为:
652652
![J(\theta ) = C\sum\limits_{i = 1}^m {[{y^{(i)}}\cos {t_1}({\theta ^T}{x^{(i)}}) + (1 - {y^{(i)}})\cos {t_0}({\theta ^T}{x^{(i)}})} ] + \frac{1}{2}\sum\limits_{j = 1}^{\text{n}} {\theta _j^2} ](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=J%28%5Ctheta%20%29%20%3D%20C%5Csum%5Climits_%7Bi%20%3D%201%7D%5Em%20%7B%5B%7By%5E%7B%28i%29%7D%7D%5Ccos%20%7Bt_1%7D%28%7B%5Ctheta%20%5ET%7D%7Bx%5E%7B%28i%29%7D%7D%29%20%2B%20%281%20-%20%7By%5E%7B%28i%29%7D%7D%29%5Ccos%20%7Bt_0%7D%28%7B%5Ctheta%20%5ET%7D%7Bx%5E%7B%28i%29%7D%7D%29%7D%20%5D%20%2B%20%5Cfrac%7B1%7D%7B2%7D%5Csum%5Climits_%7Bj%20%3D%201%7D%5E%7B%5Ctext%7Bn%7D%7D%20%7B%5Ctheta%20_j%5E2%7D%20)
653-
最后我们想要![\mathop {\min }\limits_\theta J(\theta )](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Cmathop%20%7B%5Cmin%20%7D%5Climits_%5Ctheta%20%20J%28%5Ctheta%20%29)
653+
最后我们想要![{\min }\limits_\theta J(\theta )](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Cmathop%20%7B%5Cmin%20%7D%5Climits_%5Ctheta%20%20J%28%5Ctheta%20%29)
654654
- 之前我们逻辑回归中的代价函数为:
655655
![J(\theta ) = - \frac{1}{m}\sum\limits_{i = 1}^m {[{y^{(i)}}\log ({h_\theta }({x^{(i)}}) + (1 - } {y^{(i)}})\log (1 - {h_\theta }({x^{(i)}})] + \frac{\lambda }{{2m}}\sum\limits_{j = 1}^n {\theta _j^2} ](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=J%28%5Ctheta%20%29%20%3D%20%20-%20%5Cfrac%7B1%7D%7Bm%7D%5Csum%5Climits_%7Bi%20%3D%201%7D%5Em%20%7B%5B%7By%5E%7B%28i%29%7D%7D%5Clog%20%28%7Bh_%5Ctheta%20%7D%28%7Bx%5E%7B%28i%29%7D%7D%29%20%2B%20%281%20-%20%7D%20%7By%5E%7B%28i%29%7D%7D%29%5Clog%20%281%20-%20%7Bh_%5Ctheta%20%7D%28%7Bx%5E%7B%28i%29%7D%7D%29%5D%20%2B%20%5Cfrac%7B%5Clambda%20%7D%7B%7B2m%7D%7D%5Csum%5Climits_%7Bj%20%3D%201%7D%5En%20%7B%5Ctheta%20_j%5E2%7D%20)
656656
可以认为这里的![C = \frac{m}{\lambda }](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=C%20%3D%20%5Cfrac%7Bm%7D%7B%5Clambda%20%7D),只是表达形式问题,这里`C`的值越大,SVM的决策边界的`margin`也越大,下面会说明
@@ -662,7 +662,7 @@ def predict(Theta1,Theta2,X):
662662
- ![u = \left[ {\begin{array}{c} {{u_1}} \\ {{u_2}} \end{array} } \right]](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=u%20%3D%20%5Cleft%5B%20%7B%5Cbegin%7Barray%7D%7Bc%7D%20%20%20%20%7B%7Bu_1%7D%7D%20%5C%5C%20%20%20%20%7B%7Bu_2%7D%7D%20%20%5Cend%7Barray%7D%20%7D%20%5Cright%5D)![v = \left[ {\begin{array}{c} {{v_1}} \\ {{v_2}} \end{array} } \right]](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=v%20%3D%20%5Cleft%5B%20%7B%5Cbegin%7Barray%7D%7Bc%7D%20%20%20%20%7B%7Bv_1%7D%7D%20%5C%5C%20%20%20%20%7B%7Bv_2%7D%7D%20%20%5Cend%7Barray%7D%20%7D%20%5Cright%5D)
663663
- ![||u||](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7C%7Cu%7C%7C)表示`u`**欧几里得范数**(欧式范数),![||u||{\text{ = }}\sqrt {{\text{u}}_1^2 + u_2^2} ](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7C%7Cu%7C%7C%7B%5Ctext%7B%20%3D%20%7D%7D%5Csqrt%20%7B%7B%5Ctext%7Bu%7D%7D_1%5E2%20%2B%20u_2%5E2%7D%20)
664664
- `向量V``向量u`上的投影的长度记为`p`,则:向量内积:
665-
![{{\text{u}}^T}v = p||u|| = {u_1}{v_1} + {u_2}{v_2}](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7B%7B%5Ctext%7Bu%7D%7D%5ET%7Dv%20%3D%20p%7C%7Cu%7C%7C%20%3D%20%7Bu_1%7D%7Bv_1%7D%20%2B%20%7Bu_2%7D%7Bv_2%7D)
665+
![{{\text{u}}^T}v = p||u|| = {u_1}{v_1} + {u_2}{v_2}](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%7B%7B%5Ctext%7Bu%7D%7D%5ET%7Dv%20%3D%20p%7C%7Cu%7C%7C%20%3D%20%7Bu_1%7D%7Bv_1%7D%20%2B%20%7Bu_2%7D%7Bv_2%7D)
666666
![enter description here][27]
667667
根据向量夹角公式推导一下即可。![\cos \theta = \frac{{\overrightarrow {\text{u}} \overrightarrow v }}{{|\overrightarrow {\text{u}} ||\overrightarrow v |}}](http://chart.apis.google.com/chart?cht=tx&chs=1x0&chf=bg,s,FFFFFF00&chco=000000&chl=%5Ccos%20%5Ctheta%20%20%3D%20%5Cfrac%7B%7B%5Coverrightarrow%20%7B%5Ctext%7Bu%7D%7D%20%5Coverrightarrow%20v%20%7D%7D%7B%7B%7C%5Coverrightarrow%20%7B%5Ctext%7Bu%7D%7D%20%7C%7C%5Coverrightarrow%20v%20%7C%7D%7D)
668668

0 commit comments

Comments
 (0)