Skip to content

Commit 2a23263

Browse files
committed
📝 使用scikit-learn 中的PCA实现降维
1 parent 6860053 commit 2a23263

File tree

6 files changed

+149
-0
lines changed

6 files changed

+149
-0
lines changed

AnomalyDetection/AnomalyDetection.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#-*- coding: utf-8 -*-
2+
# Author: Bob
3+
# Date: 2016.12.22

AnomalyDetection/data1.mat

9.28 KB
Binary file not shown.

AnomalyDetection/data2.mat

91.3 KB
Binary file not shown.

PCA/PCA_scikit-learn.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#-*- coding: utf-8 -*-
2+
# Author:bob
3+
# Date:2016.12.22
4+
import numpy as np
5+
from matplotlib import pyplot as plt
6+
from scipy import io as spio
7+
from sklearn.decomposition import pca
8+
from sklearn.preprocessing import StandardScaler
9+
10+
'''二维降为一维主运行函数'''
11+
def PCA_2d_example():
    """Run the demo that reduces 2-D data to 1-D with scikit-learn's PCA.

    Loads ``X`` from ``data.mat`` and shows three figures in turn: the
    original data, the standardized data, and the data recovered from the
    1-D projection.
    """
    # Import PCA from the public package path. The file-level
    # ``from sklearn.decomposition import pca`` relies on a submodule path
    # that was deprecated and later removed from scikit-learn.
    # NOTE(review): that file-level import should be switched to this form too.
    from sklearn.decomposition import PCA

    # Load the data and plot it.
    data = spio.loadmat('data.mat')
    X = data['X']
    # plot_data_2d returns the module-level ``plt``; it is not rebound here so
    # that ``plt`` stays a plain global reference throughout the function.
    plot_data_2d(X, 'bo')
    plt.axis('square')
    plt.title('original data')
    plt.show()

    # Standardize the data and plot it.
    scaler = StandardScaler()
    scaler.fit(X)
    x_train = scaler.transform(X)

    plot_data_2d(x_train, 'bo')
    plt.axis('square')
    plt.title('scaler data')
    plt.show()

    # Fit the model; n_components is the number of dimensions to reduce to.
    K = 1
    model = PCA(n_components=K).fit(x_train)
    Z = model.transform(x_train)  # transform performs the projection

    # Recover the data from the projection and plot it.
    Ureduce = model.components_  # the matrix used for the reduction
    x_rec = np.dot(Z, Ureduce)

    plot_data_2d(x_rec, 'bo')
    plt.axis('square')
    plt.title('recover data')
    plt.show()
43+
44+
def PCA_face_example():
    """Run the face-image PCA demo using scikit-learn.

    Loads ``X`` from ``data_faces.mat``, shows the first 100 images,
    standardizes the data, fits a PCA with ``K = 100`` components, shows the
    first 36 components, and finally shows the first 100 images recovered
    from the reduced representation.
    """
    # Import PCA from the public package path. The file-level
    # ``from sklearn.decomposition import pca`` relies on a submodule path
    # that was deprecated and later removed from scikit-learn.
    # NOTE(review): that file-level import should be switched to this form too.
    from sklearn.decomposition import PCA

    # Load the data and display the first 100 original images.
    image_data = spio.loadmat('data_faces.mat')
    X = image_data['X']
    display_imageData(X[0:100, :])

    # Standardize the data.
    scaler = StandardScaler()
    scaler.fit(X)
    x_train = scaler.transform(X)

    # Fit the model and project the data onto K components.
    K = 100
    model = PCA(n_components=K).fit(x_train)
    Z = model.transform(x_train)
    Ureduce = model.components_

    # Visualize part of the component matrix.
    display_imageData(Ureduce[0:36, :])

    # Recover the data from the projection and display it.
    x_rec = np.dot(Z, Ureduce)
    display_imageData(x_rec[0:100, :])
65+
66+
67+
68+
69+
# 可视化二维数据
70+
def plot_data_2d(X,marker):
    """Plot column 0 of ``X`` against column 1 and return the ``plt`` module.

    ``marker`` is passed to ``plt.plot`` as its format argument.
    """
    first_col, second_col = X[:, 0], X[:, 1]
    plt.plot(first_col, second_col, marker)
    return plt
73+
74+
# 显示图片
75+
def display_imageData(imgData):
    """Tile the rows of ``imgData`` into one grid and show it in grayscale.

    Drawing the images one by one is slow, so every row is reshaped into a
    ``height x width`` patch, scaled by its largest absolute value, and copied
    into a single padded array that is displayed with one ``imshow`` call.

    Args:
        imgData: 2-D array holding one flattened image per row.

    Raises:
        ValueError: If ``imgData`` has no rows or no columns.
    """
    m, n = imgData.shape
    if m == 0 or n == 0:
        raise ValueError('imgData must be a non-empty 2-D array')

    width = int(np.round(np.sqrt(n)))
    height = n // width
    rows_count = int(np.floor(np.sqrt(m)))
    # Integer ceiling division, so the grid always has room for all m images.
    cols_count = -(-m // rows_count)
    pad = 1
    # Padding and any unused grid cells keep the background value -1.
    display_array = -np.ones((pad + rows_count * (height + pad),
                              pad + cols_count * (width + pad)))

    # Iterate over the images rather than the grid cells: the grid may have
    # more cells than images, and indexing past row m - 1 raises IndexError.
    for index in range(m):
        i, j = divmod(index, cols_count)
        row = imgData[index, :]
        max_val = np.max(np.abs(row))
        if max_val == 0:
            max_val = 1  # all-zero image: avoid 0/0 and leave it as zeros
        top = pad + i * (height + pad)
        left = pad + j * (width + pad)
        # order="F" reshapes column-first, as MATLAB does; NumPy defaults to
        # row-first, so it must be requested explicitly.
        display_array[top:top + height, left:left + width] = (
            row.reshape(height, width, order="F") / max_val
        )

    plt.imshow(display_array, cmap='gray')  # show as a grayscale image
    plt.axis('off')
    plt.show()
99+
100+
101+
if __name__ == "__main__":
    # Runs the face-image demo; call PCA_2d_example() here instead to run
    # the 2-D example.
    PCA_face_example()

formula/PCA.wmf

25.9 KB
Binary file not shown.

readme.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,49 @@ def runKMeans(X,initial_centroids,max_iters,plot_process):
988988
- 恢复数据
989989
![enter description here][48]
990990

991+
### 9、[使用scikit-learn库中的PCA实现降维](/PCA/PCA_scikit-learn.py)
992+
- 导入需要的包:
993+
```
994+
#-*- coding: utf-8 -*-
995+
# Author:bob
996+
# Date:2016.12.22
997+
import numpy as np
998+
from matplotlib import pyplot as plt
999+
from scipy import io as spio
1000+
from sklearn.decomposition import pca
1001+
from sklearn.preprocessing import StandardScaler
1002+
```
1003+
- 归一化数据
1004+
```
1005+
'''归一化数据并作图'''
1006+
scaler = StandardScaler()
1007+
scaler.fit(X)
1008+
x_train = scaler.transform(X)
1009+
```
1010+
- 使用PCA模型拟合数据,并降维
1011+
```
1012+
'''拟合数据'''
1013+
K=1 # 要降的维度
1014+
model = pca.PCA(n_components=K).fit(x_train) # 拟合数据,n_components定义要降的维度
1015+
Z = model.transform(x_train) # transform就会执行降维操作
1016+
```
1017+
- `n_components`对应要降的维度
1018+
1019+
- 数据恢复
1020+
```
1021+
'''数据恢复并作图'''
1022+
Ureduce = model.components_ # 得到降维用的Ureduce
1023+
x_rec = np.dot(Z,Ureduce) # 数据恢复
1024+
```
1025+
- `model.components_`会得到降维使用的`U`矩阵
1026+
1027+
1028+
---------------------------------------------------------------
1029+
1030+
1031+
### 七、异常检测 Anomaly Detection
1032+
1033+
9911034

9921035
[1]: ./images/LinearRegression_01.png "LinearRegression_01.png"
9931036
[2]: ./images/LogisticRegression_01.png "LogisticRegression_01.png"

0 commit comments

Comments
 (0)