4
4
import numpy as np
5
5
from matplotlib import pyplot as plt
6
6
from scipy import io as spio
7
+ from sklearn .decomposition import pca
7
8
8
9
'''
9
- 主成分分析运行方法
10
+ 主成分分析_2维数据降维1维演示函数
10
11
'''
11
- def PCA ():
12
+ def PCA_2D ():
12
13
data_2d = spio .loadmat ("data.mat" )
13
14
X = data_2d ['X' ]
14
15
m = X .shape [0 ]
15
- plt = plot_data_2d (X ) # 显示二维的数据
16
+ plt = plot_data_2d (X , 'bo' ) # 显示二维的数据
16
17
plt .show ()
17
18
18
19
X_copy = X .copy ()
19
20
X_norm ,mu ,sigma = featureNormalize (X_copy ) # 归一化数据
20
- plot_data_2d (X_norm ) # 显示归一化后的数据
21
+ #plot_data_2d(X_norm) # 显示归一化后的数据
22
+ #plt.show()
21
23
22
- Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m
23
- U ,S ,V = np .linalg .svd (Sigma )
24
+ Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m # 求Sigma
25
+ U ,S ,V = np .linalg .svd (Sigma ) # 求Sigma的奇异值分解
24
26
25
- plt = plot_data_2d (X_norm )
26
- plt .plot (mu .reshape (- 1 ,1 ),mu .reshape (- 1 ,1 )+ S [0 ]* (U [:,0 ].reshape (- 1 ,1 )),'k-' )
27
+ plt = plot_data_2d (X ,'bo' ) # 显示原本数据
28
+ drawline (plt , mu , mu + S [0 ]* (U [:,0 ]), 'r-' ) # 线,为投影的方向
29
+
30
+ plt .axis ('square' )
27
31
plt .show ()
28
32
29
- K = 1
30
- Z = projectData (X_norm ,U ,K )
31
- print Z [0 ]
32
-
33
+ K = 1 # 定义降维多少维(本来是2维的,这里降维1维)
34
+ '''投影之后数据(降维之后)'''
35
+ Z = projectData (X_norm ,U ,K ) # 投影
36
+ '''恢复数据'''
37
+ X_rec = recoverData (Z ,U ,K ) # 恢复
38
+ '''作图-----原数据与恢复的数据'''
39
+ plt = plot_data_2d (X_norm ,'bo' )
40
+ plot_data_2d (X_rec ,'ro' )
41
+ for i in range (X_norm .shape [0 ]):
42
+ drawline (plt , X_norm [i ,:], X_rec [i ,:], '--k' )
43
+ plt .axis ('square' )
44
+ plt .show ()
33
45
34
46
47
+ '''主成分分析_PCA图像数据降维'''
48
+ def PCA_faceImage ():
49
+ print (u'加载图像数据.....' )
50
+ data_image = spio .loadmat ('data_faces.mat' )
51
+ X = data_image ['X' ]
52
+ display_imageData (X [0 :100 ,:])
53
+ m = X .shape [0 ] # 数据条数
54
+
55
+ print (u'运行PCA....' )
56
+ X_norm ,mu ,sigma = featureNormalize (X ) # 归一化
57
+
58
+ Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m # 求Sigma
59
+ U ,S ,V = np .linalg .svd (Sigma ) # 奇异值分解
60
+ display_imageData (np .transpose (U [:,0 :36 ])) # 显示U的数据
61
+
62
+ print (u'对face数据降维.....' )
63
+ K = 100 # 降维100维(原先是32*32=1024维的)
64
+ Z = projectData (X_norm , U , K )
65
+ print (u'投影之后Z向量的大小:%d %d' % Z .shape )
66
+
67
+ print (u'显示降维之后的数据......' )
68
+ X_rec = recoverData (Z , U , K ) # 恢复数据
69
+ display_imageData (X_rec [0 :100 ,:])
70
+
71
+
72
+
73
+
35
74
# 可视化二维数据
36
- def plot_data_2d (X ):
37
- plt .plot (X [:,0 ],X [:,1 ],'bo' )
75
+ def plot_data_2d (X , marker ):
76
+ plt .plot (X [:,0 ],X [:,1 ],marker )
38
77
return plt
39
78
40
79
# 归一化数据
@@ -55,10 +94,48 @@ def featureNormalize(X):
55
94
def projectData (X_norm ,U ,K ):
56
95
Z = np .zeros ((X_norm .shape [0 ],K ))
57
96
58
- U_reduce = U [:,0 :K ]
59
- Z = np .dot (X_norm ,U_reduce )
97
+ U_reduce = U [:,0 :K ] # 取前K个
98
+ Z = np .dot (X_norm ,U_reduce )
60
99
return Z
61
100
101
+ # 画一条线
102
+ def drawline (plt ,p1 ,p2 ,line_type ):
103
+ plt .plot (np .array ([p1 [0 ],p2 [0 ]]),np .array ([p1 [1 ],p2 [1 ]]),line_type )
104
+
105
+
106
+ # 恢复数据
107
+ def recoverData (Z ,U ,K ):
108
+ X_rec = np .zeros ((Z .shape [0 ],U .shape [0 ]))
109
+ U_recude = U [:,0 :K ]
110
+ X_rec = np .dot (Z ,np .transpose (U_recude )) # 还原数据(近似)
111
+ return X_rec
112
+
113
+ # 显示图片
114
+ def display_imageData (imgData ):
115
+ sum = 0
116
+ '''
117
+ 显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可)
118
+ - 初始化一个二维数组
119
+ - 将每行的数据调整成图像的矩阵,放进二维数组
120
+ - 显示即可
121
+ '''
122
+ m ,n = imgData .shape
123
+ width = np .int32 (np .round (np .sqrt (n )))
124
+ height = np .int32 (n / width );
125
+ rows_count = np .int32 (np .floor (np .sqrt (m )))
126
+ cols_count = np .int32 (np .ceil (m / rows_count ))
127
+ pad = 1
128
+ display_array = - np .ones ((pad + rows_count * (height + pad ),pad + cols_count * (width + pad )))
129
+ for i in range (rows_count ):
130
+ for j in range (cols_count ):
131
+ max_val = np .max (np .abs (imgData [sum ,:]))
132
+ display_array [pad + i * (height + pad ):pad + i * (height + pad )+ height ,pad + j * (width + pad ):pad + j * (width + pad )+ width ] = imgData [sum ,:].reshape (height ,width ,order = "F" )/ max_val # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行
133
+ sum += 1
134
+
135
+ plt .imshow (display_array ,cmap = 'gray' ) #显示灰度图像
136
+ plt .axis ('off' )
137
+ plt .show ()
62
138
63
139
if __name__ == "__main__" :
64
- PCA ()
140
+ PCA_2D ()
141
+ PCA_faceImage ()
0 commit comments