Skip to content

Commit 8b5da44

Browse files
committed
add PCA code
1 parent 06470cd commit 8b5da44

File tree

2 files changed

+94
-17
lines changed

2 files changed

+94
-17
lines changed

PCA/PCA.py

Lines changed: 94 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,76 @@
44
import numpy as np
55
from matplotlib import pyplot as plt
66
from scipy import io as spio
7+
from sklearn.decomposition import pca
78

89
'''
9-
主成分分析运行方法
10+
主成分分析_2维数据降维1维演示函数
1011
'''
11-
def PCA():
12+
def PCA_2D():
1213
data_2d = spio.loadmat("data.mat")
1314
X = data_2d['X']
1415
m = X.shape[0]
15-
plt = plot_data_2d(X) # 显示二维的数据
16+
plt = plot_data_2d(X,'bo') # 显示二维的数据
1617
plt.show()
1718

1819
X_copy = X.copy()
1920
X_norm,mu,sigma = featureNormalize(X_copy) # 归一化数据
20-
plot_data_2d(X_norm) # 显示归一化后的数据
21+
#plot_data_2d(X_norm) # 显示归一化后的数据
22+
#plt.show()
2123

22-
Sigma = np.dot(np.transpose(X_norm),X_norm)/m
23-
U,S,V = np.linalg.svd(Sigma)
24+
Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma
25+
U,S,V = np.linalg.svd(Sigma) # 求Sigma的奇异值分解
2426

25-
plt = plot_data_2d(X_norm)
26-
plt.plot(mu.reshape(-1,1),mu.reshape(-1,1)+S[0]*(U[:,0].reshape(-1,1)),'k-')
27+
plt = plot_data_2d(X,'bo') # 显示原本数据
28+
drawline(plt, mu, mu+S[0]*(U[:,0]), 'r-') # 线,为投影的方向
29+
30+
plt.axis('square')
2731
plt.show()
2832

29-
K = 1
30-
Z = projectData(X_norm,U,K)
31-
print Z[0]
32-
33+
K = 1 # 定义降维多少维(本来是2维的,这里降维1维)
34+
'''投影之后数据(降维之后)'''
35+
Z = projectData(X_norm,U,K) # 投影
36+
'''恢复数据'''
37+
X_rec = recoverData(Z,U,K) # 恢复
38+
'''作图-----原数据与恢复的数据'''
39+
plt = plot_data_2d(X_norm,'bo')
40+
plot_data_2d(X_rec,'ro')
41+
for i in range(X_norm.shape[0]):
42+
drawline(plt, X_norm[i,:], X_rec[i,:], '--k')
43+
plt.axis('square')
44+
plt.show()
3345

3446

47+
'''主成分分析_PCA图像数据降维'''
48+
def PCA_faceImage():
49+
print (u'加载图像数据.....')
50+
data_image = spio.loadmat('data_faces.mat')
51+
X = data_image['X']
52+
display_imageData(X[0:100,:])
53+
m = X.shape[0] # 数据条数
54+
55+
print (u'运行PCA....')
56+
X_norm,mu,sigma = featureNormalize(X) # 归一化
57+
58+
Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma
59+
U,S,V = np.linalg.svd(Sigma) # 奇异值分解
60+
display_imageData(np.transpose(U[:,0:36])) # 显示U的数据
61+
62+
print (u'对face数据降维.....')
63+
K = 100 # 降维100维(原先是32*32=1024维的)
64+
Z = projectData(X_norm, U, K)
65+
print (u'投影之后Z向量的大小:%d %d' %Z.shape)
66+
67+
print (u'显示降维之后的数据......')
68+
X_rec = recoverData(Z, U, K) # 恢复数据
69+
display_imageData(X_rec[0:100,:])
70+
71+
72+
73+
3574
# 可视化二维数据
36-
def plot_data_2d(X):
37-
plt.plot(X[:,0],X[:,1],'bo')
75+
def plot_data_2d(X,marker):
76+
plt.plot(X[:,0],X[:,1],marker)
3877
return plt
3978

4079
# 归一化数据
@@ -55,10 +94,48 @@ def featureNormalize(X):
5594
def projectData(X_norm,U,K):
5695
Z = np.zeros((X_norm.shape[0],K))
5796

58-
U_reduce = U[:,0:K]
59-
Z = np.dot(X_norm,U_reduce)
97+
U_reduce = U[:,0:K] # 取前K个
98+
Z = np.dot(X_norm,U_reduce)
6099
return Z
61100

101+
# 画一条线
102+
def drawline(plt,p1,p2,line_type):
103+
plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type)
104+
105+
106+
# 恢复数据
107+
def recoverData(Z,U,K):
108+
X_rec = np.zeros((Z.shape[0],U.shape[0]))
109+
U_recude = U[:,0:K]
110+
X_rec = np.dot(Z,np.transpose(U_recude)) # 还原数据(近似)
111+
return X_rec
112+
113+
# 显示图片
114+
def display_imageData(imgData):
115+
sum = 0
116+
'''
117+
显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可)
118+
- 初始化一个二维数组
119+
- 将每行的数据调整成图像的矩阵,放进二维数组
120+
- 显示即可
121+
'''
122+
m,n = imgData.shape
123+
width = np.int32(np.round(np.sqrt(n)))
124+
height = np.int32(n/width);
125+
rows_count = np.int32(np.floor(np.sqrt(m)))
126+
cols_count = np.int32(np.ceil(m/rows_count))
127+
pad = 1
128+
display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad)))
129+
for i in range(rows_count):
130+
for j in range(cols_count):
131+
max_val = np.max(np.abs(imgData[sum,:]))
132+
display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行
133+
sum += 1
134+
135+
plt.imshow(display_array,cmap='gray') #显示灰度图像
136+
plt.axis('off')
137+
plt.show()
62138

63139
if __name__ == "__main__":
64-
PCA()
140+
PCA_2D()
141+
PCA_faceImage()

PCA/data1.mat

995 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)