Skip to content

Commit 3bc2172

Browse files
committed
add code anomaly detection异常检测代码
1 parent 9f46fc7 commit 3bc2172

File tree

1 file changed

+53
-8
lines changed

1 file changed

+53
-8
lines changed

AnomalyDetection/AnomalyDetection.py

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,25 @@ def anomalyDetection_example():
1313
plt = display_2d_data(X, 'bx')
1414
plt.title("origin data")
1515
plt.show()
16+
'''多元高斯分布函数,并可视化拟合的边界'''
17+
mu,sigma2 = estimateGaussian(X) # 参数估计(求均值和方差)
18+
#print mu,sigma2
19+
p = multivariateGaussian(X,mu,sigma2) # 多元高斯分布函数
20+
#print p
21+
visualizeFit(X,mu,sigma2) # 显示图像
1622

17-
mu,sigma2 = estimateGaussian(X)
18-
print mu,sigma2
19-
p = multivariateGaussian(X,mu,sigma2)
20-
print p
23+
'''选择异常点(在交叉验证CV上训练得到最好的epsilon)'''
24+
Xval = data['Xval']
25+
yval = data['yval']
26+
pval = multivariateGaussian(Xval, mu, sigma2) # 计算CV上的概率密度值
27+
epsilon,F1 = selectThreshold(yval,pval) # 选择最优的epsilon临界值
28+
print u'在CV上得到的最好的epsilon是:%e'%epsilon
29+
print u'对应的F1Score值为:%f'%F1
30+
outliers = np.where(p<epsilon) # 找到小于临界值的异常点,并作图
31+
plt.plot(X[outliers,0],X[outliers,1],'o',markeredgecolor='r',markerfacecolor='w',markersize=10.)
32+
plt = display_2d_data(X, 'bx')
33+
plt.show()
2134

22-
visualizeFit(X,mu,sigma2)
2335

2436

2537

@@ -44,16 +56,49 @@ def multivariateGaussian(X,mu,Sigma2):
4456
k = len(mu)
4557
if (Sigma2.shape[0]>1):
4658
Sigma2 = np.diag(Sigma2)
47-
59+
'''多元高斯分布函数'''
4860
X = X-mu
4961
argu = (2*np.pi)**(-k/2)*np.linalg.det(Sigma2)**(-0.5)
5062
p = argu*np.exp(-0.5*np.sum(np.dot(X,np.linalg.inv(Sigma2))*X,axis=1)) # axis表示每行
5163
return p
5264

5365
# 可视化边界
5466
def visualizeFit(X,mu,sigma2):
    """Plot the data points together with contour lines of the fitted Gaussian.

    X      -- 2-D array; columns 0 and 1 are used as plot coordinates.
    mu     -- mean parameter, passed through to multivariateGaussian.
    sigma2 -- variance parameter, passed through to multivariateGaussian.

    The density is evaluated on a fixed grid covering [0, 36) in steps of
    0.5 on both axes, then the figure is shown with plt.show().
    """
    # Build the evaluation grid (a mesh is required for contour plotting).
    x = np.arange(0, 36, 0.5)
    y = np.arange(0, 36, 0.5)
    X1, X2 = np.meshgrid(x, y)

    # Evaluate the density at every grid point (one point per row), then
    # restore the grid shape so it lines up with X1 / X2.
    grid_points = np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1)))
    Z = multivariateGaussian(grid_points, mu, sigma2).reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Skip the contours entirely if any density value is infinite.
    if not np.isinf(Z).any():
        # Contour levels are 10**-20, 10**-17, ..., 10**-2.
        # The contour keywords are `colors` / `linewidths` (plural); the
        # singular spellings are not the documented contour parameters.
        plt.contour(X1, X2, Z, 10.**np.arange(-20, 0, 3),
                    colors='black', linewidths=.5)

    plt.show()
80+
81+
# 选择最优的epsilon,即:使F1Score最大
82+
def selectThreshold(yval,pval):
    """Pick the threshold epsilon that maximises the F1 score.

    yval -- ground-truth labels (1 = anomaly, 0 = normal); any shape, it is
            flattened so that an (m, 1) column vector works as well as (m,).
    pval -- density values for the same examples; flattened likewise.

    A sample is flagged as an anomaly when pval < epsilon.  Candidate
    epsilons are 1000 evenly spaced steps from min(pval) up to max(pval).

    Returns (bestEpsilon, bestF1).  Returns (0., 0.) when pval is empty,
    when all pval are equal (no threshold can separate anything), or when
    no candidate yields a true positive.
    """
    bestEpsilon = 0.
    bestF1 = 0.

    # Flatten both inputs: comparing an (m, 1) label column against an (m,)
    # density vector would otherwise broadcast into an (m, m) matrix and
    # silently corrupt the counts.
    yval = np.asarray(yval).ravel()
    pval = np.asarray(pval).ravel()
    if pval.size == 0:
        return bestEpsilon, bestF1

    lowest = np.min(pval)
    highest = np.max(pval)
    step = (highest - lowest) / 1000
    if step == 0:
        # All densities identical: np.arange cannot take a zero step.
        return bestEpsilon, bestF1

    for epsilon in np.arange(lowest, highest, step):
        flagged = pval < epsilon
        tp = float(np.sum(flagged & (yval == 1)))   # flagged, truly anomalous
        fp = float(np.sum(flagged & (yval == 0)))   # flagged, actually normal
        fn = float(np.sum(~flagged & (yval == 1)))  # missed anomalies
        if tp == 0:
            # Precision/recall are 0 or undefined (0/0); F1 cannot beat the
            # current best, so skip instead of dividing by zero.
            continue
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        F1 = (2 * precision * recall) / (precision + recall)
        if F1 > bestF1:
            bestF1 = F1
            bestEpsilon = epsilon
    return bestEpsilon,bestF1
101+
57102

58103

59104
if __name__ == '__main__':

0 commit comments

Comments
 (0)