Skip to content

Commit

Permalink
上传代码
Browse files Browse the repository at this point in the history
  • Loading branch information
fengdu78 committed Nov 26, 2018
0 parents commit b37ed39
Show file tree
Hide file tree
Showing 16 changed files with 5,586 additions and 0 deletions.
Binary file added code/gongzhong.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 33 additions & 0 deletions code/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
**代码目录**

第1章 统计学习方法概论

第2章 感知机

第3章 k近邻法

第4章 朴素贝叶斯

第5章 决策树

第6章 逻辑斯谛回归

第7章 支持向量机

第8章 提升方法

第9章 EM算法及其推广

第10章 隐马尔可夫模型

第11章 条件随机场

-----------
参考:
https://github.com/wzyonggege/statistical-learning-method
https://github.com/WenDesi/lihang_book_algorithm
https://blog.csdn.net/tudaodiaozhale

代码整理和修改:机器学习初学者 (微信公众号,ID:ai-start-com


310 changes: 310 additions & 0 deletions code/第10章 隐马尔可夫模型(HMM)/HMM.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"原文代码作者:https://blog.csdn.net/tudaodiaozhale\n",
"\n",
"中文注释制作:机器学习初学者(微信公众号:ID:ai-start-com)\n",
"\n",
"配置环境:python 3.6\n",
"\n",
"代码全部测试通过。\n",
"![gongzhong](../gongzhong.jpg)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 第10章 隐马尔可夫模型"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class HiddenMarkov:\n",
" def forward(self, Q, V, A, B, O, PI): # 使用前向算法\n",
" N = len(Q) # 状态序列的大小\n",
" M = len(O) # 观测序列的大小\n",
" alphas = np.zeros((N, M)) # alpha值\n",
" T = M # 有几个时刻,有几个观测序列,就有几个时刻\n",
" for t in range(T): # 遍历每一时刻,算出alpha值\n",
" indexOfO = V.index(O[t]) # 找出序列对应的索引\n",
" for i in range(N):\n",
" if t == 0: # 计算初值\n",
" alphas[i][t] = PI[t][i] * B[i][indexOfO] # P176(10.15)\n",
" print('alpha1(%d)=p%db%db(o1)=%f' % (i, i, i, alphas[i][t]))\n",
" else:\n",
" alphas[i][t] = np.dot([alpha[t - 1] for alpha in alphas], [a[i] for a in A]) * B[i][\n",
" indexOfO] # 对应P176(10.16)\n",
" print('alpha%d(%d)=[sigma alpha%d(i)ai%d]b%d(o%d)=%f' % (t, i, t - 1, i, i, t, alphas[i][t]))\n",
" # print(alphas)\n",
" P = np.sum([alpha[M - 1] for alpha in alphas]) # P176(10.17)\n",
" # alpha11 = pi[0][0] * B[0][0] #代表a1(1)\n",
" # alpha12 = pi[0][1] * B[1][0] #代表a1(2)\n",
" # alpha13 = pi[0][2] * B[2][0] #代表a1(3)\n",
"\n",
" def backward(self, Q, V, A, B, O, PI): # 后向算法\n",
" N = len(Q) # 状态序列的大小\n",
" M = len(O) # 观测序列的大小\n",
" betas = np.ones((N, M)) # beta\n",
" for i in range(N):\n",
" print('beta%d(%d)=1' % (M, i))\n",
" for t in range(M - 2, -1, -1):\n",
" indexOfO = V.index(O[t + 1]) # 找出序列对应的索引\n",
" for i in range(N):\n",
" betas[i][t] = np.dot(np.multiply(A[i], [b[indexOfO] for b in B]), [beta[t + 1] for beta in betas])\n",
" realT = t + 1\n",
" realI = i + 1\n",
" print('beta%d(%d)=[sigma a%djbj(o%d)]beta%d(j)=(' % (realT, realI, realI, realT + 1, realT + 1),\n",
" end='')\n",
" for j in range(N):\n",
" print(\"%.2f*%.2f*%.2f+\" % (A[i][j], B[j][indexOfO], betas[j][t + 1]), end='')\n",
" print(\"0)=%.3f\" % betas[i][t])\n",
" # print(betas)\n",
" indexOfO = V.index(O[0])\n",
" P = np.dot(np.multiply(PI, [b[indexOfO] for b in B]), [beta[0] for beta in betas])\n",
" print(\"P(O|lambda)=\", end=\"\")\n",
" for i in range(N):\n",
" print(\"%.1f*%.1f*%.5f+\" % (PI[0][i], B[i][indexOfO], betas[i][0]), end=\"\")\n",
" print(\"0=%f\" % P)\n",
"\n",
" def viterbi(self, Q, V, A, B, O, PI):\n",
" N = len(Q) # 状态序列的大小\n",
" M = len(O) # 观测序列的大小\n",
" deltas = np.zeros((N, M))\n",
" psis = np.zeros((N, M))\n",
" I = np.zeros((1, M))\n",
" for t in range(M):\n",
" realT = t+1\n",
" indexOfO = V.index(O[t]) # 找出序列对应的索引\n",
" for i in range(N):\n",
" realI = i+1\n",
" if t == 0:\n",
" deltas[i][t] = PI[0][i] * B[i][indexOfO]\n",
" psis[i][t] = 0\n",
" print('delta1(%d)=pi%d * b%d(o1)=%.2f * %.2f=%.2f'%(realI, realI, realI, PI[0][i], B[i][indexOfO], deltas[i][t]))\n",
" print('psis1(%d)=0' % (realI))\n",
" else:\n",
" deltas[i][t] = np.max(np.multiply([delta[t-1] for delta in deltas], [a[i] for a in A])) * B[i][indexOfO]\n",
" print('delta%d(%d)=max[delta%d(j)aj%d]b%d(o%d)=%.2f*%.2f=%.5f'%(realT, realI, realT-1, realI, realI, realT, np.max(np.multiply([delta[t-1] for delta in deltas], [a[i] for a in A])), B[i][indexOfO], deltas[i][t]))\n",
" psis[i][t] = np.argmax(np.multiply([delta[t-1] for delta in deltas], [a[i] for a in A]))\n",
" print('psis%d(%d)=argmax[delta%d(j)aj%d]=%d' % (realT, realI, realT-1, realI, psis[i][t]))\n",
" print(deltas)\n",
" print(psis)\n",
" I[0][M-1] = np.argmax([delta[M-1] for delta in deltas])\n",
" print('i%d=argmax[deltaT(i)]=%d' % (M, I[0][M-1]+1))\n",
" for t in range(M-2, -1, -1):\n",
" I[0][t] = psis[int(I[0][t+1])][t+1]\n",
" print('i%d=psis%d(i%d)=%d' % (t+1, t+2, t+2, I[0][t]+1))\n",
" print(I)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 习题10.1"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#习题10.1\n",
"Q = [1, 2, 3]\n",
"V = ['红', '白']\n",
"A = [[0.5, 0.2, 0.3], [0.3, 0.5, 0.2], [0.2, 0.3, 0.5]]\n",
"B = [[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]]\n",
"# O = ['红', '白', '红', '红', '白', '红', '白', '白']\n",
"O = ['红', '白', '红', '白'] #习题10.1的例子\n",
"PI = [[0.2, 0.4, 0.4]]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"delta1(1)=pi1 * b1(o1)=0.20 * 0.50=0.10\n",
"psis1(1)=0\n",
"delta1(2)=pi2 * b2(o1)=0.40 * 0.40=0.16\n",
"psis1(2)=0\n",
"delta1(3)=pi3 * b3(o1)=0.40 * 0.70=0.28\n",
"psis1(3)=0\n",
"delta2(1)=max[delta1(j)aj1]b1(o2)=0.06*0.50=0.02800\n",
"psis2(1)=argmax[delta1(j)aj1]=2\n",
"delta2(2)=max[delta1(j)aj2]b2(o2)=0.08*0.60=0.05040\n",
"psis2(2)=argmax[delta1(j)aj2]=2\n",
"delta2(3)=max[delta1(j)aj3]b3(o2)=0.14*0.30=0.04200\n",
"psis2(3)=argmax[delta1(j)aj3]=2\n",
"delta3(1)=max[delta2(j)aj1]b1(o3)=0.02*0.50=0.00756\n",
"psis3(1)=argmax[delta2(j)aj1]=1\n",
"delta3(2)=max[delta2(j)aj2]b2(o3)=0.03*0.40=0.01008\n",
"psis3(2)=argmax[delta2(j)aj2]=1\n",
"delta3(3)=max[delta2(j)aj3]b3(o3)=0.02*0.70=0.01470\n",
"psis3(3)=argmax[delta2(j)aj3]=2\n",
"delta4(1)=max[delta3(j)aj1]b1(o4)=0.00*0.50=0.00189\n",
"psis4(1)=argmax[delta3(j)aj1]=0\n",
"delta4(2)=max[delta3(j)aj2]b2(o4)=0.01*0.60=0.00302\n",
"psis4(2)=argmax[delta3(j)aj2]=1\n",
"delta4(3)=max[delta3(j)aj3]b3(o4)=0.01*0.30=0.00220\n",
"psis4(3)=argmax[delta3(j)aj3]=2\n",
"[[0.1 0.028 0.00756 0.00189 ]\n",
" [0.16 0.0504 0.01008 0.003024]\n",
" [0.28 0.042 0.0147 0.002205]]\n",
"[[0. 2. 1. 0.]\n",
" [0. 2. 1. 1.]\n",
" [0. 2. 2. 2.]]\n",
"i4=argmax[deltaT(i)]=2\n",
"i3=psis4(i4)=2\n",
"i2=psis3(i3)=2\n",
"i1=psis2(i2)=3\n",
"[[2. 1. 1. 1.]]\n"
]
}
],
"source": [
"HMM = HiddenMarkov()\n",
"# HMM.forward(Q, V, A, B, O, PI)\n",
"# HMM.backward(Q, V, A, B, O, PI)\n",
"HMM.viterbi(Q, V, A, B, O, PI)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 习题10.2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"Q = [1, 2, 3]\n",
"V = ['红', '白']\n",
"A = [[0.5, 0.2, 0.3], [0.3, 0.5, 0.2], [0.2, 0.3, 0.5]]\n",
"B = [[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]]\n",
"O = ['红', '白', '红', '红', '白', '红', '白', '白']\n",
"PI = [[0.2, 0.3, 0.5]]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"alpha1(0)=p0b0b(o1)=0.100000\n",
"alpha1(1)=p1b1b(o1)=0.120000\n",
"alpha1(2)=p2b2b(o1)=0.350000\n",
"alpha1(0)=[sigma alpha0(i)ai0]b0(o1)=0.078000\n",
"alpha1(1)=[sigma alpha0(i)ai1]b1(o1)=0.111000\n",
"alpha1(2)=[sigma alpha0(i)ai2]b2(o1)=0.068700\n",
"alpha2(0)=[sigma alpha1(i)ai0]b0(o2)=0.043020\n",
"alpha2(1)=[sigma alpha1(i)ai1]b1(o2)=0.036684\n",
"alpha2(2)=[sigma alpha1(i)ai2]b2(o2)=0.055965\n",
"alpha3(0)=[sigma alpha2(i)ai0]b0(o3)=0.021854\n",
"alpha3(1)=[sigma alpha2(i)ai1]b1(o3)=0.017494\n",
"alpha3(2)=[sigma alpha2(i)ai2]b2(o3)=0.033758\n",
"alpha4(0)=[sigma alpha3(i)ai0]b0(o4)=0.011463\n",
"alpha4(1)=[sigma alpha3(i)ai1]b1(o4)=0.013947\n",
"alpha4(2)=[sigma alpha3(i)ai2]b2(o4)=0.008080\n",
"alpha5(0)=[sigma alpha4(i)ai0]b0(o5)=0.005766\n",
"alpha5(1)=[sigma alpha4(i)ai1]b1(o5)=0.004676\n",
"alpha5(2)=[sigma alpha4(i)ai2]b2(o5)=0.007188\n",
"alpha6(0)=[sigma alpha5(i)ai0]b0(o6)=0.002862\n",
"alpha6(1)=[sigma alpha5(i)ai1]b1(o6)=0.003389\n",
"alpha6(2)=[sigma alpha5(i)ai2]b2(o6)=0.001878\n",
"alpha7(0)=[sigma alpha6(i)ai0]b0(o7)=0.001411\n",
"alpha7(1)=[sigma alpha6(i)ai1]b1(o7)=0.001698\n",
"alpha7(2)=[sigma alpha6(i)ai2]b2(o7)=0.000743\n",
"beta8(0)=1\n",
"beta8(1)=1\n",
"beta8(2)=1\n",
"beta7(1)=[sigma a1jbj(o8)]beta8(j)=(0.50*0.50*1.00+0.20*0.60*1.00+0.30*0.30*1.00+0)=0.460\n",
"beta7(2)=[sigma a2jbj(o8)]beta8(j)=(0.30*0.50*1.00+0.50*0.60*1.00+0.20*0.30*1.00+0)=0.510\n",
"beta7(3)=[sigma a3jbj(o8)]beta8(j)=(0.20*0.50*1.00+0.30*0.60*1.00+0.50*0.30*1.00+0)=0.430\n",
"beta6(1)=[sigma a1jbj(o7)]beta7(j)=(0.50*0.50*0.46+0.20*0.60*0.51+0.30*0.30*0.43+0)=0.215\n",
"beta6(2)=[sigma a2jbj(o7)]beta7(j)=(0.30*0.50*0.46+0.50*0.60*0.51+0.20*0.30*0.43+0)=0.248\n",
"beta6(3)=[sigma a3jbj(o7)]beta7(j)=(0.20*0.50*0.46+0.30*0.60*0.51+0.50*0.30*0.43+0)=0.202\n",
"beta5(1)=[sigma a1jbj(o6)]beta6(j)=(0.50*0.50*0.21+0.20*0.40*0.25+0.30*0.70*0.20+0)=0.116\n",
"beta5(2)=[sigma a2jbj(o6)]beta6(j)=(0.30*0.50*0.21+0.50*0.40*0.25+0.20*0.70*0.20+0)=0.110\n",
"beta5(3)=[sigma a3jbj(o6)]beta6(j)=(0.20*0.50*0.21+0.30*0.40*0.25+0.50*0.70*0.20+0)=0.122\n",
"beta4(1)=[sigma a1jbj(o5)]beta5(j)=(0.50*0.50*0.12+0.20*0.60*0.11+0.30*0.30*0.12+0)=0.053\n",
"beta4(2)=[sigma a2jbj(o5)]beta5(j)=(0.30*0.50*0.12+0.50*0.60*0.11+0.20*0.30*0.12+0)=0.058\n",
"beta4(3)=[sigma a3jbj(o5)]beta5(j)=(0.20*0.50*0.12+0.30*0.60*0.11+0.50*0.30*0.12+0)=0.050\n",
"beta3(1)=[sigma a1jbj(o4)]beta4(j)=(0.50*0.50*0.05+0.20*0.40*0.06+0.30*0.70*0.05+0)=0.028\n",
"beta3(2)=[sigma a2jbj(o4)]beta4(j)=(0.30*0.50*0.05+0.50*0.40*0.06+0.20*0.70*0.05+0)=0.026\n",
"beta3(3)=[sigma a3jbj(o4)]beta4(j)=(0.20*0.50*0.05+0.30*0.40*0.06+0.50*0.70*0.05+0)=0.030\n",
"beta2(1)=[sigma a1jbj(o3)]beta3(j)=(0.50*0.50*0.03+0.20*0.40*0.03+0.30*0.70*0.03+0)=0.015\n",
"beta2(2)=[sigma a2jbj(o3)]beta3(j)=(0.30*0.50*0.03+0.50*0.40*0.03+0.20*0.70*0.03+0)=0.014\n",
"beta2(3)=[sigma a3jbj(o3)]beta3(j)=(0.20*0.50*0.03+0.30*0.40*0.03+0.50*0.70*0.03+0)=0.016\n",
"beta1(1)=[sigma a1jbj(o2)]beta2(j)=(0.50*0.50*0.02+0.20*0.60*0.01+0.30*0.30*0.02+0)=0.007\n",
"beta1(2)=[sigma a2jbj(o2)]beta2(j)=(0.30*0.50*0.02+0.50*0.60*0.01+0.20*0.30*0.02+0)=0.007\n",
"beta1(3)=[sigma a3jbj(o2)]beta2(j)=(0.20*0.50*0.02+0.30*0.60*0.01+0.50*0.30*0.02+0)=0.006\n",
"P(O|lambda)=0.2*0.5*0.00698+0.3*0.4*0.00741+0.5*0.7*0.00647+0=0.003852\n"
]
}
],
"source": [
"HMM.forward(Q, V, A, B, O, PI)\n",
"HMM.backward(Q, V, A, B, O, PI)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit b37ed39

Please sign in to comment.