Skip to content

Commit

Permalink
Xiaowu/20220330 (microsoft#718)
Browse files Browse the repository at this point in the history
* update

* up

* duao

* update
  • Loading branch information
xiaowuhu authored Apr 1, 2022
1 parent 86a7979 commit 92bdba6
Show file tree
Hide file tree
Showing 16 changed files with 644 additions and 9 deletions.
15 changes: 11 additions & 4 deletions 基础教程/A7-强化学习/draft/三门问题/ThreeDoors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,23 @@ def try_once(n_doors: int):

return no_change_but_win, win_after_change

if __name__ == "__main__":

def try_n_doors(n_doors, total=100000):
    """Estimate by simulation how often switching vs. staying wins.

    Plays the door game ``total`` times through ``try_once`` and prints the
    number of wins and the empirical win probability of each strategy.

    Args:
        n_doors: Number of doors in the game; forwarded to ``try_once``.
        total: Number of simulated games (defaults to 100000).

    Raises:
        ValueError: If ``total`` is not positive.
    """
    if total <= 0:
        raise ValueError("total must be a positive number of games")

    # NOTE: n_doors is used as passed in; it must not be reassigned here,
    # otherwise every call would simulate the same number of doors.
    n_win_change = 0
    n_win_keep = 0
    for _ in range(total):
        no_change_but_win, win_after_change = try_once(n_doors)
        n_win_change += win_after_change
        n_win_keep += no_change_but_win

    print(str.format("{0}扇门:", n_doors))
    print(n_win_change, n_win_keep)
    print(str.format("更换选择而中奖的概率={0} \n不换选择而中奖的概率={1}",
                     n_win_change/total, n_win_keep/total))

if __name__ == "__main__":
    # Simulate the classic three-door game first, then the eight-door variant.
    for n_doors in (3, 8):
        try_n_doors(n_doors)
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 5 additions & 5 deletions 基础教程/A7-强化学习/draft/三门问题/三门问题.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

参赛者面前有三扇关闭着的门,其中一扇的后面是一辆汽车,选中后面有车的那扇门就可以赢得该汽车,而另外两扇门后面则各藏有一只山羊。当参赛者选定了一扇门,但未去开启它的时候,主持人会开启剩下两扇门中的一扇,露出其中一只山羊。主持人其后会问参赛者要不要换另一扇仍然关上的门。问题是:换另一扇门是否会增加参赛者赢得汽车的机率?

<img src="./images/ThreeDoors1.png" width="500">
<img src="./img/ThreeDoors1.png" width="500">

图1

Expand Down Expand Up @@ -102,7 +102,7 @@

另外,有些读者看表格可能有困难,所以我们把这些情况变成概率数字放在图 2 中。

<img src="./images/ThreeDoors2.png" width="600">
<img src="./img/ThreeDoors2.png" width="600">

图 2

Expand Down Expand Up @@ -263,7 +263,7 @@ if __name__ == "__main__":
```
66259 33741
更换选择而中奖的概率=0.66259
不更换而中奖的概率=0.33741
不换选择而中奖的概率=0.33741
```

更换选择中奖的概率约等于 $\frac{2}{3}$,不更换选择中奖的概率约等于 $\frac{1}{3}$,与穷举法和理论推导结论一致。
Expand All @@ -273,7 +273,7 @@ if __name__ == "__main__":
```
14554 12480
更换选择而中奖的概率=0.14554
不更换而中奖的概率=0.1248
不换选择而中奖的概率=0.1248
```

比如当 n_doors=8 时,按公式 2,结果是 $0.1248 \approx \frac{1}{8}$;按公式 3,结果是 $0.14554 \approx \frac{8-1}{8 \times (8-2)}=\frac{7}{48}$。
Expand All @@ -288,4 +288,4 @@ if __name__ == "__main__":
1. 强化学习中,基于模型的理论部分也是以概率论为基础的,可以先复习一下概率论来热身。
2. 图 2 中,其实是由“策略-动作-状态”组成的,这与强化学习的理论基本一致。
3. 使用代码模拟,也是一种“聪明的笨办法”,利用计算机快速模拟实际环境的交互,这也是强化学习的重要方法。
4. 用代码理解公式及理论知识,是程序员的一种重要技能
4. 对于没有受过专门训练、难以直接阅读并理解公式推导的读者来说,用代码理解公式及理论知识,是一种重要手段。

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import numpy as np
import os
import json

# Target transition probability matrix: sample() uses row P[current] as the
# probability distribution of the next state. Each row sums to 1.
P = np.array([
[0.1, 0.3, 0.0, 0.6],
[0.8, 0.0, 0.2, 0.0],
[0.0, 0.9, 0.1, 0.0],
[0.0, 0.3, 0.3, 0.4]
])

# Sampling
def sample(n_samples, n_states, start_state, transition_matrix=None):
    """Draw a random sequence of states by following a transition matrix.

    Args:
        n_samples: Number of transitions to simulate.
        n_states: Size of the state space; states are ``0 .. n_states - 1``.
        start_state: State the sequence starts from.
        transition_matrix: Optional matrix whose row ``i`` is the probability
            distribution of the next state given current state ``i``.
            Defaults to the module-level ``P``.

    Returns:
        A list of ``n_samples + 1`` states: ``start_state`` followed by one
        sampled state per transition.
    """
    if transition_matrix is None:
        transition_matrix = P
    states = list(range(n_states))
    # State transition sequence, seeded with the starting state.
    X = [start_state]
    current = start_state
    for _ in range(n_samples):
        # Named ``following`` rather than ``next`` so the builtin is not shadowed.
        following = np.random.choice(states, p=transition_matrix[current])
        X.append(following)
        current = following
    return X

def save_file(X, file_name):
    """Write a state sequence to ``file_name`` as a JSON list of letters.

    Args:
        X: Iterable of integer states; each state ``x`` is stored as
            ``chr(x + 65)``, so 0, 1, 2, 3 become "A", "B", "C", "D".
        file_name: Path of the file to create or overwrite.
    """
    # Turn 0123 into ABCD.
    Y = [chr(x + 65) for x in X]
    json_list = json.dumps(Y)
    # The context manager closes the file even if the write raises.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(json_list)


if __name__ == "__main__":
    # Simulate 10000 transitions over a 4-state space, starting from state 1
    # (states are numbered from 0).
    X = sample(n_samples=10000, n_states=4, start_state=1)
    # Store the sequence in CarData.txt next to this script.
    root = os.path.dirname(os.path.realpath(__file__))
    save_file(X, os.path.join(root, "CarData.txt"))
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np
import json
import os

# Estimate the probability of moving from a_i to a_j
def calculate_P(X, a_i, a_j):
    """Estimate the a_i -> a_j transition probability from a state sequence.

    Counts how often ``a_i`` occurs as a non-final element of ``X`` (n_i) and
    how often such an occurrence is immediately followed by ``a_j`` (n_j),
    prints ``n_i n_j n_j/n_i`` and returns the ratio.

    Args:
        X: Sequence of observed states.
        a_i: State being left.
        a_j: State being entered.

    Returns:
        ``n_j / n_i``, or ``nan`` when ``a_i`` never occurs before the last
        element (this case previously raised ZeroDivisionError).
    """
    n_i = 0
    n_j = 0
    # Walk over consecutive (current, following) pairs of the sequence.
    for current, following in zip(X, X[1:]):
        if current == a_i:
            n_i += 1
            if following == a_j:
                n_j += 1
    # No observation of a_i means the estimate is undefined, not zero.
    probability = n_j / n_i if n_i else float("nan")
    print(n_i, n_j, probability)
    return probability

def open_file(file_name):
    """Load and return the JSON document stored in ``file_name``.

    Args:
        file_name: Path of a text file containing JSON.

    Returns:
        The decoded JSON value.
    """
    # The context manager closes the file even if reading or parsing fails.
    with open(file_name, "r", encoding="utf-8") as file:
        lines = file.read()
    return json.loads(lines)

if __name__ == "__main__":
    # Size of the state space
    n_states = 4
    # Load the recorded sequence from CarData.txt next to this script
    script_dir = os.path.dirname(os.path.realpath(__file__))
    data_list = open_file(os.path.join(script_dir, "CarData.txt"))
    # Turn ABCD into 0123
    X = [ord(letter) - 65 for letter in data_list]
    # Estimate one transition probability: 0 stands for shop A, 1 for shop B
    calculate_P(X, 0, 1)


Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import json
import os

def calculate_Matrix(n_states, X):
    """Estimate a transition probability matrix from a state sequence.

    Args:
        n_states: Number of states; the entries of ``X`` are used as indices
            into an ``n_states`` x ``n_states`` count matrix.
        X: Sequence of integer states.

    Returns:
        Array of shape ``(n_states, n_states)`` whose entry ``[i, j]`` is the
        number of observed i -> j transitions divided by the number of
        transitions leaving ``i``.  A state with no outgoing transition
        yields a row of ``nan``.
    """
    P_counter = np.zeros((n_states, n_states))
    # Count every consecutive (from, to) pair of the sequence.
    for a_i, a_j in zip(X, X[1:]):
        P_counter[a_i, a_j] += 1
    # Row sums (axis=1): how many transitions leave each state.
    # Named ``row_totals`` so the builtin ``sum`` is not shadowed.
    row_totals = np.sum(P_counter, axis=1, keepdims=True)
    print("各个状态出现的次数:\n", row_totals)
    # 0/0 for a state that is never left gives nan on purpose; only the
    # RuntimeWarning is silenced.
    with np.errstate(invalid="ignore"):
        P = P_counter / row_totals
    return P

def open_file(file_name):
    """Load and return the JSON document stored in ``file_name``.

    Args:
        file_name: Path of a text file containing JSON.

    Returns:
        The decoded JSON value.
    """
    # The context manager closes the file even if reading or parsing fails.
    with open(file_name, "r", encoding="utf-8") as file:
        lines = file.read()
    return json.loads(lines)

if __name__ == "__main__":
    # Size of the state space
    n_states = 4
    # Load the recorded sequence from CarData.txt next to this script
    script_dir = os.path.dirname(os.path.realpath(__file__))
    data_list = open_file(os.path.join(script_dir, "CarData.txt"))
    # Turn ABCD into 0123
    X = [ord(letter) - 65 for letter in data_list]
    # Estimate the transition matrix and show it rounded to one decimal
    P = calculate_Matrix(n_states, X)
    print("概率转移矩阵:")
    print(np.around(P, 1))
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import numpy as np

# 4x4 matrix whose rows each sum to 1; passed to calculate_day, which
# right-multiplies the state vector by it (np.dot(X_curr, P)).
P = np.array([
[0.1, 0.3, 0.0, 0.6],
[0.8, 0.0, 0.2, 0.0],
[0.0, 0.9, 0.1, 0.0],
[0.0, 0.3, 0.3, 0.4]
])

def calculate_day(X, P, day):
    """Print the state vector for days ``0 .. day-1`` and return day ``day``'s.

    Each step right-multiplies the current vector by ``P``.

    Args:
        X: Initial state vector (day 0); it is not modified.
        P: Matrix applied at every step via ``np.dot(vector, P)``.
        day: Number of steps to take.

    Returns:
        The vector after ``day`` steps (a copy of ``X`` when ``day`` is 0).
        Previously this last vector was computed and then discarded.
    """
    X_curr = X.copy()
    for i in range(day):
        print(str.format("day {0}: {1} ", i, X_curr))
        # np.dot already returns a fresh array, so no extra copy is needed.
        X_curr = np.dot(X_curr, P)
    return X_curr

if __name__ == "__main__":
    # Start with everything in state 1 and print the vector for days 0 to 5.
    X = np.array([0, 1, 0, 0])
    calculate_day(X, P, day=6)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np

# One-step transition probability matrix (4x4, each row sums to 1) that is
# passed to K_step_matrix below.
P = np.array([
[0.1, 0.3, 0.0, 0.6],
[0.8, 0.0, 0.2, 0.0],
[0.0, 0.9, 0.1, 0.0],
[0.0, 0.3, 0.3, 0.4]
])

# Compute the K-step transition probability matrix
def K_step_matrix(P, K):
    """Return the K-step transition matrix, i.e. ``P`` raised to the power ``K``.

    Args:
        P: Square one-step transition matrix.
        K: Number of steps; a non-negative integer.

    Returns:
        A new array equal to ``P`` multiplied by itself ``K`` times.
        ``K == 0`` gives the identity matrix (the previous loop returned
        ``P`` for every ``K <= 1``).

    Raises:
        ValueError: If ``K`` is negative.
    """
    if K < 0:
        raise ValueError("K must be a non-negative number of steps")
    # matrix_power may hand back its input unchanged for K == 1, so copy to
    # keep the "always a new array" behavior callers had before.
    return np.array(np.linalg.matrix_power(P, K))

if __name__ == "__main__":
    # Initial vector: everything in state 1.
    X = np.array([0, 1, 0, 0])
    P5 = K_step_matrix(P, 5)
    print("5步转移矩阵:\n", P5)
    # Apply the 5-step matrix to the initial vector.
    print("第 5 天的情况:", np.dot(X, P5))
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import numpy as np

# 4x4 matrix whose rows each sum to 1; Check_Convergence below repeatedly
# left-multiplies by it.
P = np.array([
[0.1, 0.3, 0.0, 0.6],
[0.8, 0.0, 0.2, 0.0],
[0.0, 0.9, 0.1, 0.0],
[0.0, 0.3, 0.3, 0.4]
])

def Check_Convergence(P, max_iter=100000):
    """Multiply by ``P`` repeatedly until the product stops changing.

    Starting from ``P``, computes ``np.dot(P, P_curr)`` again and again,
    printing the iteration number and the new matrix each time, and stops as
    soon as two consecutive matrices are equal within the default tolerances
    of ``np.allclose``.

    Args:
        P: Square matrix to take powers of.
        max_iter: Upper bound on the number of multiplications (defaults to
            the previously hard-coded 100000).

    Returns:
        The last matrix computed, whether or not convergence was reached
        within ``max_iter`` iterations; a copy of ``P`` when ``max_iter < 1``.
    """
    P_curr = P.copy()
    # Pre-bind so the return value is defined even if the loop never runs.
    P_next = P_curr
    for i in range(max_iter):
        P_next = np.dot(P, P_curr)
        print("迭代次数 =", i+1)
        print(P_next)
        if np.allclose(P_curr, P_next):
            break
        P_curr = P_next
    return P_next

if __name__ == "__main__":
    # Iterate powers of P until they settle; progress is printed by the call.
    P_limit = Check_Convergence(P)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 92bdba6

Please sign in to comment.