# test_kuhn_poker.py
# Repository forked from sotetsuk/pgx.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import jax
import jax.numpy as jnp
from pgx.kuhn_poker import BET, PASS, KuhnPoker
# Single Kuhn poker environment shared by every test in this module.
env = KuhnPoker()

# Wrap the environment entry points in jax.jit once, at module level, so
# all tests call the same jitted functions.
init, step, observe = map(jax.jit, (env.init, env.step, env.observe))
def test_init():
    """Check a freshly initialised state: distinct cards, both actions legal."""
    rng = jax.random.PRNGKey(0)
    initial_state = init(key=rng)

    # The two dealt cards must differ.
    dealt = initial_state._cards
    assert dealt[0] != dealt[1]

    # Both actions are legal in the opening position.
    both_legal = jnp.bool_([1, 1])
    assert jnp.all(initial_state.legal_action_mask == both_legal)
def test_step():
    """Play five betting sequences and check termination and final rewards.

    For each sequence the state must stay non-terminated after every action
    except the last, become terminated after the last one, and carry the
    expected reward vector.
    """
    key = jax.random.PRNGKey(0)
    # cards = [2, 0]
    # Each entry: (actions played in order, rewards expected at the end).
    scenarios = (
        ((PASS, PASS), [1, -1]),
        ((PASS, BET, PASS), [-1, 1]),
        ((PASS, BET, BET), [2, -2]),
        ((BET, PASS), [1, -1]),
        ((BET, BET), [2, -2]),
    )
    for actions, expected_rewards in scenarios:
        state = init(key)
        *opening, closing = actions
        for action in opening:
            state = step(state, action)
            assert not state.terminated
        state = step(state, closing)
        assert state.terminated
        assert (state.rewards == jnp.float32(expected_rewards)).all()
def test_legal_action():
    """Check the legal-action mask along five betting sequences.

    After every action except the last, both actions must still be legal;
    after the last action the state must be terminated.
    """
    key = jax.random.PRNGKey(0)
    # cards = [2, 0]
    both_legal = jnp.bool_([1, 1])
    sequences = (
        (PASS, PASS),
        (PASS, BET, PASS),
        (PASS, BET, BET),
        (BET, PASS),
        (BET, BET),
    )
    for actions in sequences:
        state = init(key)
        *opening, closing = actions
        for action in opening:
            state = step(state, action)
            assert (state.legal_action_mask == both_legal).all()
        state = step(state, closing)
        assert state.terminated
def test_observation():
    """Check both players' observations after player 0 opens with a bet."""
    key = jax.random.PRNGKey(0)
    _, key = jax.random.split(key)  # due to API update
    state = init(key)
    # Deal expected for this key (per the original author's note):
    #   Player 0: K
    #   Player 1: J
    state = step(state, BET)  # Player 0 bets 1 chip

    # NOTE(review): the layout of the 7-entry observation vector is defined
    # by KuhnPoker.observe, which is not visible here. The first three
    # entries look like a one-hot of the player's own card (J, Q, K), which
    # matches the deal above -- confirm against the environment docs.
    obs = observe(state, 0)
    assert (obs == jnp.bool_([0, 0, 1, 0, 1, 1, 0])).all()
    obs = observe(state, 1)
    assert (obs == jnp.bool_([1, 0, 0, 1, 0, 0, 1])).all()
def test_api():
    """Run pgx's generic API test on the Kuhn poker env, without and with keys."""
    import pgx

    kuhn_env = pgx.make("kuhn_poker")
    for use_key in (False, True):
        pgx.api_test(kuhn_env, 3, use_key=use_key)