Skip to content

Commit

Permalink
added determinism tests, fixed blackjack
Browse files Browse the repository at this point in the history
Former-commit-id: a1d0a18
  • Loading branch information
benblack769 committed Apr 29, 2020
1 parent 0ebfa36 commit af3029f
Show file tree
Hide file tree
Showing 16 changed files with 105 additions and 7 deletions.
2 changes: 1 addition & 1 deletion rlcard/envs/blackjack.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class BlackjackEnv(Env):
def __init__(self, config):
''' Initialize the Blackjack environment
'''
self.game = Game()
self.game = Game(seed=config['seed'])
super().__init__(config)
self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10}
self.actions = ['hit', 'stand']
Expand Down
1 change: 0 additions & 1 deletion rlcard/envs/limitholdem.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,3 @@ def get_perfect_information(self):
state['current_player'] = self.game.game_pointer
state['legal_actions'] = self.game.get_legal_actions()
return state

1 change: 1 addition & 0 deletions rlcard/envs/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
'single_agent_mode' : False,
'active_player' : 0,
'record_action' : False,
'seed': None,
}

class EnvSpec(object):
Expand Down
6 changes: 4 additions & 2 deletions rlcard/games/blackjack/dealer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from rlcard.utils import init_standard_deck

import numpy as np

class BlackjackDealer(object):

Expand All @@ -16,7 +16,9 @@ def __init__(self, np_random):
def shuffle(self):
''' Shuffle the deck
'''
self.np_random.shuffle(self.deck)
shuffle_deck = np.array(self.deck)
self.np_random.shuffle(shuffle_deck)
self.deck = list(shuffle_deck)

def deal_card(self, player):
''' Distribute one card to the player
Expand Down
4 changes: 2 additions & 2 deletions rlcard/games/blackjack/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

class BlackjackGame(object):

def __init__(self, allow_step_back=False):
def __init__(self, allow_step_back=False, seed=None):
''' Initialize the class Blackjack Game
'''
self.allow_step_back = allow_step_back
self.np_random = np.random.RandomState()
self.np_random = np.random.RandomState(seed)


def init_game(self):
Expand Down
60 changes: 60 additions & 0 deletions tests/envs/determism_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import rlcard
from rlcard.agents.random_agent import RandomAgent
import random
import numpy as np

def hash_obsevation(obs):
    """Return an integer hash for a single observation.

    Objects exposing ``tobytes()`` (e.g. NumPy arrays) are hashed through
    their raw byte representation. Anything else is passed to the built-in
    ``hash``. If the observation is unhashable, a warning is emitted and
    ``0`` is returned so callers can keep going.

    Args:
        obs: The observation to hash.

    Returns:
        int: The hash value, or ``0`` when the observation cannot be hashed.
    """
    # Imported locally because this module does not import ``warnings`` at
    # the top level; without it the fallback branch raised NameError.
    import warnings

    try:
        return hash(obs.tobytes())
    except AttributeError:
        # No ``tobytes`` method: fall through to the generic hash below.
        pass

    try:
        return hash(obs)
    except TypeError:
        warnings.warn("Observation not an int or an Numpy array")
        return 0

def rand_iter(n):
    """Advance the global Python and NumPy random generators.

    Performs ``n + 1`` rounds; each round draws one integer from ``random``
    and 100 normal samples from ``np.random``. The drawn values are thrown
    away; only the side effect on the global generator states matters.

    Args:
        n: Number of extra rounds on top of the one that always runs
            (for non-negative ``n``).
    """
    rounds = n + 1
    for _ in range(rounds):
        # Results are intentionally discarded.
        random.randint(0, 1000)
        np.random.normal(size=100)

def gather_observations(env, actions, num_rand_steps):
    """Play a scripted action sequence on ``env`` and collect the states.

    The global random generators are advanced via ``rand_iter`` before the
    reset, after the reset, and before every step.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``is_over()`` and ``game.is_over()``.
        actions: Sequence of integers; each one is reduced modulo the number
            of currently legal actions to pick the action to play.
        num_rand_steps: Argument forwarded to every ``rand_iter`` call.

    Returns:
        list: The states returned by ``env.step`` for which the game was not
        yet over, in the order they were produced.
    """
    rand_iter(num_rand_steps)
    state, _ = env.reset()
    rand_iter(num_rand_steps)

    collected = []
    step_no = 0
    while True:
        # Stop once the environment reports the end or the script runs out.
        if env.is_over() or step_no >= len(actions):
            break

        # Choose the scripted action among the currently legal ones.
        rand_iter(num_rand_steps)
        legal = state['legal_actions']
        chosen = legal[actions[step_no] % len(legal)]

        # Advance the environment; the returned player id is not needed.
        state, _ = env.step(chosen)
        step_no += 1

        # Only keep states from which the game still continues.
        if not env.game.is_over():
            collected.append(state)

    return collected

def is_deterministic(env_name):
    """Check that a seeded environment replays identically.

    A random script of action indices is generated once. Two environments
    are then created with the same seed and driven through that script, the
    first with ``num_rand_steps=0`` and the second with ``num_rand_steps=1``
    passed to ``gather_observations``. The hashes of the observed ``'obs'``
    entries from both runs are compared.

    Args:
        env_name: Name of the environment, as accepted by ``rlcard.make``.

    Returns:
        bool: ``True`` when both runs yield the same observation hash.
    """
    NUM_STEPS = 25
    base_seed = 12941

    # An unseeded environment is used only to size the action indices.
    probe_env = rlcard.make(env_name)
    actions = []
    for _ in range(NUM_STEPS):
        actions.append(random.randrange(probe_env.game.get_action_num()))

    run_hashes = []
    for extra_rand_calls in range(2):
        seeded_env = rlcard.make(env_name, config={'seed': base_seed})
        trajectory = gather_observations(seeded_env, actions, extra_rand_calls)
        per_step = tuple([hash_obsevation(entry['obs']) for entry in trajectory])
        run_hashes.append(hash(per_step))

    first_run, second_run = run_hashes
    return first_run == second_run
5 changes: 4 additions & 1 deletion tests/envs/test_blackjack_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import rlcard
from rlcard.agents.random_agent import RandomAgent

from .determism_util import is_deterministic

class TestBlackjackEnv(unittest.TestCase):

Expand All @@ -13,6 +13,9 @@ def test_init_and_extract_state(self):
for score in state['obs']:
self.assertLessEqual(score, 30)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('blackjack'))

def test_decode_action(self):
env = rlcard.make('blackjack')
self.assertEqual(env._decode_action(0), 'hit')
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_doudizhu_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import rlcard
from rlcard.utils.utils import get_downstream_player_id
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic


class TestDoudizhuEnv(unittest.TestCase):
Expand All @@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 450)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('doudizhu'))

def test_get_legal_actions(self):
env = rlcard.make('doudizhu')
env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_gin_rummy_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import rlcard
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic


class TestGinRummyEnv(unittest.TestCase):
Expand All @@ -18,6 +19,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 5 * 52)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('gin-rummy'))

def test_get_legal_actions(self):
env = rlcard.make('gin-rummy')
env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_leducholdem_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import rlcard
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic


class TestLeducholdemEnv(unittest.TestCase):
Expand All @@ -14,6 +15,9 @@ def test_reset_and_extract_state(self):
for action in state['legal_actions']:
self.assertLess(action, env.action_num)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('leduc-holdem'))

def test_get_legal_actions(self):
env = rlcard.make('leduc-holdem')
env.reset()
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_limitholdem_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import rlcard
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic


class TestLimitholdemEnv(unittest.TestCase):
Expand All @@ -13,6 +14,9 @@ def test_reset_and_extract_state(self):
for action in state['legal_actions']:
self.assertLess(action, env.action_num)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('limit-holdem'))

def test_get_legal_actions(self):
env = rlcard.make('limit-holdem')
env.reset()
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_mahjong.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import rlcard
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic

class TestMahjongEnv(unittest.TestCase):

Expand All @@ -11,6 +12,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 816)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('mahjong'))

def test_get_legal_actions(self):
env = rlcard.make('mahjong')
env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_nolimitholdem_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.games.nolimitholdem.round import Action
from .determism_util import is_deterministic


class TestNolimitholdemEnv(unittest.TestCase):
Expand All @@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 54)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('no-limit-holdem'))

def test_get_legal_actions(self):
env = rlcard.make('no-limit-holdem')
env.reset()
Expand Down
1 change: 1 addition & 0 deletions tests/envs/test_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import rlcard
from rlcard.envs.registration import register, make
from .determism_util import is_deterministic


class TestRegistration(unittest.TestCase):
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_simpledoudizhu_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import rlcard
from rlcard.utils.utils import get_downstream_player_id
from rlcard.agents.random_agent import RandomAgent
from .determism_util import is_deterministic


class TestSimpleDoudizhuEnv(unittest.TestCase):
Expand All @@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 450)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('simple-doudizhu'))

def test_get_legal_actions(self):
env = rlcard.make('simple-doudizhu')
env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Expand Down
4 changes: 4 additions & 0 deletions tests/envs/test_uno_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.games.uno.utils import ACTION_LIST
from .determism_util import is_deterministic


class TestUnoEnv(unittest.TestCase):
Expand All @@ -13,6 +14,9 @@ def test_reset_and_extract_state(self):
state, _ = env.reset()
self.assertEqual(state['obs'].size, 420)

def test_is_deterministic(self):
self.assertTrue(is_deterministic('uno'))

def test_get_legal_actions(self):
env = rlcard.make('uno')
env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Expand Down

0 comments on commit af3029f

Please sign in to comment.