added determinism tests, fixed blackjack

Former-commit-id: a1d0a18
chenmich · Apr 29, 2020 · af3029f · af3029f
1 parent 0ebfa36
commit af3029f
Show file tree

Hide file tree

Showing 16 changed files with 105 additions and 7 deletions.
diff --git a/rlcard/envs/blackjack.py b/rlcard/envs/blackjack.py
@@ -11,7 +11,7 @@ class BlackjackEnv(Env):
     def __init__(self, config):
         ''' Initialize the Blackjack environment
         '''
-        self.game = Game()
+        self.game = Game(seed=config['seed'])
         super().__init__(config)
         self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10}
         self.actions = ['hit', 'stand']

diff --git a/rlcard/envs/limitholdem.py b/rlcard/envs/limitholdem.py
@@ -110,4 +110,3 @@ def get_perfect_information(self):
         state['current_player'] = self.game.game_pointer
         state['legal_actions'] = self.game.get_legal_actions()
         return state
-
diff --git a/rlcard/envs/registration.py b/rlcard/envs/registration.py
@@ -8,6 +8,7 @@
         'single_agent_mode' : False,
         'active_player' : 0,
         'record_action' : False,
+        'seed': None,
         }
 
 class EnvSpec(object):

diff --git a/rlcard/games/blackjack/dealer.py b/rlcard/games/blackjack/dealer.py
@@ -1,5 +1,5 @@
 from rlcard.utils import init_standard_deck
-
+import numpy as np
 
 class BlackjackDealer(object):
 
@@ -16,7 +16,9 @@ def __init__(self, np_random):
     def shuffle(self):
         ''' Shuffle the deck
         '''
-        self.np_random.shuffle(self.deck)
+        shuffle_deck = np.array(self.deck)
+        self.np_random.shuffle(shuffle_deck)
+        self.deck = list(shuffle_deck)
 
     def deal_card(self, player):
         ''' Distribute one card to the player

diff --git a/rlcard/games/blackjack/game.py b/rlcard/games/blackjack/game.py
@@ -7,11 +7,11 @@
 
 class BlackjackGame(object):
 
-    def __init__(self, allow_step_back=False):
+    def __init__(self, allow_step_back=False, seed=None):
         ''' Initialize the class Blackjack Game
         '''
         self.allow_step_back = allow_step_back
-        self.np_random = np.random.RandomState()
+        self.np_random = np.random.RandomState(seed)
 
 
     def init_game(self):

diff --git a/tests/envs/determism_util.py b/tests/envs/determism_util.py
@@ -0,0 +1,60 @@
+import rlcard
+from rlcard.agents.random_agent import RandomAgent
+import random
+import numpy as np
+
+def hash_obsevation(obs):
+    ''' Hash a single observation so that two runs can be compared
+
+    Args:
+        obs: The observation to hash. Objects exposing `tobytes()` (e.g.
+            numpy arrays) are hashed through their raw bytes; anything else
+            is handed to the built-in `hash`.
+
+    Returns:
+        (int): The hash value, or 0 (after emitting a warning) when the
+            observation has no `tobytes()` and is not hashable either.
+    '''
+    # Local import keeps this helper self-contained: the module-level imports
+    # do not include `warnings`, which the fallback branch below needs.
+    import warnings
+
+    try:
+        return hash(obs.tobytes())
+    except AttributeError:
+        # No `tobytes` attribute; fall back to the plain hash below.
+        pass
+
+    try:
+        return hash(obs)
+    except TypeError:
+        warnings.warn("Observation not an int or an Numpy array")
+        return 0
+
+def rand_iter(n):
+    ''' Advance the global `random` and `np.random` generators
+
+    The drawn values are thrown away; the call is only useful for the state
+    change it causes in the two global generators.
+
+    Args:
+        n (int): The pair of draws is repeated n + 1 times
+    '''
+    for _ in range(n + 1):
+        # Results are intentionally discarded.
+        random.randint(0, 1000)
+        np.random.normal(size=100)
+
+def gather_observations(env, actions, num_rand_steps):
+    ''' Play a fixed action sequence and collect the resulting states
+
+    `rand_iter(num_rand_steps)` is called before and after the reset and
+    before every step, so the global random generators are advanced between
+    environment calls.
+
+    Args:
+        env: The environment to play; it is reset first
+        actions (list): Integers used to pick from the legal actions; each is
+            wrapped with a modulo over the number of legal actions
+        num_rand_steps (int): Argument forwarded to every `rand_iter` call
+
+    Returns:
+        (list): The states returned by `env.step`, excluding any state
+            obtained once `env.game.is_over()` is true
+    '''
+    rand_iter(num_rand_steps)
+    state, _ = env.reset()
+    rand_iter(num_rand_steps)
+
+    collected = []
+    step = 0
+    while not env.is_over() and step < len(actions):
+        # Disturb the global generators before the "agent" picks its action.
+        rand_iter(num_rand_steps)
+        legal = state['legal_actions']
+        chosen = legal[actions[step] % len(legal)]
+
+        # Advance the environment; the returned player id is not needed here.
+        state, _ = env.step(chosen)
+        step += 1
+
+        # Only record states reached while the game is still running.
+        if not env.game.is_over():
+            collected.append(state)
+
+    return collected
+
+def is_deterministic(env_name):
+    ''' Check that a seeded environment yields the same observations twice
+
+    A random action sequence is drawn once. Two environments are then made
+    with the same seed and played with that sequence, the first with
+    `num_rand_steps` 0 and the second with 1, and the hashes of their
+    observation sequences are compared.
+
+    Args:
+        env_name (str): Name passed to `rlcard.make`
+
+    Returns:
+        (boolean): True if both runs produce the same hash
+    '''
+    NUM_STEPS = 25
+    base_seed = 12941
+
+    # An unseeded environment is only used to size the random action choices.
+    env = rlcard.make(env_name)
+    actions = [random.randrange(env.game.get_action_num()) for _ in range(NUM_STEPS)]
+
+    def trajectory_hash(rand_iters):
+        # Fresh environment with the shared seed for every run.
+        seeded_env = rlcard.make(env_name,config={'seed':base_seed})
+        per_obs = [hash_obsevation(entry['obs'])
+                   for entry in gather_observations(seeded_env, actions, rand_iters)]
+        return hash(tuple(per_obs))
+
+    first = trajectory_hash(0)
+    second = trajectory_hash(1)
+    return first == second
diff --git a/tests/envs/test_blackjack_env.py b/tests/envs/test_blackjack_env.py
@@ -3,7 +3,7 @@
 
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
-
+from .determism_util import is_deterministic
 
 class TestBlackjackEnv(unittest.TestCase):
 
@@ -13,6 +13,9 @@ def test_init_and_extract_state(self):
         for score in state['obs']:
             self.assertLessEqual(score, 30)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('blackjack'))
+
     def test_decode_action(self):
         env = rlcard.make('blackjack')
         self.assertEqual(env._decode_action(0), 'hit')

diff --git a/tests/envs/test_doudizhu_env.py b/tests/envs/test_doudizhu_env.py
@@ -3,6 +3,7 @@
 import rlcard
 from rlcard.utils.utils import get_downstream_player_id
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 
 class TestDoudizhuEnv(unittest.TestCase):
@@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 450)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('doudizhu'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('doudizhu')
         env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])

diff --git a/tests/envs/test_gin_rummy_env.py b/tests/envs/test_gin_rummy_env.py
@@ -9,6 +9,7 @@
 
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 
 class TestGinRummyEnv(unittest.TestCase):
@@ -18,6 +19,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 5 * 52)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('gin-rummy'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('gin-rummy')
         env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])

diff --git a/tests/envs/test_leducholdem_env.py b/tests/envs/test_leducholdem_env.py
@@ -3,6 +3,7 @@
 
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 
 class TestLeducholdemEnv(unittest.TestCase):
@@ -14,6 +15,9 @@ def test_reset_and_extract_state(self):
         for action in state['legal_actions']:
             self.assertLess(action, env.action_num)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('leduc-holdem'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('leduc-holdem')
         env.reset()

diff --git a/tests/envs/test_limitholdem_env.py b/tests/envs/test_limitholdem_env.py
@@ -2,6 +2,7 @@
 
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 
 class TestLimitholdemEnv(unittest.TestCase):
@@ -13,6 +14,9 @@ def test_reset_and_extract_state(self):
         for action in state['legal_actions']:
             self.assertLess(action, env.action_num)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('limit-holdem'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('limit-holdem')
         env.reset()

diff --git a/tests/envs/test_mahjong.py b/tests/envs/test_mahjong.py
@@ -3,6 +3,7 @@
 
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 class TestMahjongEnv(unittest.TestCase):
 
@@ -11,6 +12,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 816)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('mahjong'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('mahjong')
         env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])

diff --git a/tests/envs/test_nolimitholdem_env.py b/tests/envs/test_nolimitholdem_env.py
@@ -3,6 +3,7 @@
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
 from rlcard.games.nolimitholdem.round import Action
+from .determism_util import is_deterministic
 
 
 class TestNolimitholdemEnv(unittest.TestCase):
@@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 54)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('no-limit-holdem'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('no-limit-holdem')
         env.reset()

diff --git a/tests/envs/test_registration.py b/tests/envs/test_registration.py
@@ -2,6 +2,7 @@
 
 import rlcard
 from rlcard.envs.registration import register, make
+from .determism_util import is_deterministic
 
 
 class TestRegistration(unittest.TestCase):

diff --git a/tests/envs/test_simpledoudizhu_env.py b/tests/envs/test_simpledoudizhu_env.py
@@ -3,6 +3,7 @@
 import rlcard
 from rlcard.utils.utils import get_downstream_player_id
 from rlcard.agents.random_agent import RandomAgent
+from .determism_util import is_deterministic
 
 
 class TestSimpleDoudizhuEnv(unittest.TestCase):
@@ -12,6 +13,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 450)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('simple-doudizhu'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('simple-doudizhu')
         env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])

diff --git a/tests/envs/test_uno_env.py b/tests/envs/test_uno_env.py
@@ -4,6 +4,7 @@
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
 from rlcard.games.uno.utils import ACTION_LIST
+from .determism_util import is_deterministic
 
 
 class TestUnoEnv(unittest.TestCase):
@@ -13,6 +14,9 @@ def test_reset_and_extract_state(self):
         state, _ = env.reset()
         self.assertEqual(state['obs'].size, 420)
 
+    def test_is_deterministic(self):
+        self.assertTrue(is_deterministic('uno'))
+
     def test_get_legal_actions(self):
         env = rlcard.make('uno')
         env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
Original file line number	Diff line number	Diff line change
Expand Up		@@ -110,4 +110,3 @@ def get_perfect_information(self):
		state['current_player'] = self.game.game_pointer
		state['legal_actions'] = self.game.get_legal_actions()
		return state