Tweaked give-gold and buy action masks during combat (and other no-op cases) to prevent entropy collapse

NeuralMMO · jsuarez5341 · Sep 6, 2023 · Aug 30, 2023 · Aug 30, 2023 · Aug 30, 2023
commit d3febd2c76e92d1827a4dd1b3591117409719b21
diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
@@ -349,8 +349,10 @@ def _make_give_gold_target_mask(self):
  if self.config.PROVIDE_NOOP_ACTION_TARGET:
  give_mask[-1] = 1
 
+ # To prevent entropy collapse, allow agents to issue random give actions during early training
  if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
  or int(self.agent().gold) == 0:
+ give_mask[self.config.PLAYER_N_OBS//2:] = 1
  return give_mask
 
  agent = self.agent()
@@ -371,12 +373,13 @@ def _make_give_gold_target_mask(self):
  def _make_give_gold_mask(self):
  mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8)
  mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored.
- if self.dummy_obs:
+ if self.dummy_obs or self.agent_in_combat:
+ # To prevent entropy collapse, allow agents to issue random give actions during early training
+ mask[:] = 1
  return mask
 
  gold = int(self.agent().gold)
- if gold and not self.agent_in_combat:
- mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
+ mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
 
  return mask
 
@@ -401,12 +404,10 @@ def _make_buy_mask(self):
  if self.config.PROVIDE_NOOP_ACTION_TARGET:
  buy_mask[-1] = 1
 
- if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
- return buy_mask
-
  # To prevent entropy collapse, allow agents to issue random buy actions during early training
- if self.market.len == 0: # nothing in the market
- buy_mask[self.config.MARKET_N_OBS//10:] = 1
+ if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
+ or self.market.len == 0:
+ buy_mask[self.config.MARKET_N_OBS//2:] = 1
  return buy_mask
 
  agent = self.agent()

diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py
@@ -30,7 +30,8 @@ def _assert_action_targets_zero(self, gym_obs):
  for atn in [action.Use, action.Give, action.Destroy, action.Sell]:
  mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"])
  # If MarketItem and InventoryTarget have no-action flags, these sum up to 5
- self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
+ # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked
+ self.assertEqual(mask, 99 + 512 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
 
  def test_spawn_immunity(self):
  env = self._setup_env(random_seed=RANDOM_SEED)