# Connect4Players.py
# Forked from suragnair/alpha-zero-general.
import numpy as np
class RandomPlayer():
    """Player that picks uniformly at random among the currently valid moves."""

    def __init__(self, game):
        """Keep a reference to the game used to query moves.

        Args:
            game: object providing ``getActionSize()`` and
                ``getValidMoves(board, player)``.
        """
        self.game = game

    def play(self, board):
        """Return a randomly chosen valid action index for ``board``.

        The validity mask is requested for player ``1``; an action is
        considered valid when its mask entry equals ``1`` (``True`` also
        compares equal to ``1``). Only the first ``getActionSize()`` entries
        of the mask are considered.

        Args:
            board: board state, passed through to ``game.getValidMoves``.

        Returns:
            int: index of the selected action.

        Raises:
            ValueError: if the mask contains no valid action. Sampling by
                rejection would otherwise spin forever in that situation.
        """
        action_size = self.game.getActionSize()
        valids = np.asarray(self.game.getValidMoves(board, 1))[:action_size]
        # Sample directly from the valid indices instead of retrying random
        # draws until one happens to be valid.
        candidates = np.flatnonzero(valids == 1)
        if candidates.size == 0:
            raise ValueError('No valid moves remaining')
        return int(np.random.choice(candidates))
class HumanConnect4Player():
    """Interactive player that reads its move from standard input."""

    def __init__(self, game):
        """Keep a reference to the game used to query valid moves.

        Args:
            game: object providing ``getValidMoves(board, player)``.
        """
        self.game = game

    def play(self, board):
        """Prompt until the user enters a valid move and return it.

        The list of valid move indices (queried for player ``1``) is printed
        first. Input that is not an integer, is outside the range of the
        validity mask, or points at an invalid move prints ``Invalid move``
        and prompts again.

        Args:
            board: board state, passed through to ``game.getValidMoves``.

        Returns:
            int: the index of the chosen valid move.
        """
        valid_moves = self.game.getValidMoves(board, 1)
        print('\nMoves:', [i for (i, valid) in enumerate(valid_moves) if valid])
        while True:
            try:
                move = int(input())
            except ValueError:
                # Non-numeric input: ask again instead of crashing the game.
                print('Invalid move')
                continue
            # Explicit range check: a negative index would otherwise wrap
            # around via Python indexing, and a large one would raise
            # IndexError.
            if 0 <= move < len(valid_moves) and valid_moves[move]:
                return move
            print('Invalid move')
class OneStepLookaheadConnect4Player():
    """Simple player who always takes a win if presented, or blocks a loss if obvious, otherwise is random."""

    def __init__(self, game, verbose=True):
        """Set up the player.

        Args:
            game: object providing ``getValidMoves``, ``getNextState``,
                ``getGameEnded`` and ``stringRepresentation``.
            verbose: when true, print which kind of move was selected.
        """
        self.game = game
        self.player_num = 1
        self.verbose = verbose

    def play(self, board):
        """Choose a move by looking one ply ahead.

        Every valid move is tried once for this player and once for the
        opponent (``-player_num``). Moves are sorted into three sets and the
        result is drawn at random from the first non-empty one, in this
        priority order: winning moves, loss-stopping moves, remaining moves.

        Args:
            board: board state, passed through to the game object.

        Returns:
            The selected move index, as returned by ``np.random.choice``.

        Raises:
            Exception: if no valid move exists for ``board``.
        """
        valid_moves = self.game.getValidMoves(board, self.player_num)
        win_move_set = set()
        fallback_move_set = set()
        stop_loss_move_set = set()
        # NOTE(review): both checks compare against whatever getGameEnded
        # returns for the (board, player) pair produced by getNextState;
        # confirm the game's perspective convention matches these comparisons.
        for move, valid in enumerate(valid_moves):
            if not valid:
                continue
            if self.player_num == self._result_after(board, self.player_num, move):
                win_move_set.add(move)
            if -self.player_num == self._result_after(board, -self.player_num, move):
                stop_loss_move_set.add(move)
            else:
                fallback_move_set.add(move)

        if win_move_set:
            ret_move = np.random.choice(list(win_move_set))
            if self.verbose: print('Playing winning action %s from %s' % (ret_move, win_move_set))
        elif stop_loss_move_set:
            ret_move = np.random.choice(list(stop_loss_move_set))
            if self.verbose: print('Playing loss stopping action %s from %s' % (ret_move, stop_loss_move_set))
        elif fallback_move_set:
            ret_move = np.random.choice(list(fallback_move_set))
            if self.verbose: print('Playing random action %s from %s' % (ret_move, fallback_move_set))
        else:
            # Must go through self.game: there is no bare ``game`` name in
            # scope here, so the original raised NameError instead.
            board_repr = self.game.stringRepresentation(board)
            raise Exception(f'No valid moves remaining: {board_repr}')

        return ret_move

    def _result_after(self, board, player, move):
        """Return ``getGameEnded`` for the state reached when ``player`` plays ``move``."""
        return self.game.getGameEnded(*self.game.getNextState(board, player, move))