我們在上節完成了圍棋規則和棋盤狀態監測功能,本節我們在上節的基礎上,設計一個能自己下棋的圍棋機器人
主要有兩點:
一個是讓機器人能自己跟自己下棋
一個是讓機器人跟我們下棋
在完成這一節之後,AlphaGo所需要的所有基礎設施就基本完備了。
首先我們設計一個類叫Agent,它的初始化代碼如下
class Agent:
    """Common base type for every Go-playing bot."""

    def __init__(self):
        """Create an agent; the base class keeps no state."""
        pass

    def select_move(self, game_state):
        """Pick the move to play in ``game_state``.

        Subclasses must override this; the base implementation always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError()
代碼中的select_move用於機器人選擇當前走法。該函數是整個機器人的核心所在,因爲所有智能表現都集中在走法的評估和選擇上,一開始我們只使用簡單規則和推理來設定機器人的落子算法,因此機器人在實現初期會非常弱雞,後面我們會在該函數中加入相應智能算法,讓它變得像AlphaGo一樣強大。
現在我們實現方法非常簡單,就是隨便選擇一個不違反規則的地方落子,只要機器人能避開棋眼以及防止出現ko情形。因此我們現在對select_move的邏輯是,遍歷整個棋盤狀態,找到一個不違背給定規則的位置就可以落子:
import random


class RandomBot(Agent):
    """Bot that plays a uniformly random legal move and never fills its own eyes."""

    def select_move(self, game_state):
        """Return a random playable move for ``game_state.next_player``.

        Every board point is scanned; a point is a candidate when the move is
        valid according to ``game_state.is_valid_move`` and the point is not
        one of the current player's own eyes.  When no candidate exists the
        bot passes.
        """
        board = game_state.board
        candidates = []
        for r in range(1, board.num_rows + 1):
            # The board exposes its width as ``num_cols``.
            for c in range(1, board.num_cols + 1):
                candidate = Point(row=r, col=c)
                # The validity check and the eye check are two separate
                # conditions; the eye test must not be passed as the move.
                if (game_state.is_valid_move(Move.play(candidate))
                        and not is_point_an_eye(
                            board, candidate, game_state.next_player)):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        # Choose any one of the remaining candidates at random.
        return Move.play(random.choice(candidates))
有了上面代碼後,我們可以實現機器人的自我對弈,當然過程頗爲簡單和無聊,無非是兩個機器人隨機掃描棋盤,找到一個合理點後落子。接着我們要繪製棋盤,通常情況下,我們應該用貼圖方式繪製出遊戲那種美輪美奐的棋盤,但爲了簡便行事,我們使用簡單的點和線來構造一個簡易棋盤。
棋盤上可落子的空位,我們用'.'來代替,已經被棋子佔據的位置,我們用'x'來表示黑子,用一個圓圈'o'來表示白子。我們看看實現簡易棋盤的代碼:
到目前爲止的總的代碼
AlphaGo.py
"""AlphaGo.py: Go rules, board bookkeeping and a random self-play bot."""
import copy
import enum
import random
import time
from collections import namedtuple


class Player(enum.Enum):
    """Stone colour of a player."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opposing colour, e.g. white yields ``Player.black``."""
        return Player.black if self == Player.white else Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based (row, col)."""

    def neighbors(self):
        """Return the points above, below, left and right of this one.

        The result is not clipped to the board; callers filter with
        ``Board.is_on_grid``.
        """
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """One action on a turn: place a stone, pass, or resign."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        # XOR check: a move is a play, a pass or a resignation.
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone on the board.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Build a move that places a stone on ``point``."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Build a move that hands the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Build a move that concedes the game."""
        return Move(is_resign=True)


class GoString():
    """A connected group of same-coloured stones together with its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color                # black / white
        self.stones = set(stones)         # points occupied by the group
        self.liberties = set(liberties)   # empty points adjacent to the group

    def remove_liberty(self, point):
        """Drop ``point`` from the liberties (raises KeyError if absent)."""
        self.liberties.remove(point)

    def add_liberty(self, point):
        """Add ``point`` to the liberties."""
        self.liberties.add(point)

    def merged_with(self, go_string):
        """Return a new string that is the union of this one and ``go_string``.

        After a stone is placed, two adjacent groups may join.  With ``*``
        for black, ``o`` for white and ``x`` for an empty point::

            x x x  x x x
            x * x! * o *
            x x x  * o x
            x x *  x o x
            x x *  o x x

        Playing black on ``x!`` merges the new stone with the black stone on
        its left.  The empty points above and below ``x!`` become liberties of
        the merged group, while ``x!`` itself, formerly a liberty of the left
        stone, is now occupied and must be removed from the liberty set.
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(
            self.color,
            combined_stones,
            (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of the group."""
        return len(self.liberties)

    def __eq__(self, other):
        return (isinstance(other, GoString)
                and self.color == other.color
                and self.stones == other.stones
                and self.liberties == other.liberties)


class Board():
    """The board: maps each occupied point to the GoString that owns it."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self._grid = {}

    def _stone_colors(self):
        """Map every occupied point to its colour, skipping captured slots.

        ``_remove_string`` leaves ``None`` entries behind, so those are
        filtered out to make the comparison independent of capture history.
        """
        return {
            point: string.color
            for point, string in self._grid.items()
            if string is not None
        }

    def __eq__(self, other):
        # Value equality is required by the ko check, which compares a
        # freshly copied board against the boards of earlier game states.
        # Defining __eq__ leaves this mutable class unhashable, as intended.
        return (isinstance(other, Board)
                and self.num_rows == other.num_rows
                and self.num_cols == other.num_cols
                and self._stone_colors() == other._stone_colors())

    def place_stone(self, player, point):
        """Put a stone of ``player`` on ``point`` and resolve captures."""
        # The point must be on the board and currently empty.
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None

        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        # Classify the four orthogonal neighbours of the new stone.
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)

        # Merge the new stone with every friendly group it touches.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        # Every stone of the merged group points at the same GoString.
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string

        # The occupied point is no longer a liberty of adjacent enemy groups.
        for other_color_string in adjacent_opposite_color:
            other_color_string.remove_liberty(point)
        # Enemy groups left without liberties are captured.
        for other_color_string in adjacent_opposite_color:
            if other_color_string.num_liberties == 0:
                self._remove_string(other_color_string)

    def is_on_grid(self, point):
        """Return True when ``point`` lies inside the board."""
        return (1 <= point.row <= self.num_rows
                and 1 <= point.col <= self.num_cols)

    def get(self, point):
        """Return the colour of the stone on ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString covering ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Take a whole captured group off the board."""
        for point in string.stones:
            # Each freed point becomes a liberty of the other groups around it.
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    neighbor_string.add_liberty(point)
            self._grid[point] = None


class GameState():
    """A position: the board, the player to move and the history link."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move

    def apply_move(self, move):
        """Return the new GameState reached by playing ``move``."""
        if move.is_play:
            # Work on a copy so earlier states keep their own board.
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Start a game on an empty board; an int size means a square board."""
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True when ``move`` would leave its own group without liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place the stone first so that captures are resolved before judging.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """The (player to move, board) pair used for ko comparison."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True when ``move`` recreates any earlier situation.

        The whole history is walked, not only the previous position, so the
        cost grows with the length of the game.
        """
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        next_situation = (player.other, next_board)
        past_state = self.previous_state
        while past_state is not None:
            if past_state.situation == next_situation:
                return True
            past_state = past_state.previous_state
        return False

    def is_valid_move(self, move):
        """Return True when ``move`` may legally be played in this state."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None
                and not self.is_move_self_capture(self.next_player, move)
                and not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True when the empty ``point`` is an eye of ``color``."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    # In the middle of the board at least three of the four diagonal corners
    # must be friendly; on the edge every on-board corner must be.
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        return off_board_corners + friendly_corners == 4
    return friendly_corners >= 3


class Agent:
    """Base type for every bot."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Pick a move for ``game_state``; subclasses must override."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Bot that plays a random legal move and never fills its own eyes."""

    def select_move(self, game_state):
        """Return a random playable move, or a pass when none exists."""
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        # Choose any one of the remaining candidates at random.
        return Move.play(random.choice(candidates))


# Board columns are labelled with letters ('I' is skipped).
COLS = 'ABCDEFGHJKLMNOPQRST'
# Every cell is three characters wide so that rows and the header line up.
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: ' x ',
    Player.white: ' o ',
}


def print_move(player, move):
    """Print the move of ``player`` in letter/number notation."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print the board, highest row first, followed by the column letters."""
    for row in range(board.num_rows, 0, -1):
        # Pad single-digit row numbers so all rows start in the same column.
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    # Four leading spaces plus two-space gaps centre each letter on its cell.
    print('    ' + '  '.join(COLS[:board.num_cols]))


def main():
    """Let two random bots play each other on a 9x9 board."""
    board_size = 9
    game = GameState.new_game(board_size)
    bots = {
        Player.black: RandomBot(),
        Player.white: RandomBot(),
    }
    while not game.is_over():
        time.sleep(0.3)
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        bot_move = bots[game.next_player].select_move(game)
        print_move(game.next_player, bot_move)
        game = game.apply_move(bot_move)


if __name__ == '__main__':
    main()
當上面代碼運行的時候,程序運行速度比較慢,要等一會才能等到其結束。主要原因就在於以前實現的does_move_violate_ko,該函數會一直追溯回過去所有棋盤狀況去比較,隨着落子次數越多,過去的棋盤狀況數量就越多,因此該函數的執行非常耗時,要想加快速度就必須改進該函數的比對算法。
一種常用算法是對每一步落子進行編碼,然後把落子編碼與棋盤編碼做異或運算,具體過程如下,首先我們面對一個空白棋盤,給它的編碼爲0:
接着有棋子落在C3時,我們給它一個二進制編碼0b1001001:
於是我們做一次異或運算 0b0000000 XOR 0b1001001 = 0b1001001,接着白棋落子在D3,我們對此進行的編碼爲0b0101000:
於是我們再次與前面數值做異或運算: 0b1001001 XOR 0b0101000 = 0b1100001,如果此時我們把白子拿走,於是棋盤返回到上一個狀態,由於位置D3上的變化被我們編碼爲0b0101000,那麼我們再次用該值與先前值做異或運算:0b1100001 XOR 0b0101000 = 0b1001001:
由此我們比對數值就可以發現棋盤狀態是否產生了回滾,不需要像原來那樣進行一次二維數組的掃描比對,如果棋盤對應的二維數組維度爲n,一次掃描比對需要的時間是O(n^2),但是一次數值比對的時間是O(1),由此在效率上能夠提高兩個數量級!!
上面描述的編碼其實很簡單,對於一個19*19的棋盤而言,我們給每一個位置的每一種狀態分配一個隨機整數值,因此總共對應3*19*19個整數,其中3對應3種狀態,也就是位置空着,位置落黑棋,位置落白棋。棋盤狀態每發生一次變化,就把對應的整數與棋盤當前的編碼做異或運算即可,實現代碼如下:
也就是用了哈希散列的思想
"""AlphaGo.py: Go rules, a random bot and the Zobrist hash table generator."""
import copy
import enum
import random
import time
from collections import namedtuple


class Player(enum.Enum):
    """Stone colour of a player."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opposing colour, e.g. white yields ``Player.black``."""
        return Player.black if self == Player.white else Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based (row, col)."""

    def neighbors(self):
        """Return the points above, below, left and right of this one.

        The result is not clipped to the board; callers filter with
        ``Board.is_on_grid``.
        """
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """One action on a turn: place a stone, pass, or resign."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        # XOR check: a move is a play, a pass or a resignation.
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone on the board.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Build a move that places a stone on ``point``."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Build a move that hands the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Build a move that concedes the game."""
        return Move(is_resign=True)


class GoString():
    """A connected group of same-coloured stones together with its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color                # black / white
        self.stones = set(stones)         # points occupied by the group
        self.liberties = set(liberties)   # empty points adjacent to the group

    def remove_liberty(self, point):
        """Drop ``point`` from the liberties (raises KeyError if absent)."""
        self.liberties.remove(point)

    def add_liberty(self, point):
        """Add ``point`` to the liberties."""
        self.liberties.add(point)

    def merged_with(self, go_string):
        """Return a new string that is the union of this one and ``go_string``.

        After a stone is placed, two adjacent groups may join.  With ``*``
        for black, ``o`` for white and ``x`` for an empty point::

            x x x  x x x
            x * x! * o *
            x x x  * o x
            x x *  x o x
            x x *  o x x

        Playing black on ``x!`` merges the new stone with the black stone on
        its left.  The empty points above and below ``x!`` become liberties of
        the merged group, while ``x!`` itself, formerly a liberty of the left
        stone, is now occupied and must be removed from the liberty set.
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(
            self.color,
            combined_stones,
            (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of the group."""
        return len(self.liberties)

    def __eq__(self, other):
        return (isinstance(other, GoString)
                and self.color == other.color
                and self.stones == other.stones
                and self.liberties == other.liberties)


class Board():
    """The board: maps each occupied point to the GoString that owns it."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self._grid = {}

    def _stone_colors(self):
        """Map every occupied point to its colour, skipping captured slots.

        ``_remove_string`` leaves ``None`` entries behind, so those are
        filtered out to make the comparison independent of capture history.
        """
        return {
            point: string.color
            for point, string in self._grid.items()
            if string is not None
        }

    def __eq__(self, other):
        # Value equality is required by the ko check, which compares a
        # freshly copied board against the boards of earlier game states.
        # Defining __eq__ leaves this mutable class unhashable, as intended.
        return (isinstance(other, Board)
                and self.num_rows == other.num_rows
                and self.num_cols == other.num_cols
                and self._stone_colors() == other._stone_colors())

    def place_stone(self, player, point):
        """Put a stone of ``player`` on ``point`` and resolve captures."""
        # The point must be on the board and currently empty.
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None

        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        # Classify the four orthogonal neighbours of the new stone.
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)

        # Merge the new stone with every friendly group it touches.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        # Every stone of the merged group points at the same GoString.
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string

        # The occupied point is no longer a liberty of adjacent enemy groups.
        for other_color_string in adjacent_opposite_color:
            other_color_string.remove_liberty(point)
        # Enemy groups left without liberties are captured.
        for other_color_string in adjacent_opposite_color:
            if other_color_string.num_liberties == 0:
                self._remove_string(other_color_string)

    def is_on_grid(self, point):
        """Return True when ``point`` lies inside the board."""
        return (1 <= point.row <= self.num_rows
                and 1 <= point.col <= self.num_cols)

    def get(self, point):
        """Return the colour of the stone on ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString covering ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Take a whole captured group off the board."""
        for point in string.stones:
            # Each freed point becomes a liberty of the other groups around it.
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    neighbor_string.add_liberty(point)
            self._grid[point] = None


class GameState():
    """A position: the board, the player to move and the history link."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move

    def apply_move(self, move):
        """Return the new GameState reached by playing ``move``."""
        if move.is_play:
            # Work on a copy so earlier states keep their own board.
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Start a game on an empty board; an int size means a square board."""
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True when ``move`` would leave its own group without liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place the stone first so that captures are resolved before judging.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """The (player to move, board) pair used for ko comparison."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True when ``move`` recreates any earlier situation.

        The whole history is walked, not only the previous position, so the
        cost grows with the length of the game.
        """
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        next_situation = (player.other, next_board)
        past_state = self.previous_state
        while past_state is not None:
            if past_state.situation == next_situation:
                return True
            past_state = past_state.previous_state
        return False

    def is_valid_move(self, move):
        """Return True when ``move`` may legally be played in this state."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None
                and not self.is_move_self_capture(self.next_player, move)
                and not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True when the empty ``point`` is an eye of ``color``."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    # In the middle of the board at least three of the four diagonal corners
    # must be friendly; on the edge every on-board corner must be.
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        return off_board_corners + friendly_corners == 4
    return friendly_corners >= 3


class Agent:
    """Base type for every bot."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Pick a move for ``game_state``; subclasses must override."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Bot that plays a random legal move and never fills its own eyes."""

    def select_move(self, game_state):
        """Return a random playable move, or a pass when none exists."""
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        # Choose any one of the remaining candidates at random.
        return Move.play(random.choice(candidates))


def print_move(player, move):
    """Print the move of ``player`` in letter/number notation."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print the board, highest row first, followed by the column letters."""
    for row in range(board.num_rows, 0, -1):
        # Pad single-digit row numbers so all rows start in the same column.
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    # Four leading spaces plus two-space gaps centre each letter on its cell.
    print('    ' + '  '.join(COLS[:board.num_cols]))


def to_python(player_state):
    """Return the value printed for a point state in the generated table."""
    if player_state is None:
        return 'None'
    if player_state == Player.black:
        return Player.black
    return Player.white


# Board columns are labelled with letters ('I' is skipped).
COLS = 'ABCDEFGHJKLMNOPQRST'
# Every cell is three characters wide so that rows and the header line up.
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: ' x ',
    Player.white: ' o ',
}

# Largest value of a signed 64-bit integer; every code is drawn from [0, MAX63].
MAX63 = 0x7fffffffffffffff
# Zobrist hashing: one random code per (point, state) pair, 3 * 19 * 19 in all.
zobrist_HASH_CODE = {}
zobrist_EMPTY_BOARD = 0
for row in range(1, 20):
    for col in range(1, 20):
        for state in (None, Player.black, Player.white):
            # Collisions between the random codes are assumed not to happen.
            code = random.randint(0, MAX63)
            zobrist_HASH_CODE[Point(row, col), state] = code

# Dump the generated table as Python source text.
print('HASH_CODE = {')
for (pt, state), hash_code in zobrist_HASH_CODE.items():
    print(' (%r, %s): %r,' % (pt, to_python(state), hash_code))
print('}')
print(' ')
print('EMPTY_BOARD = %d' % (zobrist_EMPTY_BOARD,))


def main():
    """Let two random bots play each other on a 9x9 board."""
    board_size = 9
    game = GameState.new_game(board_size)
    bots = {
        Player.black: RandomBot(),
        Player.white: RandomBot(),
    }
    while not game.is_over():
        time.sleep(0.3)
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        bot_move = bots[game.next_player].select_move(game)
        print_move(game.next_player, bot_move)
        game = game.apply_move(bot_move)


if __name__ == '__main__':
    # main() is disabled in this listing; running the file only dumps the
    # hash table printed above.
    pass
接下來我們對代碼的一些函數做相應修改
對GoString()、Board()、GameState()等都有修改
目前最新AlphaGo.py
"""AlphaGo.py: Go rules and a random bot with Zobrist-hash based ko detection."""
import copy
import enum
import random
import time
from collections import namedtuple


class Player(enum.Enum):
    """Stone colour of a player."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opposing colour, e.g. white yields ``Player.black``."""
        return Player.black if self == Player.white else Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based (row, col)."""

    def neighbors(self):
        """Return the points above, below, left and right of this one.

        The result is not clipped to the board; callers filter with
        ``Board.is_on_grid``.
        """
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """One action on a turn: place a stone, pass, or resign."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        # XOR check: a move is a play, a pass or a resignation.
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone on the board.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Build a move that places a stone on ``point``."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Build a move that hands the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Build a move that concedes the game."""
        return Move(is_resign=True)


class GoString():
    """An immutable connected group of stones together with its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color                      # black / white
        # Both collections are frozen; updates produce a new GoString.
        self.stones = frozenset(stones)         # points occupied by the group
        self.liberties = frozenset(liberties)   # empty points next to the group

    def without_liberty(self, point):
        """Return a copy of this string that lacks the liberty ``point``."""
        new_liberties = self.liberties - {point}
        return GoString(self.color, self.stones, new_liberties)

    def with_liberty(self, point):
        """Return a copy of this string with the extra liberty ``point``."""
        new_liberties = self.liberties | {point}
        return GoString(self.color, self.stones, new_liberties)

    def merged_with(self, go_string):
        """Return a new string that is the union of this one and ``go_string``.

        After a stone is placed, two adjacent groups may join.  With ``*``
        for black, ``o`` for white and ``x`` for an empty point::

            x x x  x x x
            x * x! * o *
            x x x  * o x
            x x *  x o x
            x x *  o x x

        Playing black on ``x!`` merges the new stone with the black stone on
        its left.  The empty points above and below ``x!`` become liberties of
        the merged group, while ``x!`` itself, formerly a liberty of the left
        stone, is now occupied and must be removed from the liberty set.
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(
            self.color,
            combined_stones,
            (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of the group."""
        return len(self.liberties)

    def __eq__(self, other):
        return (isinstance(other, GoString)
                and self.color == other.color
                and self.stones == other.stones
                and self.liberties == other.liberties)


class Board():
    """The board: point -> GoString map plus an incremental Zobrist hash."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self._grid = {}
        # Hash of the current position, updated on every placement/capture.
        self._hash = zobrist_EMPTY_BOARD

    def zobrist_hash(self):
        """Return the Zobrist hash of the current position."""
        return self._hash

    def place_stone(self, player, point):
        """Put a stone of ``player`` on ``point`` and resolve captures."""
        # The point must be on the board and currently empty.
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None

        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        # Classify the four orthogonal neighbours of the new stone.
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)

        # Merge the new stone with every friendly group it touches.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        # Every stone of the merged group points at the same GoString.
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string

        # XOR out the "empty" code of the point and XOR in the stone's code.
        self._hash ^= zobrist_HASH_CODE[point, None]
        self._hash ^= zobrist_HASH_CODE[point, player]

        for other_color_string in adjacent_opposite_color:
            # The occupied point is no longer a liberty of the enemy group.
            replacement = other_color_string.without_liberty(point)
            if replacement.num_liberties:
                self._replace_string(replacement)
            else:
                # No liberties left: the enemy group is captured.
                self._remove_string(other_color_string)

    def _replace_string(self, new_string):
        """Point every stone of ``new_string`` at that (updated) string."""
        for point in new_string.stones:
            self._grid[point] = new_string

    def is_on_grid(self, point):
        """Return True when ``point`` lies inside the board."""
        return (1 <= point.row <= self.num_rows
                and 1 <= point.col <= self.num_cols)

    def get(self, point):
        """Return the colour of the stone on ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString covering ``point`` or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Take a whole captured group off the board and update the hash."""
        for point in string.stones:
            # Each freed point becomes a liberty of the other groups around it.
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    self._replace_string(neighbor_string.with_liberty(point))
            self._grid[point] = None
            # The point changes from "stone" back to "empty".
            self._hash ^= zobrist_HASH_CODE[point, string.color]
            self._hash ^= zobrist_HASH_CODE[point, None]


class GameState():
    """A position: board, player to move, history link and seen situations."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move
        # (player to move, board hash) of every earlier state of this game.
        if previous is None:
            self.previous_states = frozenset()
        else:
            self.previous_states = frozenset(
                previous.previous_states |
                {(previous.next_player, previous.board.zobrist_hash())})

    def apply_move(self, move):
        """Return the new GameState reached by playing ``move``."""
        if move.is_play:
            # Work on a copy so earlier states keep their own board.
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Start a game on an empty board; an int size means a square board."""
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True when ``move`` would leave its own group without liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place the stone first so that captures are resolved before judging.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """The (player to move, board) pair of this state."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True when ``move`` recreates any earlier situation.

        Instead of walking the history, the (player, hash) pair of the
        resulting position is looked up in ``previous_states``.
        """
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        # previous_states holds (player, hash) pairs, so the lookup key must
        # carry the board's hash rather than the board object itself.
        next_situation = (player.other, next_board.zobrist_hash())
        return next_situation in self.previous_states

    def is_valid_move(self, move):
        """Return True when ``move`` may legally be played in this state."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None
                and not self.is_move_self_capture(self.next_player, move)
                and not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True when the empty ``point`` is an eye of ``color``."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    # In the middle of the board at least three of the four diagonal corners
    # must be friendly; on the edge every on-board corner must be.
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        return off_board_corners + friendly_corners == 4
    return friendly_corners >= 3


class Agent:
    """Base type for every bot."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Pick a move for ``game_state``; subclasses must override."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Bot that plays a random legal move and never fills its own eyes."""

    def select_move(self, game_state):
        """Return a random playable move, or a pass when none exists."""
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        # Choose any one of the remaining candidates at random.
        return Move.play(random.choice(candidates))


def print_move(player, move):
    """Print the move of ``player`` in letter/number notation."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print the board, highest row first, followed by the column letters."""
    for row in range(board.num_rows, 0, -1):
        # Pad single-digit row numbers so all rows start in the same column.
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    # Four leading spaces plus two-space gaps centre each letter on its cell.
    print('    ' + '  '.join(COLS[:board.num_cols]))


def to_python(player_state):
    """Return the value printed for a point state in the generated table."""
    if player_state is None:
        return 'None'
    if player_state == Player.black:
        return Player.black
    return Player.white


# Board columns are labelled with letters ('I' is skipped).
COLS = 'ABCDEFGHJKLMNOPQRST'
# Every cell is three characters wide so that rows and the header line up.
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: ' x ',
    Player.white: ' o ',
}

# Largest value of a signed 64-bit integer; every code is drawn from [0, MAX63].
MAX63 = 0x7fffffffffffffff
# Zobrist hashing: one random code per (point, state) pair, 3 * 19 * 19 in all.
zobrist_HASH_CODE = {}
zobrist_EMPTY_BOARD = 0
for row in range(1, 20):
    for col in range(1, 20):
        for state in (None, Player.black, Player.white):
            # Collisions between the random codes are assumed not to happen.
            code = random.randint(0, MAX63)
            zobrist_HASH_CODE[Point(row, col), state] = code

# Dump the generated table as Python source text (kept from the generator
# listing; it runs every time the module is loaded).
print('HASH_CODE = {')
for (pt, state), hash_code in zobrist_HASH_CODE.items():
    print(' (%r, %s): %r,' % (pt, to_python(state), hash_code))
print('}')
print(' ')
print('EMPTY_BOARD = %d' % (zobrist_EMPTY_BOARD,))


def main():
    """Let two random bots play each other on a 9x9 board."""
    board_size = 9
    game = GameState.new_game(board_size)
    bots = {
        Player.black: RandomBot(),
        Player.white: RandomBot(),
    }
    while not game.is_over():
        time.sleep(0.3)
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        bot_move = bots[game.next_player].select_move(game)
        print_move(game.next_player, bot_move)
        game = game.apply_move(bot_move)


if __name__ == '__main__':
    main()
修改完上面代碼後,我們再次運行main函數,會發現它的執行比原來快了很多。
最後我們再添加人與機器人對弈的功能,要實現人機對弈,我們必須把人的落子位置告知程序,這一點不難,只要我們輸入類似A3,D4這樣的信息即可,由此我們增加一個輔助函數用於輸入人類棋手的落子位置
目前最新AlphaGo.py
import enum
import time
import random
from collections import namedtuple
import copy

try:
    # Python 2 compatibility shim; on Python 3 the builtin input() is used.
    from six.moves import input
except ImportError:
    pass


class Player(enum.Enum):
    """The two stone colours."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opposing colour (white -> black, black -> white)."""
        if self == Player.white:
            return Player.black
        return Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based row and column."""

    def neighbors(self):
        """Return the points above, below, left and right of this one.

        The returned points are not checked against the board bounds.
        """
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """A single turn: placing a stone, passing, or resigning."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        # The XOR rejects a move for which none of the three options is set.
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone on the board.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Build a move that places a stone at `point`."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Build a move that hands the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Build a move that gives up the game."""
        return Move(is_resign=True)


class GoString():
    """An immutable group of connected same-colour stones and its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color
        # Both collections are frozen so a string is never modified in place;
        # every change produces a new GoString.
        self.stones = frozenset(stones)
        self.liberties = frozenset(liberties)

    def without_liberty(self, point):
        """Return a copy of this string with `point` removed from its liberties."""
        new_liberties = self.liberties - set([point])
        return GoString(self.color, self.stones, new_liberties)

    def with_liberty(self, point):
        """Return a copy of this string with `point` added to its liberties."""
        new_liberties = self.liberties | set([point])
        return GoString(self.color, self.stones, new_liberties)

    def merged_with(self, go_string):
        """Return the union of this string and another string of the same colour.

        The merged liberties are the union of both liberty sets minus every
        point that is now occupied by a stone of the merged string (a point
        that was a liberty of one string may hold a stone of the other).
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(
            self.color,
            combined_stones,
            (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of this string."""
        return len(self.liberties)

    def __eq__(self, other):
        return (isinstance(other, GoString)
                and self.color == other.color
                and self.stones == other.stones
                and self.liberties == other.liberties)


class Board():
    """The grid of stones plus a Zobrist hash of the current position."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        # Maps an occupied Point to the GoString that contains it.
        self._grid = {}
        self._hash = zobrist_EMPTY_BOARD

    def zobrist_hash(self):
        """Return the Zobrist hash of the current position."""
        return self._hash

    def place_stone(self, player, point):
        """Place a stone for `player` at `point` and resolve captures.

        The point must be on the board and empty (both are asserted).
        """
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None
        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)

        # Merge the new stone with every adjacent friendly string.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string

        # Swap the "empty" code of this point for the "player" code.
        self._hash ^= zobrist_HASH_CODE[point, None]
        self._hash ^= zobrist_HASH_CODE[point, player]

        for other_color_string in adjacent_opposite_color:
            # The point just played is no longer a liberty of the enemy string.
            replacement = other_color_string.without_liberty(point)
            if replacement.num_liberties:
                self._replace_string(replacement)
            else:
                # No liberties left: the enemy string is captured.
                self._remove_string(other_color_string)

    def _replace_string(self, new_string):
        """Point every stone of `new_string` at the new GoString object."""
        for point in new_string.stones:
            self._grid[point] = new_string

    def is_on_grid(self, point):
        """Return True if `point` lies within the board bounds."""
        return (1 <= point.row <= self.num_rows
                and 1 <= point.col <= self.num_cols)

    def get(self, point):
        """Return the colour of the stone at `point`, or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString occupying `point`, or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Remove a captured string and give its points back as liberties."""
        for point in string.stones:
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    # Adjacent strings gain the vacated point as a liberty.
                    self._replace_string(neighbor_string.with_liberty(point))
            self._grid[point] = None
            # Swap the stone's code back to the "empty" code for this point.
            self._hash ^= zobrist_HASH_CODE[point, string.color]
            self._hash ^= zobrist_HASH_CODE[point, None]


class GameState():
    """A position, whose turn it is, and the history needed for the ko rule."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move
        # Every earlier situation as a (player to move, board hash) pair.
        if previous is None:
            self.previous_states = frozenset()
        else:
            self.previous_states = frozenset(
                previous.previous_states
                | {(previous.next_player, previous.board.zobrist_hash())})

    def apply_move(self, move):
        """Return the GameState that results from playing `move`."""
        if move.is_play:
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Start a game on an empty board; `board_size` is an int or (rows, cols)."""
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True if the played stone's string ends up with no liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place first so captures are resolved before counting liberties.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """The (player to move, board) pair of this state."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True if `move` would recreate an earlier situation."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        # previous_states stores (player, zobrist hash) pairs, so the hash --
        # not the Board object -- must be compared, otherwise nothing matches.
        next_situation = (player.other, next_board.zobrist_hash())
        return next_situation in self.previous_states

    def is_valid_move(self, move):
        """Return True if `move` is legal for the player to move."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None
                and not self.is_move_self_capture(self.next_player, move)
                and not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True if the empty `point` is treated as an eye for `color`."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        # On an edge or corner, every on-board diagonal must be friendly.
        return off_board_corners + friendly_corners == 4
    # In the middle, at least three of the four diagonals must be friendly.
    return friendly_corners >= 3


class Agent:
    """Base class for bots; subclasses implement select_move."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Return the Move to play in `game_state`."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Bot that plays a random legal move that does not fill its own eye."""

    def select_move(self, game_state):
        """Return a random candidate move, or a pass when there is none."""
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        return Move.play(random.choice(candidates))


def print_move(player, move):
    """Print `player` followed by the move in board notation (e.g. D4)."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print the board as text, highest row first, with column letters below."""
    for row in range(board.num_rows, 0, -1):
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    print(' ' + ' '.join(COLS[:board.num_cols]))


def to_python(player_state):
    """Return the value printed for a point state in the hash-table dump."""
    if player_state is None:
        return 'None'
    if player_state == Player.black:
        return Player.black
    return Player.white


def point_from_coords(coords):
    """Convert text such as 'A3' or 'd4' into a Point.

    The first character is the column letter (looked up in COLS,
    case-insensitively); the rest is the row number.

    Raises:
        ValueError: if the text is too short, the column letter is unknown,
            or the row is not an integer.
    """
    coords = coords.strip().upper()
    if len(coords) < 2 or coords[0] not in COLS:
        raise ValueError('invalid board coordinate: %r' % (coords,))
    col = COLS.index(coords[0]) + 1
    row = int(coords[1:])
    return Point(row=row, col=col)


def read_human_move(game_state, prompt='--'):
    """Prompt until the human enters a legal move and return it.

    Accepts a coordinate such as 'A3', or the words 'pass' / 'resign'.
    Malformed, off-board and illegal moves are reported and asked again
    instead of crashing the game loop.
    """
    while True:
        text = input(prompt).strip()
        lowered = text.lower()
        if lowered == 'pass':
            return Move.pass_turn()
        if lowered == 'resign':
            return Move.resign()
        try:
            point = point_from_coords(text)
        except ValueError:
            print('Invalid input %r; enter e.g. A3, "pass" or "resign".'
                  % (text,))
            continue
        move = Move.play(point)
        # Check the bounds first: is_valid_move places the stone on a copy of
        # the board, which asserts that the point is on the grid.
        if game_state.board.is_on_grid(point) and \
                game_state.is_valid_move(move):
            return move
        print('Illegal move %s; try again.' % (text,))


# Column labels; the letter I is not used.
COLS = 'ABCDEFGHJKLMNOPQRST'
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: 'x',
    Player.white: 'o',
}

# Zobrist hashing: every (point, state) pair gets a random code drawn from
# [0, MAX63].  Only 3*19*19 codes are drawn, so collisions between them are
# assumed not to happen.
MAX63 = 0x7fffffffffffffff
zobrist_HASH_CODE = {}
zobrist_EMPTY_BOARD = 0
for row in range(1, 20):
    for col in range(1, 20):
        for state in (None, Player.black, Player.white):
            code = random.randint(0, MAX63)
            zobrist_HASH_CODE[Point(row, col), state] = code

# Dump of the generated table, kept from the original listing.
print('HASH_CODE = {')
for (pt, state), hash_code in zobrist_HASH_CODE.items():
    print(' (%r, %s): %r,' % (pt, to_python(state), hash_code))
print('}')
print(' ')
print('EMPTY_BOARD = %d' % (zobrist_EMPTY_BOARD,))


def main():
    """Play a 9x9 game: the human takes black, RandomBot takes white."""
    board_size = 9
    game = GameState.new_game(board_size)
    bot = RandomBot()
    while not game.is_over():
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        if game.next_player == Player.black:
            move = read_human_move(game)
        else:
            move = bot.select_move(game)
        print_move(game.next_player, move)
        game = game.apply_move(move)


if __name__ == '__main__':
    main()
下面是我自己加上圖形棋盤(用turtle繪製)的版本。
不過,把被喫掉的棋子從圖形棋盤上拿走的功能還沒有實現。
上面機機對弈的改良
import enum
import time
import random
from collections import namedtuple
import copy
import turtle


def bgpic(self, picname=None):
    """Get or set a background picture on a turtle-screen-like object.

    NOTE(review): this looks like a copy of turtle's own screen method; it is
    not referenced anywhere else in this listing (turtle.bgpic is used).
    """
    if picname is None:
        return self._bgpicname
    if picname not in self._bgpics:
        self._bgpics[picname] = self._image(picname)
    self._setbgpic(self._bgpic, self._bgpics[picname])
    self._bgpicname = picname


class Player(enum.Enum):
    """The two stone colours."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opposing colour (white -> black, black -> white)."""
        if self == Player.white:
            return Player.black
        return Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based row and column."""

    def neighbors(self):
        """Return the points above, below, left and right of this one.

        The returned points are not checked against the board bounds.
        """
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """A single turn: placing a stone, passing, or resigning."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        # The XOR rejects a move for which none of the three options is set.
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone on the board.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Build a move that places a stone at `point`."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Build a move that hands the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Build a move that gives up the game."""
        return Move(is_resign=True)


class GoString():
    """An immutable group of connected same-colour stones and its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color
        # Both collections are frozen so a string is never modified in place;
        # every change produces a new GoString.
        self.stones = frozenset(stones)
        self.liberties = frozenset(liberties)

    def without_liberty(self, point):
        """Return a copy of this string with `point` removed from its liberties."""
        new_liberties = self.liberties - set([point])
        return GoString(self.color, self.stones, new_liberties)

    def with_liberty(self, point):
        """Return a copy of this string with `point` added to its liberties."""
        new_liberties = self.liberties | set([point])
        return GoString(self.color, self.stones, new_liberties)

    def merged_with(self, go_string):
        """Return the union of this string and another string of the same colour.

        The merged liberties are the union of both liberty sets minus every
        point that is now occupied by a stone of the merged string (a point
        that was a liberty of one string may hold a stone of the other).
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(
            self.color,
            combined_stones,
            (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of this string."""
        return len(self.liberties)

    def __eq__(self, other):
        return (isinstance(other, GoString)
                and self.color == other.color
                and self.stones == other.stones
                and self.liberties == other.liberties)


class Board():
    """The grid of stones plus a Zobrist hash of the current position."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        # Maps an occupied Point to the GoString that contains it.
        self._grid = {}
        self._hash = zobrist_EMPTY_BOARD

    def zobrist_hash(self):
        """Return the Zobrist hash of the current position."""
        return self._hash

    def place_stone(self, player, point):
        """Place a stone for `player` at `point` and resolve captures.

        The point must be on the board and empty (both are asserted).
        """
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None
        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)

        # Merge the new stone with every adjacent friendly string.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string

        # Swap the "empty" code of this point for the "player" code.
        self._hash ^= zobrist_HASH_CODE[point, None]
        self._hash ^= zobrist_HASH_CODE[point, player]

        for other_color_string in adjacent_opposite_color:
            # The point just played is no longer a liberty of the enemy string.
            replacement = other_color_string.without_liberty(point)
            if replacement.num_liberties:
                self._replace_string(replacement)
            else:
                # No liberties left: the enemy string is captured.
                self._remove_string(other_color_string)

    def _replace_string(self, new_string):
        """Point every stone of `new_string` at the new GoString object."""
        for point in new_string.stones:
            self._grid[point] = new_string

    def is_on_grid(self, point):
        """Return True if `point` lies within the board bounds."""
        return (1 <= point.row <= self.num_rows
                and 1 <= point.col <= self.num_cols)

    def get(self, point):
        """Return the colour of the stone at `point`, or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString occupying `point`, or None if empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Remove a captured string and give its points back as liberties."""
        for point in string.stones:
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    # Adjacent strings gain the vacated point as a liberty.
                    self._replace_string(neighbor_string.with_liberty(point))
            self._grid[point] = None
            # Swap the stone's code back to the "empty" code for this point.
            self._hash ^= zobrist_HASH_CODE[point, string.color]
            self._hash ^= zobrist_HASH_CODE[point, None]


class GameState():
    """A position, whose turn it is, and the history needed for the ko rule."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move
        # Every earlier situation as a (player to move, board hash) pair.
        if previous is None:
            self.previous_states = frozenset()
        else:
            self.previous_states = frozenset(
                previous.previous_states
                | {(previous.next_player, previous.board.zobrist_hash())})

    def apply_move(self, move):
        """Return the GameState that results from playing `move`."""
        if move.is_play:
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Start a game on an empty board; `board_size` is an int or (rows, cols)."""
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True if the played stone's string ends up with no liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place first so captures are resolved before counting liberties.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """The (player to move, board) pair of this state."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True if `move` would recreate an earlier situation."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        # previous_states stores (player, zobrist hash) pairs, so the hash --
        # not the Board object -- must be compared, otherwise nothing matches.
        next_situation = (player.other, next_board.zobrist_hash())
        return next_situation in self.previous_states

    def is_valid_move(self, move):
        """Return True if `move` is legal for the player to move."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None
                and not self.is_move_self_capture(self.next_player, move)
                and not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True if the empty `point` is treated as an eye for `color`."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        # On an edge or corner, every on-board diagonal must be friendly.
        return off_board_corners + friendly_corners == 4
    # In the middle, at least three of the four diagonals must be friendly.
    return friendly_corners >= 3


class Agent:
    """Base class for bots; subclasses implement select_move."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Return the Move to play in `game_state`."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Bot that plays a random legal move that does not fill its own eye."""

    def select_move(self, game_state):
        """Return a random candidate move, or a pass when there is none."""
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        return Move.play(random.choice(candidates))


def print_move(player, move):
    """Print `player` followed by the move in board notation (e.g. D4)."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print the board as text, highest row first, with column letters below."""
    for row in range(board.num_rows, 0, -1):
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    print(' ' + ' '.join(COLS[:board.num_cols]))


def to_python(player_state):
    """Return the value printed for a point state in the hash-table dump."""
    if player_state is None:
        return 'None'
    if player_state == Player.black:
        return Player.black
    return Player.white


def point_from_coords(coords):
    """Convert text such as 'A3' or 'd4' into a Point.

    The first character is the column letter (looked up in COLS,
    case-insensitively); the rest is the row number.

    Raises:
        ValueError: if the text is too short, the column letter is unknown,
            or the row is not an integer.
    """
    coords = coords.strip().upper()
    if len(coords) < 2 or coords[0] not in COLS:
        raise ValueError('invalid board coordinate: %r' % (coords,))
    col = COLS.index(coords[0]) + 1
    row = int(coords[1:])
    return Point(row=row, col=col)


# Column labels; the letter I is not used.
COLS = 'ABCDEFGHJKLMNOPQRST'
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: 'x',
    Player.white: 'o',
}

# Zobrist hashing: every (point, state) pair gets a random code drawn from
# [0, MAX63].  Only 3*19*19 codes are drawn, so collisions between them are
# assumed not to happen.
MAX63 = 0x7fffffffffffffff
zobrist_HASH_CODE = {}
zobrist_EMPTY_BOARD = 0
for row in range(1, 20):
    for col in range(1, 20):
        for state in (None, Player.black, Player.white):
            code = random.randint(0, MAX63)
            zobrist_HASH_CODE[Point(row, col), state] = code

# Dump of the generated table, kept from the original listing.
print('HASH_CODE = {')
for (pt, state), hash_code in zobrist_HASH_CODE.items():
    print(' (%r, %s): %r,' % (pt, to_python(state), hash_code))
print('}')
print(' ')
print('EMPTY_BOARD = %d' % (zobrist_EMPTY_BOARD,))

n = 50    # distance between two adjacent grid lines
x = -200  # x coordinate of the grid's left edge (column 1)
y = -200  # y coordinate of the grid's bottom edge (row 1)


def draw_grid(board_size=9):
    """Draw a `board_size` x `board_size` grid over the background picture.

    Kept in a function (called from main) so that importing this module does
    not open a turtle window; the background file 'bg.gif' must be readable
    from the working directory.
    """
    turtle.speed(9)
    turtle.screensize(400, 400)
    turtle.penup()
    turtle.pencolor('black')
    turtle.bgpic(r'bg.gif')
    span = (board_size - 1) * n
    # Horizontal lines, bottom to top.
    for i in range(board_size):
        turtle.goto(x, y + n * i)
        turtle.pendown()
        turtle.forward(span)
        turtle.penup()
    # Vertical lines, left to right.
    turtle.left(90)
    for i in range(board_size):
        turtle.goto(x + n * i, y)
        turtle.pendown()
        turtle.forward(span)
        turtle.penup()
    turtle.right(90)
    turtle.hideturtle()


def hua(point, player):
    """Draw a filled stone for `player` at `point` on the turtle board.

    `player` may be a Player member or its string form ('Player.white' /
    'Player.black'); anything other than white is drawn black.  Captured
    stones are not erased from the canvas.
    """
    x0 = x + (point.col - 1) * n
    # turtle.circle starts at the bottom of the circle, so shift down by the
    # radius to centre the stone on the intersection.
    y0 = y + (point.row - 1) * n - n * 0.25
    turtle.goto(x0, y0)
    turtle.begin_fill()
    if str(player) == 'Player.white':
        turtle.fillcolor('white')
    else:
        turtle.fillcolor('black')
    turtle.circle(n * 0.25)
    turtle.end_fill()


def main():
    """Let two RandomBots play a 9x9 game, shown as text and on the turtle board."""
    board_size = 9
    draw_grid(board_size)
    game = GameState.new_game(board_size)
    bots = {
        Player.black: RandomBot(),
        Player.white: RandomBot(),
    }
    while not game.is_over():
        time.sleep(0.3)
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        bot_move = bots[game.next_player].select_move(game)
        if bot_move.point is not None:
            hua(bot_move.point, str(game.next_player))
        print_move(game.next_player, bot_move)
        game = game.apply_move(bot_move)


if __name__ == '__main__':
    main()
人機對弈的改良
import copy
import enum
import random
import time
import turtle
from collections import namedtuple

from six.moves import input


def bgpic(self, picname=None):
    """Return the current background picture name, or set it to *picname*.

    NOTE(review): this looks like a pasted copy of turtle's own screen
    ``bgpic`` method; nothing in this script calls it (the board set-up uses
    ``turtle.bgpic``) -- confirm before removing.
    """
    if picname is None:
        return self._bgpicname
    if picname not in self._bgpics:
        self._bgpics[picname] = self._image(picname)
    self._setbgpic(self._bgpic, self._bgpics[picname])
    self._bgpicname = picname


class Player(enum.Enum):
    """Colour of a player's stones."""

    black = 1
    white = 2

    @property
    def other(self):
        """Return the opponent's colour (white -> black, black -> white)."""
        if self == Player.white:
            return Player.black
        else:
            return Player.white


class Point(namedtuple('Point', 'row col')):
    """A board intersection addressed by 1-based ``row`` and ``col``."""

    def neighbors(self):
        """Return the four points directly above, below, left and right."""
        return [
            Point(self.row - 1, self.col),
            Point(self.row + 1, self.col),
            Point(self.row, self.col - 1),
            Point(self.row, self.col + 1),
        ]


class Move():
    """One action in a turn: play a stone, pass, or resign."""

    def __init__(self, point=None, is_pass=False, is_resign=False):
        assert (point is not None) ^ is_pass ^ is_resign
        self.point = point
        # True when this move places a stone.
        self.is_play = (self.point is not None)
        self.is_pass = is_pass
        self.is_resign = is_resign

    @classmethod
    def play(cls, point):
        """Return a move that places a stone on *point*."""
        return Move(point=point)

    @classmethod
    def pass_turn(cls):
        """Return a move that passes the turn to the opponent."""
        return Move(is_pass=True)

    @classmethod
    def resign(cls):
        """Return a move that resigns the game."""
        return Move(is_resign=True)


class GoString():
    """An immutable group of connected same-coloured stones and its liberties."""

    def __init__(self, color, stones, liberties):
        self.color = color
        # Both collections are frozen so a string can be shared between
        # boards; changes produce a new GoString instead.
        self.stones = frozenset(stones)
        self.liberties = frozenset(liberties)

    def without_liberty(self, point):
        """Return a copy of this string with *point* removed from its liberties."""
        new_liberties = self.liberties - set([point])
        return GoString(self.color, self.stones, new_liberties)

    def with_liberty(self, point):
        """Return a copy of this string with *point* added to its liberties."""
        new_liberties = self.liberties | set([point])
        return GoString(self.color, self.stones, new_liberties)

    def merged_with(self, go_string):
        """Return the union of this string and *go_string* (same colour).

        Placing a stone next to friendly stones joins them into one string.
        The merged string owns the liberties of both parts, except points
        that are now occupied by the merged stones themselves (for example
        the point just played, which used to be a liberty of its neighbour).
        """
        assert go_string.color == self.color
        combined_stones = self.stones | go_string.stones
        return GoString(self.color, combined_stones,
                        (self.liberties | go_string.liberties) - combined_stones)

    @property
    def num_liberties(self):
        """Number of liberties of this string."""
        return len(self.liberties)

    def __eq__(self, other):
        return isinstance(other, GoString) and self.color == other.color and \
            self.stones == other.stones and self.liberties == other.liberties

    def __hash__(self):
        # Defined together with __eq__ so equal strings hash equally; without
        # it, Python 3 makes instances unhashable.
        return hash((self.color, self.stones, self.liberties))


class Board():
    """Go board mapping each occupied point to the GoString it belongs to."""

    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self._grid = {}
        # Running Zobrist hash of the position.
        self._hash = zobrist_EMPTY_BOARD

    def zobrist_hash(self):
        """Return the Zobrist hash of the current position."""
        return self._hash

    def place_stone(self, player, point):
        """Put a stone of *player* on *point* and remove captured enemy strings."""
        # The point must be on the board and empty.
        assert self.is_on_grid(point)
        assert self._grid.get(point) is None
        adjacent_same_color = []
        adjacent_opposite_color = []
        liberties = []
        for neighbor in point.neighbors():
            if not self.is_on_grid(neighbor):
                continue
            neighbor_string = self._grid.get(neighbor)
            if neighbor_string is None:
                # An empty neighbour is a liberty of the new stone.
                liberties.append(neighbor)
            elif neighbor_string.color == player:
                if neighbor_string not in adjacent_same_color:
                    adjacent_same_color.append(neighbor_string)
            else:
                if neighbor_string not in adjacent_opposite_color:
                    adjacent_opposite_color.append(neighbor_string)
        # Merge the new stone with every friendly string it touches.
        new_string = GoString(player, [point], liberties)
        for same_color_string in adjacent_same_color:
            new_string = new_string.merged_with(same_color_string)
        for new_string_point in new_string.stones:
            self._grid[new_string_point] = new_string
        # The point changes from empty to *player* in the hash.
        self._hash ^= zobrist_HASH_CODE[point, None]
        self._hash ^= zobrist_HASH_CODE[point, player]
        for other_color_string in adjacent_opposite_color:
            # The played point is no longer a liberty of the enemy string.
            replacement = other_color_string.without_liberty(point)
            if replacement.num_liberties:
                self._replace_string(replacement)
            else:
                # No liberties left: the enemy string is captured.
                self._remove_string(other_color_string)

    def _replace_string(self, new_string):
        """Point every stone of *new_string* at the *new_string* object."""
        for point in new_string.stones:
            self._grid[point] = new_string

    def is_on_grid(self, point):
        """Return True when *point* lies inside the board."""
        return 1 <= point.row <= self.num_rows and \
            1 <= point.col <= self.num_cols

    def get(self, point):
        """Return the colour of the stone on *point*, or None when empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string.color

    def get_go_string(self, point):
        """Return the GoString covering *point*, or None when empty."""
        string = self._grid.get(point)
        if string is None:
            return None
        return string

    def _remove_string(self, string):
        """Take every stone of *string* off the board."""
        for point in string.stones:
            for neighbor in point.neighbors():
                neighbor_string = self._grid.get(neighbor)
                if neighbor_string is None:
                    continue
                if neighbor_string is not string:
                    # The freed point becomes a liberty of the neighbour.
                    self._replace_string(neighbor_string.with_liberty(point))
            self._grid[point] = None
            # The point changes from occupied back to empty in the hash.
            self._hash ^= zobrist_HASH_CODE[point, string.color]
            self._hash ^= zobrist_HASH_CODE[point, None]


class GameState():
    """A position: the board, whose turn it is, and the history leading to it."""

    def __init__(self, board, next_player, previous, move):
        self.board = board
        self.next_player = next_player
        self.previous_state = previous
        self.last_move = move
        # Every earlier situation as (player to move, board hash), used for
        # the ko check.
        if previous is None:
            self.previous_states = frozenset()
        else:
            self.previous_states = frozenset(
                previous.previous_states |
                {(previous.next_player, previous.board.zobrist_hash())})

    def apply_move(self, move):
        """Return the GameState that results from playing *move*."""
        if move.is_play:
            next_board = copy.deepcopy(self.board)
            next_board.place_stone(self.next_player, move.point)
        else:
            next_board = self.board
        return GameState(next_board, self.next_player.other, self, move)

    @classmethod
    def new_game(cls, board_size):
        """Return the initial state for a board of *board_size*.

        *board_size* is either an int (square board) or a (rows, cols) pair.
        Black moves first.
        """
        if isinstance(board_size, int):
            board_size = (board_size, board_size)
        board = Board(*board_size)
        return GameState(board, Player.black, None, None)

    def is_over(self):
        """Return True after a resignation or two consecutive passes."""
        if self.last_move is None:
            return False
        if self.last_move.is_resign:
            return True
        second_last_move = self.previous_state.last_move
        if second_last_move is None:
            return False
        return self.last_move.is_pass and second_last_move.is_pass

    def is_move_self_capture(self, player, move):
        """Return True when *move* would leave the played string without liberties."""
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        # Place the stone first so captures are resolved before checking.
        next_board.place_stone(player, move.point)
        new_string = next_board.get_go_string(move.point)
        return new_string.num_liberties == 0

    @property
    def situation(self):
        """Return the pair (next player, board)."""
        return (self.next_player, self.board)

    def does_move_violate_ko(self, player, move):
        """Return True when *move* recreates an earlier situation.

        ``previous_states`` stores (player, board hash) pairs, so the
        candidate must be compared by hash as well; comparing the Board
        object itself can never match and disabled the ko rule.
        """
        if not move.is_play:
            return False
        next_board = copy.deepcopy(self.board)
        next_board.place_stone(player, move.point)
        next_situation = (player.other, next_board.zobrist_hash())
        return next_situation in self.previous_states

    def is_valid_move(self, move):
        """Return True when *move* is legal in this state."""
        if self.is_over():
            return False
        if move.is_pass or move.is_resign:
            return True
        return (self.board.get(move.point) is None and
                not self.is_move_self_capture(self.next_player, move) and
                not self.does_move_violate_ko(self.next_player, move))


def is_point_an_eye(board, point, color):
    """Return True when the empty *point* is an eye for *color*."""
    if board.get(point) is not None:
        return False
    # Every on-board orthogonal neighbour must be a friendly stone.
    for neighbor in point.neighbors():
        if board.is_on_grid(neighbor):
            neighbor_color = board.get(neighbor)
            if neighbor_color != color:
                return False
    # In the middle of the board at least three of the four diagonal points
    # must be friendly; on the edge or in a corner, all on-board ones must be.
    friendly_corners = 0
    off_board_corners = 0
    corners = [
        Point(point.row - 1, point.col - 1),
        Point(point.row - 1, point.col + 1),
        Point(point.row + 1, point.col - 1),
        Point(point.row + 1, point.col + 1),
    ]
    for corner in corners:
        if board.is_on_grid(corner):
            corner_color = board.get(corner)
            if corner_color == color:
                friendly_corners += 1
        else:
            off_board_corners += 1
    if off_board_corners > 0:
        return off_board_corners + friendly_corners == 4
    return friendly_corners >= 3


class Agent:
    """Base class for anything that can choose a move."""

    def __init__(self):
        pass

    def select_move(self, game_state):
        """Return the Move to play in *game_state*; subclasses must override."""
        raise NotImplementedError()


class RandomBot(Agent):
    """Agent that plays a uniformly random legal move that does not fill its own eye."""

    def select_move(self, game_state):
        """Return a random playable move for ``game_state.next_player``.

        Every board point is scanned; a point is a candidate when the move is
        valid and the point is not one of the player's own eyes.  When no
        candidate exists the bot passes.
        """
        candidates = []
        for r in range(1, game_state.board.num_rows + 1):
            for c in range(1, game_state.board.num_cols + 1):
                candidate = Point(row=r, col=c)
                if game_state.is_valid_move(Move.play(candidate)) and not \
                        is_point_an_eye(game_state.board, candidate,
                                        game_state.next_player):
                    candidates.append(candidate)
        if not candidates:
            return Move.pass_turn()
        # Pick any one of the acceptable points.
        return Move.play(random.choice(candidates))


def print_move(player, move):
    """Print *player* followed by a short description of *move*."""
    if move.is_pass:
        move_str = 'passes'
    elif move.is_resign:
        move_str = 'resign'
    else:
        move_str = '%s%d' % (COLS[move.point.col - 1], move.point.row)
    print('%s %s' % (player, move_str))


def print_board(board):
    """Print *board* as text, highest row first, followed by the column letters."""
    for row in range(board.num_rows, 0, -1):
        # Pad single-digit row numbers so the rows line up.
        bump = ' ' if row <= 9 else ''
        line = []
        for col in range(1, board.num_cols + 1):
            stone = board.get(Point(row=row, col=col))
            line.append(STONE_TO_CHAR[stone])
        print('%s%d %s' % (bump, row, ''.join(line)))
    print(' ' + ' '.join(COLS[:board.num_cols]))


def to_python(player_state):
    """Return the Python source text naming *player_state*.

    Always returns a string ('None', 'Player.black' or 'Player.white'); the
    previous version returned enum members for the two colours, which only
    worked because the caller formatted the result with ``%s``.
    """
    if player_state is None:
        return 'None'
    if player_state == Player.black:
        return str(Player.black)
    return str(Player.white)


def point_from_coords(coords):
    """Convert text such as ``A3`` or ``D3`` into a ``Point``."""
    # The leading letter names the column ...
    col = COLS.index(coords[0]) + 1
    # ... and the remaining digits give the row.
    row = int(coords[1:])
    return Point(row=row, col=col)


# Column letters of the board (the letter I is skipped).
COLS = 'ABCDEFGHJKLMNOPQRST'
STONE_TO_CHAR = {
    None: ' . ',
    Player.black: 'x',
    Player.white: 'o',
}

# Zobrist hashing: each (point, state) pair gets a random 63-bit code and a
# board is identified by the XOR of the codes of its points.
MAX63 = 0x7fffffffffffffff
zobrist_HASH_CODE = {}
zobrist_EMPTY_BOARD = 0
for row in range(1, 20):
    for col in range(1, 20):
        for state in (None, Player.black, Player.white):
            # 3 * 19 * 19 codes are drawn out of MAX63 possible values, so
            # collisions between codes are assumed not to happen.
            code = random.randint(0, MAX63)
            zobrist_HASH_CODE[Point(row, col), state] = code

print('HASH_CODE = {')
for (pt, state), hash_code in zobrist_HASH_CODE.items():
    print(' (%r, %s): %r,' % (pt, to_python(state), hash_code))
print('}')
print(' ')
print('EMPTY_BOARD = %d' % (zobrist_EMPTY_BOARD,))

GRID_LINES = 9  # number of lines drawn in each direction (9x9 board)
n = 50  # distance between two neighbouring lines
x = -200  # x coordinate of the lower-left intersection
y = -200  # y coordinate of the lower-left intersection
turtle.speed(9)
turtle.screensize(400, 400)
turtle.penup()
turtle.pencolor('black')
turtle.bgpic(r'bg.gif')
# Horizontal lines, bottom to top.
for i in range(GRID_LINES):
    turtle.goto(x, y + n * i)
    turtle.pendown()
    turtle.forward((GRID_LINES - 1) * n)
    turtle.penup()
# Vertical lines, left to right.
turtle.left(90)
for i in range(GRID_LINES):
    turtle.goto(x + n * i, y)
    turtle.pendown()
    turtle.forward((GRID_LINES - 1) * n)
    turtle.penup()
# Face east again so hua() draws its circles the right way up.
turtle.right(90)
turtle.hideturtle()


def hua(point, color):
    """Draw a filled stone on the intersection *point*.

    *color* equal to 1 draws a white stone; any other value draws black.
    """
    print(point.row, point.col)
    x0 = x + (point.col - 1) * n
    # turtle.circle() puts the centre one radius to the turtle's left, so
    # start one radius below the intersection.
    y0 = y + (point.row - 1) * n - n * 0.25
    turtle.goto(x0, y0)
    turtle.begin_fill()
    if color == 1:
        turtle.fillcolor('white')
    else:
        turtle.fillcolor('black')
    turtle.circle(n * 0.25)
    turtle.end_fill()


def _read_human_move(game):
    """Prompt until the human enters a legal move for *game*.

    Accepts a coordinate such as ``D3``, or the words ``pass`` / ``resign``.
    Malformed or illegal input is reported and asked for again instead of
    crashing the game loop.
    """
    while True:
        text = input('--').strip()
        command = text.lower()
        if command == 'pass':
            return Move.pass_turn()
        if command == 'resign':
            return Move.resign()
        try:
            point = point_from_coords(text)
        except (ValueError, IndexError):
            print('invalid input: %r' % (text,))
            continue
        # Check the bounds first: validating an off-board point would trip
        # the assertion in Board.place_stone.
        if game.board.is_on_grid(point) and \
                game.is_valid_move(Move.play(point)):
            return Move.play(point)
        print('illegal move: %r' % (text,))


def main():
    """Play a game on the drawn board: human (black) against RandomBot (white)."""
    board_size = GRID_LINES
    game = GameState.new_game(board_size)
    bot = RandomBot()
    while not game.is_over():
        # ANSI escape sequence that clears the terminal.
        print(chr(27) + "[2J")
        print_board(game.board)
        if game.next_player == Player.black:
            move = _read_human_move(game)
            stone_color = 0
        else:
            move = bot.select_move(game)
            stone_color = 1
        # Pass and resign carry no point, so only real plays are drawn.
        # NOTE: captured stones are not erased from the turtle canvas.
        if move.is_play:
            hua(move.point, stone_color)
        print_move(game.next_player, move)
        game = game.apply_move(move)


if __name__ == '__main__':
    main()
參考: