import numpy as np
import random
import matplotlib.pyplot as plt
import pygame
import sys
class TicTacToe:
    def __init__(self, board_size=3):
        self.board_size = board_size
        self.board = np.zeros((board_size, board_size), dtype=int)  # Board cells: 0 - empty, 1 - X, 2 - O
        self.current_player = 1  # X moves first

    def draw_board(self, window, cell_size, window_height, window_width):
        window.fill((255, 255, 255))  # Fill the window with white

        # Draw the grid lines separating the cells
        for i in range(1, self.board_size):
            pygame.draw.line(window, (0, 0, 0), (0, i * cell_size), (window_width, i * cell_size))
            pygame.draw.line(window, (0, 0, 0), (i * cell_size, 0), (i * cell_size, window_height))

        # Draw the symbols on the board
        symbols = {2: 'O', 0: ' ', 1: 'X'}
        font = pygame.font.Font(None, 100)
        for i in range(self.board_size):
            for j in range(self.board_size):
                text = font.render(symbols[self.board[i][j]], True, (0, 0, 0))
                text_rect = text.get_rect(center=(j * cell_size + cell_size // 2, i * cell_size + cell_size // 2))
                window.blit(text, text_rect)
        return window
    def exe_play(self, agents, vs='agent', player_side='random'):
        def we_get_winner(winner):
            if winner == 1:
                return 'X wins!'
            elif winner == 2:
                return 'O wins!'
            elif winner == 0:
                return 'Draw!'
            return '-'

        pygame.init()
        window_width = 300
        window_height = 300
        cell_size = window_width // self.board_size
        window = pygame.display.set_mode((window_width, window_height))
        pygame.display.set_caption("Tic-Tac-Toe")

        agents[0].epsilon = 0
        agents[1].epsilon = 0
        side = ['X', 'O']
        self.__init__(board_size=self.board_size)  # Reset the board

        window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
        pygame.display.flip()

        if player_side == 'random' and vs == 'agent':
            if np.random.rand() > 0.5:
                agent_side = 0  # the agent plays X and opens the game
                move = agents[agent_side].choose_move(self)
                pygame.time.wait(1000)
                self.make_move(move)
                window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
                pygame.display.flip()
            else:
                agent_side = 1
        if player_side == 'O' and vs == 'agent':
            agent_side = 0  # the human plays O, so the agent (X) opens the game
            move = agents[agent_side].choose_move(self)
            pygame.time.wait(1000)
            self.make_move(move)
            window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
            pygame.display.flip()
        if player_side == 'X' and vs == 'agent':
            agent_side = 1

        pygame.display.set_caption(f"Turn: {side[self.current_player - 1]}")
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()

                if vs == 'player':
                    if event.type == pygame.MOUSEBUTTONDOWN:
                        mouse_pos = pygame.mouse.get_pos()
                        row = mouse_pos[1] // cell_size
                        col = mouse_pos[0] // cell_size
                        move = (row, col)
                        if move in self.available_moves():
                            self.make_move(move)
                            pygame.display.set_caption(f"Turn: {side[self.current_player - 1]}")
                            window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
                            pygame.display.flip()
                            winner = we_get_winner(self.check_winner())
                            if winner != '-':
                                pygame.display.set_caption(f"{winner}")
                                pygame.time.wait(2000)
                                pygame.quit()
                                return

                if vs == 'agent':
                    if event.type == pygame.MOUSEBUTTONDOWN:
                        mouse_pos = pygame.mouse.get_pos()
                        row = mouse_pos[1] // cell_size
                        col = mouse_pos[0] // cell_size
                        move = (row, col)
                        if move in self.available_moves():
                            self.make_move(move)
                            pygame.display.set_caption(f"Turn: {side[self.current_player - 1]}")
                            window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
                            pygame.display.flip()
                            winner = we_get_winner(self.check_winner())
                            if winner != '-':
                                pygame.display.set_caption(f"{winner}")
                                pygame.time.wait(2000)
                                pygame.quit()
                                return

                            # Computer's move
                            move = agents[agent_side].choose_move(self)
                            pygame.time.wait(1000)
                            self.make_move(move)
                            pygame.display.set_caption(f"Turn: {side[self.current_player - 1]}")
                            window = self.draw_board(window=window, cell_size=cell_size, window_height=window_height, window_width=window_width)
                            pygame.display.flip()
                            winner = we_get_winner(self.check_winner())
                            if winner != '-':
                                pygame.display.set_caption(f"{winner}")
                                pygame.time.wait(2000)
                                pygame.quit()
                                return
    def print_board(self):
        symbols = {2: 'O', 0: ' ', 1: 'X'}
        print('==========\n')
        for row in self.board[:-1]:
            print(" | ".join(symbols[symbol] for symbol in row))
            print("-" * (4 * self.board_size - 3))
        print(" | ".join(symbols[symbol] for symbol in self.board[-1]))
        print('\n')
    def available_moves(self):
        return [(i, j) for i in range(self.board_size) for j in range(self.board_size) if self.board[i][j] == 0]

    def make_move(self, move):
        self.board[move[0]][move[1]] = self.current_player
        # Switch turns: 1 -> 2, 2 -> 1
        self.current_player = 3 - self.current_player
    def check_winner(self):
        # Check rows and columns
        for row in self.board:
            if np.all(row == 1):
                return 1
            elif np.all(row == 2):
                return 2
        for col in range(self.board_size):
            if np.all(self.board[:, col] == 1):
                return 1
            elif np.all(self.board[:, col] == 2):
                return 2
        # Check both diagonals
        if np.all(np.diag(self.board) == 1):
            return 1
        elif np.all(np.diag(self.board) == 2):
            return 2
        if np.all(np.diag(np.fliplr(self.board)) == 1):
            return 1
        elif np.all(np.diag(np.fliplr(self.board)) == 2):
            return 2
        # Draw
        if len(self.available_moves()) == 0:
            return 0
        # No winner yet
        return -1
    def train(self, agent1, agent2, epochs=1000, use_low_eps=False):
        agent1_len_Q, agent2_len_Q = [], []
        arr = [round(0.1 * i * epochs) for i in range(1, 11)]  # checkpoints at every 10% of training
        x_win, o_win, draw = 0, 0, 0
        total_x_win, total_o_win, total_draw = 0, 0, 0
        total_x_wins, total_o_wins, total_draws = [], [], []
        for epoch in range(1, epochs + 1):
            if epoch in arr:
                if use_low_eps:
                    # Decay exploration by 17.5% at each checkpoint
                    agent1.epsilon -= agent1.epsilon * 0.175
                    agent2.epsilon -= agent2.epsilon * 0.175
                print(f'\nAfter {epoch} learning games:\n'
                      f'X: {round(x_win / arr[0] * 100, 2)}% ; '
                      f'O: {round(o_win / arr[0] * 100, 2)}% ; '
                      f'Draw: {round(draw / arr[0] * 100, 2)}%')
                x_win, o_win, draw = 0, 0, 0
            states_agent1, states_agent2 = [], []
            agent1_actions, agent2_actions = [], []
            self.__init__(board_size=self.board_size)  # Reset the board
            state = tuple(map(tuple, self.board))
            agent1_action = agent1.choose_move(self)
            states_agent1.append(state)
            agent1_actions.append(agent1_action)
            while True:
                # Player 1 (X) makes a move
                self.make_move(agent1_action)
                state = tuple(map(tuple, self.board))
                rewards = np.zeros(2)
                winner = self.check_winner()
                if winner == 1:
                    rewards[0] = 1
                    rewards[1] = -1
                    x_win += 1
                    total_x_win += 1
                elif winner == 2:
                    rewards[0] = -1
                    rewards[1] = 1
                    o_win += 1
                    total_o_win += 1
                elif winner == 0:
                    rewards[0] = 0.5
                    rewards[1] = 0.5
                    draw += 1
                    total_draw += 1
                if winner != -1:
                    total_x_wins.append(total_x_win)
                    total_o_wins.append(total_o_win)
                    total_draws.append(total_draw)
                    agent1.update_Q_values(states_agent1, agent1_actions, rewards[0])
                    agent2.update_Q_values(states_agent2, agent2_actions, rewards[1])
                    break
                states_agent2.append(state)
                # Player 2 (O) makes a move
                agent2_action = agent2.choose_move(self)
                agent2_actions.append(agent2_action)
                self.make_move(agent2_action)
                state = tuple(map(tuple, self.board))
                rewards = np.zeros(2)
                winner = self.check_winner()
                if winner == 1:
                    rewards[0] = 1
                    rewards[1] = -1
                    x_win += 1
                    total_x_win += 1
                elif winner == 2:
                    rewards[0] = -1
                    rewards[1] = 1
                    o_win += 1
                    total_o_win += 1
                elif winner == 0:
                    rewards[0] = 0.5
                    rewards[1] = 0.5
                    draw += 1
                    total_draw += 1
                if winner != -1:
                    total_x_wins.append(total_x_win)
                    total_o_wins.append(total_o_win)
                    total_draws.append(total_draw)
                    agent1.update_Q_values(states_agent1, agent1_actions, rewards[0])
                    agent2.update_Q_values(states_agent2, agent2_actions, rewards[1])
                    break
                states_agent1.append(state)
                agent1_action = agent1.choose_move(self)
                agent1_actions.append(agent1_action)
            agent1_len_Q.append(len(agent1.Q_values))
            agent2_len_Q.append(len(agent2.Q_values))
        return np.arange(1, epochs + 1), agent1_len_Q, agent2_len_Q, [total_x_wins, total_o_wins, total_draws]
    def play(self, agent1, agent2):
        self.__init__(board_size=self.board_size)
        agent1.epsilon = 0
        agent2.epsilon = 0
        agent = [agent1, agent2]
        self.print_board()
        while True:
            winner = self.check_winner()
            if winner == 1:
                print("X wins!")
                break
            elif winner == 2:
                print("O wins!")
                break
            elif winner == 0:
                print("Draw!")
                break
            move = agent[self.current_player - 1].choose_move(self)
            self.make_move(move)
            self.print_board()
class SARSA_Agent:
    def __init__(self, epsilon=0.3, alpha=10**-4, gamma=0.95):
        self.epsilon = epsilon  # exploration rate
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.Q_values = {}      # state -> array of action values, same shape as the board

    def choose_move(self, env):
        if np.random.rand() < self.epsilon:
            # Exploration: pick a random available move
            moves = env.available_moves()
            return moves[np.random.randint(0, len(moves))]
        else:
            # Exploitation: pick the move with the highest Q-value
            state = tuple(map(tuple, env.board))
            if state not in self.Q_values:
                # Initialize Q-values for a previously unseen state
                self.Q_values[state] = np.zeros_like(env.board, dtype=float)
            Q_values_state = self.Q_values[state]
            valid_moves = env.available_moves()
            move_values = [Q_values_state[move] for move in valid_moves]
            # Break ties among equally good moves at random
            best_moves = [move for move, value in zip(valid_moves, move_values) if value == max(move_values)]
            return random.choice(best_moves)

    def update_Q_values(self, states, actions, reward):
        for state in states:
            if state not in self.Q_values:
                self.Q_values[state] = np.zeros_like(state, dtype=float)
        # Compute a single TD error from the episode's final transition...
        td_target = reward + self.gamma * self.Q_values[states[-1]][actions[-1]]
        td_error = td_target - self.Q_values[states[-2]][actions[-2]]
        # ...and apply it to every state-action pair visited in the episode
        for state, action in zip(states, actions):
            self.Q_values[state][action] += self.alpha * td_error
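# Note: update_Q_values above computes one TD error from the episode's last
# transition and applies it to every visited state-action pair, which is closer
# to a Monte-Carlo-style batch update than to one-step SARSA. For comparison,
# textbook SARSA updates after each transition with
#     Q(s, a) <- Q(s, a) + alpha * [r + gamma * Q(s', a') - Q(s, a)]
# A minimal per-step sketch (hypothetical helper, not called by the training
# loop above; Q is a dict of board-shaped arrays like SARSA_Agent.Q_values):
def sarsa_step(Q, s, a, r, s_next, a_next, alpha=1e-4, gamma=0.95):
    q_next = 0.0 if s_next is None else Q[s_next][a_next]  # terminal states carry no future value
    Q[s][a] += alpha * (r + gamma * q_next - Q[s][a])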
class Player:
    def __init__(self, epsilon=0):
        self.epsilon = epsilon  # kept for interface compatibility with the agents

    def choose_move(self, env):
        valid_moves = env.available_moves()
        move = input("Row, column (1-based, space-separated): ")
        move = tuple(int(x) - 1 for x in move.split())
        while move not in valid_moves:
            move = input("That move is not available\nRow, column: ")
            move = tuple(int(x) - 1 for x in move.split())
        return move
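# Illustrative helper (an assumption, not part of the original script): play
# n greedy games between two trained agents to estimate win/draw rates, which
# complements the pygame demo below with a quick numeric sanity check.
def evaluate(env, agent_x, agent_o, n_games=1000):
    saved_eps = agent_x.epsilon, agent_o.epsilon
    agent_x.epsilon = agent_o.epsilon = 0  # disable exploration for evaluation
    counts = {1: 0, 2: 0, 0: 0}  # X wins, O wins, draws
    for _ in range(n_games):
        env.__init__(board_size=env.board_size)
        while env.check_winner() == -1:
            agent = agent_x if env.current_player == 1 else agent_o
            env.make_move(agent.choose_move(env))
        counts[env.check_winner()] += 1
    agent_x.epsilon, agent_o.epsilon = saved_eps  # restore exploration rates
    return counts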
env = TicTacToe()
sarsa_agent_1 = SARSA_Agent(epsilon=0.5)
sarsa_agent_2 = SARSA_Agent(epsilon=0.5)
ep = 100000
arange, ag1_Q, ag2_Q, wins = env.train(sarsa_agent_1, sarsa_agent_2, epochs=ep, use_low_eps=True)
player = Player()
# env.exe_play(agents=[sarsa_agent_1, sarsa_agent_2], vs='player')
# env.exe_play(agents=[sarsa_agent_1, sarsa_agent_2], vs='agent', player_side='X')
# env.exe_play(agents=[sarsa_agent_1, sarsa_agent_2], vs='agent', player_side='O')
env.exe_play(agents=[sarsa_agent_1, sarsa_agent_2], vs='agent', player_side='random')
plt.figure(figsize=(15, 6))
plt.title('Learned experience (Q-table size) vs. number of training episodes')
plt.plot(arange, ag1_Q, label='Agent X')
plt.plot(arange, ag2_Q, label='Agent O')
plt.xlabel('Episode')
plt.ylabel('Number of states')
plt.grid()
plt.legend()
plt.show()

plt.figure(figsize=(15, 6))
plt.title('Wins vs. number of training episodes')
winner = ['X', 'O', 'Draw']
for i in range(len(wins)):
    plt.plot(arange, np.array(wins[i]) / ep * 100, label=f'{winner[i]}')
plt.xlabel('Episode')
plt.ylabel('Wins, %')
plt.grid()
plt.legend()
plt.show()
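# Optional: persist the learned Q-tables so trained agents can be reused
# without retraining (illustrative sketch; the file name is an assumption,
# not part of the original script).
import pickle

with open('sarsa_q_tables.pkl', 'wb') as f:
    pickle.dump({'X': sarsa_agent_1.Q_values, 'O': sarsa_agent_2.Q_values}, f)

# To restore later, load the dict and assign the tables back before exe_play():
# with open('sarsa_q_tables.pkl', 'rb') as f:
#     tables = pickle.load(f)
# sarsa_agent_1.Q_values, sarsa_agent_2.Q_values = tables['X'], tables['O']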