Advertisement
mirosh111000

Q-learning(Maze)

May 17th, 2024
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 19.09 KB | None | 0 0
  1. import numpy as np
  2. import seaborn as sns
  3. import matplotlib.pyplot as plt
  4.  
  5.  
  6. def create_maze():
  7.     maze = np.zeros((39, 39), dtype=float)
  8.     maze_idx = [i for i, j in np.ndenumerate(np.zeros((39, 39), dtype=int))]
  9.     wall = -4
  10.     maze[0, :] = wall
  11.     maze[-1, :] = wall
  12.     maze[:, 0] = wall
  13.     maze[:, -1] = wall
  14.  
  15.     idx = [
  16.         (4, 1), (8, 1), (12, 1), (18, 1), (20, 1), (28, 1), (34, 1),
  17.  
  18.         (2, 2), (4, 2), (6, 2), (8, 2), (10, 2), (12, 2), (14, 2), (16, 2), (17, 2), (18, 2), (20, 2), (22, 2),
  19.         (23, 2), (24, 2), (25, 2), (26, 2), (28, 2), (30, 2), (31, 2), (32, 2), (34, 2), (36, 2),
  20.  
  21.         (2, 3), (6, 3), (8, 3), (10, 3), (14, 3), (20, 3), (26, 3), (28, 3), (32, 3), (36, 3),
  22.  
  23.         (2, 4), (3, 4), (4, 4), (5, 4), (6, 4), (7, 4), (8, 4), (10, 4), (11, 4), (12, 4), (13, 4), (14, 4), (16, 4),
  24.         (18, 4), (19, 4), (20, 4), (22, 4), (23, 4), (24, 4), (26, 4), (27, 4,), (28, 4), (29, 4), (30, 4),
  25.         (32, 4), (33, 4), (34, 4), (35, 4), (36, 4),
  26.  
  27.         (4, 5), (8, 5), (10, 5), (12, 5), (16, 5), (22, 5), (24, 5), (26, 5), (32, 5), (36, 5),
  28.  
  29.         (1, 6), (2, 6), (4, 6), (6, 6), (8, 6), (10, 6), (12, 6), (14, 6), (15, 6), (16, 6), (17, 6), (18, 6), (19, 6),
  30.         (20, 6), (21, 6), (22, 6), (24, 6), (26, 6), (28, 6), (29, 6), (30, 6), (32, 6), (33, 6), (34, 6), (36, 6),
  31.  
  32.         (4, 7), (6, 7), (10, 7), (12, 7), (18, 7), (24, 7), (28, 7), (30, 7), (34, 7), (36, 7),
  33.  
  34.         (2, 8), (3, 8), (4, 8), (6, 8), (8, 8), (9, 8), (10, 8), (12, 8), (14, 8), (15, 8), (16, 8), (18, 8), (20, 8),
  35.         (21, 8), (22, 8), (23, 8), (24, 8), (25, 8), (26, 8), (27, 8), (28, 8), (30, 8), (32, 8), (34, 8), (36, 8),
  36.  
  37.         (2, 9), (6, 9), (8, 9), (12, 9), (16, 9), (24, 9), (28, 9), (32, 9), (36, 9),
  38.  
  39.         (2, 10), (4, 10), (5, 10), (6, 10), (8, 10), (10, 10), (11, 10), (12, 10), (13, 10), (14, 10), (16, 10), (17, 10),
  40.         (18, 10), (20, 10), (21, 10), (22, 10), (24, 10), (26, 10), (28, 10), (30, 10), (31, 10), (32, 10), (33, 10),
  41.         (34, 10), (36, 10),
  42.  
  43.         (2, 11), (4, 11), (8, 11), (12, 11), (16, 11), (22, 11), (24, 11), (26, 11), (28, 11), (30, 11), (34, 11), (36, 11),
  44.  
  45.         (2, 12), (3, 12), (4, 12), (6, 12), (7, 12), (8, 12), (9, 12), (10, 12), (12, 12), (14, 12), (15, 12), (16, 12),
  46.         (17, 12), (18, 12), (20, 12), (22, 12), (23, 12), (24, 12), (26, 12), (28, 12), (30, 12), (32, 12), (34, 12),
  47.         (36, 12), (37, 12),
  48.  
  49.         (6, 13), (12, 13), (18, 13), (20, 13), (24, 13), (26, 13), (30, 13), (32, 13),
  50.  
  51.         (2, 14), (4, 14), (6, 14), (8, 14), (9, 14), (10, 14), (12, 14), (13, 14), (14, 14), (15, 14), (16, 14), (18, 14),
  52.         (20, 14), (21, 14), (22, 14), (24, 14), (25, 14), (26, 14), (27, 14), (28, 14), (29, 14), (30, 14), (32, 14),
  53.         (33, 14), (34, 14), (35, 14), (36, 14),
  54.  
  55.         (2, 15), (4, 15), (6, 15), (8, 15), (10, 15), (16, 15), (18, 15), (20, 15), (22, 15), (24, 15), (30, 15), (32, 15),
  56.         (36, 15),
  57.  
  58.         (1, 16), (2, 16), (4, 16), (5, 16), (6, 16), (8, 16), (10, 16), (11, 16), (12, 16), (13, 16), (14, 16), (16, 16),
  59.         (17, 16), (18, 16), (20, 16), (22, 16), (24, 16), (26, 16), (27, 16), (28, 16), (30, 16), (31, 16), (32, 16),
  60.         (34, 16), (35, 16), (36, 16),
  61.  
  62.         (4, 17), (14, 17), (18, 17), (22, 17), (26, 17), (32, 17),
  63.  
  64.         (2, 18), (3, 18), (4, 18), (6, 18), (7, 18), (8, 18), (9, 18), (10, 18), (11, 18), (12, 18), (14, 18), (16, 18),
  65.         (18, 18), (19, 18), (20, 18), (21, 18), (22, 18), (23, 18), (24, 18), (25, 18), (26, 18), (27, 18), (28, 18),
  66.         (29, 18), (30, 18), (32, 18), (33, 18), (34, 18), (36, 18),
  67.  
  68.         (2, 19), (6, 19), (8, 19), (10, 19), (14, 19), (16, 19), (22, 19), (30, 19), (34, 19), (36, 19),
  69.  
  70.         (2, 20), (4, 20), (5, 20), (6, 20), (8, 20), (10, 20), (11, 20), (12, 20), (13, 20), (14, 20), (16, 20), (17, 20),
  71.         (18, 20), (19, 20), (20, 20), (22, 20), (24, 20), (25, 20), (26, 20), (27, 20), (28, 20), (30, 20), (32, 20),
  72.         (34, 20), (36, 20), (37, 20),
  73.  
  74.         (2, 21), (4, 21), (8, 21), (12, 21), (16, 21), (20, 21), (22, 21), (28, 21), (32, 21), (34, 21),
  75.  
  76.         (2, 22), (4, 22), (5, 22), (6, 22), (8, 22), (9, 22), (10, 22), (12, 22), (13, 22), (14, 22), (16, 22), (18, 22),
  77.         (20, 22), (22, 22), (23, 22), (24, 22), (25, 22), (26, 22), (28, 22), (29, 22), (30, 22), (31, 22), (32, 22),
  78.         (34, 22), (35, 22), (36, 22),
  79.  
  80.         (2, 23), (6, 23), (10, 23), (14, 23), (18, 23), (20, 23), (24, 23), (30, 23), (34, 23),
  81.  
  82.         (2, 24), (3, 24), (4, 24), (6, 24), (7, 24), (8, 24), (10, 24), (11, 24), (12, 24), (14, 24), (15, 24), (16, 24),
  83.         (17, 24), (18, 24), (20, 24), (21, 24), (22, 24), (24, 24), (25, 24), (26, 24), (27, 24), (28, 24), (29, 24),
  84.         (30, 24), (32, 24), (33, 24), (34, 24), (36, 24), (37, 24),
  85.  
  86.         (4, 25), (12, 25), (18, 25), (20, 25), (22, 25), (28, 25), (32, 25), (34, 25),
  87.  
  88.         (1, 26), (2, 26), (4, 26), (5, 26), (6, 26), (8, 26), (9, 26), (10, 26), (11, 26), (12, 26), (13, 26), (14, 26),
  89.         (15, 26), (16, 26), (18, 26), (20, 26), (22, 26), (23, 26), (24, 26), (26, 26), (27, 26), (28, 26), (30, 26),
  90.         (31, 26), (32, 26), (34, 26), (35, 26), (36, 26),
  91.  
  92.         (4, 27), (6, 27), (8, 27), (12, 27), (14, 27), (18, 27), (24, 27), (28, 27), (30, 27), (34, 27),
  93.  
  94.         (2, 28), (3, 28), (4, 28), (6, 28), (8, 28), (10, 28), (12, 28), (14, 28), (16, 28), (17, 28), (18, 28), (19, 28),
  95.         (20, 28), (21, 28), (22, 28), (23, 28), (24, 28), (25, 28), (26, 28), (28, 28), (30, 28), (32, 28), (33, 28),
  96.         (34, 28), (36, 28), (37, 28),
  97.  
  98.         (4, 29), (6, 29), (10, 29), (14, 29), (18, 29), (20, 29), (30, 29), (36, 29),
  99.  
  100.         (2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (11, 30), (12, 30), (13, 30), (14, 30), (15, 30), (16, 30), (18, 30),
  101.         (20, 30), (22, 30), (23, 30), (24, 30), (25, 30), (26, 30), (27, 30), (28, 30), (29, 30), (30, 30), (31, 30),
  102.         (32, 30), (34, 30), (35, 30), (36, 30),
  103.  
  104.         (2, 31), (4, 31), (6, 31), (8, 31), (12, 31), (16, 31), (18, 31), (22, 31), (26, 31), (30, 31), (34, 31),
  105.  
  106.         (1, 32), (2, 32), (4, 32), (6, 32), (7, 32), (8, 32), (9, 32), (10, 32), (12, 32), (14, 32), (15, 32), (16, 32),
  107.         (18, 32), (19, 32), (20, 32), (21, 32), (22, 32), (24, 32), (26, 32), (28, 32), (29, 32), (30, 32), (32, 32),
  108.         (33, 32), (34, 32), (36, 32), (37, 32),
  109.  
  110.         (4, 33), (10, 33), (12, 33), (18, 33), (24, 33), (28, 33), (32, 33),
  111.  
  112.         (2, 34), (3, 34), (4, 34), (5, 34), (6, 34), (7, 34), (8, 34), (10, 34), (12, 34), (14, 34), (15, 34), (16, 34),
  113.         (18, 34), (20, 34), (21, 34), (22, 34), (23, 34), (24, 34), (25, 34), (26, 34), (28, 34), (30, 34), (31, 34),
  114.         (32, 34), (33, 34), (34, 34), (35, 34), (36, 34),
  115.  
  116.         (4, 35), (8, 35), (10, 35), (16, 35), (22, 35), (26, 35), (28, 35), (34, 35),
  117.  
  118.         (2, 36), (4, 36), (6, 36), (8, 36), (10, 36), (11, 36), (12, 36), (14, 36), (16, 36), (17, 36), (18, 36),
  119.         (20, 36), (22, 36), (24, 36), (26, 36), (27, 36), (28, 36), (29, 36), (30, 36), (32, 36), (34, 36), (36, 36),
  120.  
  121.         (2, 37), (6, 37), (8, 37), (14, 37), (18, 37), (20, 37), (24, 37), (30, 37), (32, 37), (36, 37),
  122.     ]
  123.     for i in idx: maze[i] = wall
  124.  
  125.     # Додавання виходу
  126.     exit = [(19, 38), (31, 38)]
  127.     maze[exit[0]] = -2
  128.     maze[exit[1]] = -2
  129.  
  130.     # Додавання стартової позиції
  131.     start_pos = (19, 0)
  132.     maze[start_pos] = -1
  133.  
  134.  
  135.     idx2 = [
  136.         (19, 1), (19, 2), (19, 3), (18, 3), (17, 3), (17, 4), (17, 5), (18, 5), (19, 5), (20, 5), (21, 5), (21, 4),
  137.         (21, 3), (22, 3), (23, 3), (24, 3), (25, 3), (25, 4), (25, 5), (25, 6), (25, 7), (26, 7), (27, 7), (27, 6),
  138.         (27, 5), (28, 5), (29, 5), (30, 5), (31, 5), (31, 6), (31, 7), (32, 7), (33, 7), (33, 8), (33, 9), (34, 9),
  139.         (35, 9), (35, 10), (35, 11), (35, 12), (35, 13), (36, 13), (37, 13), (37, 14), (37, 15), (37, 16), (37, 17),
  140.         (36, 17), (35, 17), (35, 18), (35, 19), (35, 20), (35, 21), (36, 21), (37, 21), (37, 22), (37, 23), (36, 23),
  141.         (35, 23), (35, 24), (35, 25), (36, 25), (37, 25), (37, 26), (37, 27), (36, 27), (35, 27), (35, 28), (35, 29),
  142.         (34, 29), (33, 29), (33, 30), (33, 31), (32, 31), (31, 31), (31, 32), (31, 33), (30, 33), (29, 33), (29, 34),
  143.         (29, 35), (30, 35), (31, 35), (31, 36), (31, 37)
  144.         ]
  145.  
  146.     idx1 = [
  147.         (19, 1), (19, 2), (19, 3), (18, 3), (17, 3), (16, 3), (15, 3), (15, 4), (15, 5), (14, 5), (13, 5), (13, 6),
  148.         (13, 7), (14, 7), (15, 7), (16, 7), (17, 7), (17, 8), (17, 9), (18, 9), (19, 9), (19, 10), (19, 11), (20, 11),
  149.         (21, 11), (21, 12), (21, 13), (22, 13), (23, 13), (23, 14), (23, 15), (23, 16), (23, 17), (24, 17), (25, 17),
  150.         (25, 16), (25, 15), (26, 15), (27, 15), (28, 15), (29, 15), (29, 16), (29, 17), (30, 17), (31, 17), (31, 18),
  151.         (31, 19), (32, 19), (33, 19), (33, 20), (33, 21), (33, 22), (33, 23), (32, 23), (31, 23), (31, 24), (31, 25),
  152.         (30, 25), (29, 25), (29, 26), (29, 27), (29, 28), (29, 29), (28, 29), (27, 29), (27, 28), (27, 27), (26, 27),
  153.         (25, 27), (25, 26), (25, 25), (24, 25), (23, 25), (23, 24), (23, 23), (22, 23), (21, 23), (21, 22), (21, 21),
  154.         (21, 20), (21, 19), (20, 19), (19, 19), (18, 19), (17, 19), (17, 18), (17, 17), (16, 17), (15, 17), (15, 16),
  155.         (15, 15), (14, 15), (13, 15), (12, 15), (11, 15), (11, 14), (11, 13), (10, 13), (9, 13), (8, 13), (7, 13),
  156.         (7, 14), (7, 15), (7, 16), (7, 17), (6, 17), (5, 17), (5, 18), (5, 19), (4, 19), (3, 19), (3, 20), (3, 21),
  157.         (3, 22), (3, 23), (4, 23), (5, 23), (5, 24), (5, 25), (6, 25), (7, 25), (7, 26), (7, 27), (7, 28), (7, 29),
  158.         (8, 29), (9, 29), (9, 30), (9, 31), (10, 31), (11, 31), (11, 32), (11, 33), (11, 34), (11, 35), (12, 35),
  159.         (13, 35), (13, 34), (13, 33), (14, 33), (15, 33), (16, 33), (17, 33), (17, 34), (17, 35), (18, 35), (19, 35),
  160.         (19, 36), (19, 37)
  161.         ]
  162.  
  163.     maze2 = np.copy(maze)
  164.     for i in idx1: maze2[i] = -0.5
  165.     for i in idx2: maze2[i] = -0.5
  166.        
  167.    
  168.  
  169.  
  170.     return maze, maze_idx, maze2, start_pos, exit
  171.  
  172.  
  173. class Maze:
  174.     def __init__(self, maze, maze_idx, maze_with_path, current_position, exit):
  175.         self.maze = maze  # Ігрове поле: 0 - порожньо, -1 - старт, -2 - вихід, -4 - стіна
  176.         self.maze_idx = maze_idx
  177.         self.maze_with_path = maze_with_path
  178.         self.current_position = current_position
  179.         self.start = current_position
  180.         self.exit = exit
  181.  
  182.    
  183.    
  184.     def display_maze(self, maze, path=[], arrow_path=[], title='Лабіринт'):
  185.         plt.figure(figsize=(10, 10))
  186.         plt.title(title)
  187.         if len(path) > 0:
  188.             for move, arrow in zip(path, arrow_path):
  189.                 plt.text(move[1]+0.15, move[0]+0.75, arrow, color='green', fontsize=10)
  190.         sns.heatmap(maze, cmap='hot', cbar=False, square=True, linewidth=0.5, linecolor='black', annot=False, fmt='')
  191.         plt.show()
  192.        
  193.    
  194.     def available_moves(self, pos):
  195.        
  196.         moves = [(pos[0]-1, pos[1]), (pos[0]+1, pos[1]), (pos[0], pos[1]-1), (pos[0], pos[1]+1)]
  197.        
  198.         correct_moves = []
  199.         for move in moves:
  200.             if move in self.maze_idx:
  201.                 correct_moves.append(move)
  202.        
  203.         return correct_moves
  204.    
  205.     def make_move(self, move):
  206.        
  207.         direction = None
  208.         if move[0] - self.current_position[0] == -1:
  209.             direction = "↑"
  210.         elif move[1] - self.current_position[1] == 1:
  211.             direction = "→"
  212.         elif move[0] - self.current_position[0] == 1:
  213.             direction = "↓"
  214.         elif move[1] - self.current_position[1] == -1:
  215.             direction = "←"
  216.        
  217.         self.current_position = move
  218.  
  219.         return direction
  220.    
  221.     def check_win(self):
  222.         if self.current_position not in (self.exit):
  223.             return 0
  224.         else:
  225.             return 1
  226.        
  227.     def get_state(self):
  228.         return self.current_position
  229.    
  230.     def get_reward(self, move):
  231.        
  232.         if self.maze[move] == 0:
  233.             return -1
  234.         elif self.maze[move] == -4:
  235.             return -1000
  236.         elif self.maze[move] == -2:
  237.             return 0
  238.         elif self.maze[move] == -1:
  239.             return -1
  240.        
  241.    
  242.     def train(self, agent, epochs=10, use_low_eps=False):
  243.         agent.rows, agent.cols = len(self.maze[0]), len(self.maze)
  244.         arr = [round(0.1*i*epochs) for i in range(1, 11)]
  245.         len_moves = []
  246.         for epoch in range(1, epochs+1):
  247.             if epoch in arr:
  248.                 if use_low_eps == True:
  249.                     agent.epsilon -= agent.epsilon*0.175
  250.  
  251.             path, arrow_path = [], []
  252.             self.current_position = self.start  # Скидання ігрового поля
  253.             while True:
  254.  
  255.                 state = self.current_position
  256.                 path.append(state)
  257.                
  258.                 action = agent.choose_move(self)
  259.                 direction = self.make_move(action)
  260.                 arrow_path.append(direction)
  261.                 next_state = self.current_position
  262.                 reward = self.get_reward(action)
  263.  
  264.                 agent.update_Q_values(self, state, action, reward, next_state)
  265.  
  266.                 if self.check_win() == 1:
  267.                     len_moves.append(len(path))
  268.                     if epoch % round(epochs*0.1) == 0:
  269.                         self.display_maze(self.maze, path=path, arrow_path=arrow_path, title=fr'epoch {epoch} - {next_state}')
  270.                     break
  271.                    
  272.         return np.arange(1, epochs+1), len_moves
  273.    
  274.     def play(self, agent, len_sh_path=86):
  275.         agent.rows, agent.cols, agent.epsilon = len(self.maze[0]), len(self.maze), 0.0
  276.         path, arrow_path = [], []
  277.         self.current_position = self.start  # Скидання ігрового поля
  278.         while True:
  279.  
  280.             state = self.current_position
  281.             path.append(state)
  282.  
  283.             action = agent.choose_move(self)
  284.             direction = self.make_move(action)
  285.             arrow_path.append(direction)
  286.             next_state = self.current_position
  287.             reward = self.get_reward(action)
  288.             if len(path) > 999:
  289.                 self.display_maze(self.maze, path=path, arrow_path=arrow_path, title=fr'Вихід не знайдено(Кількість ходів - {len(path)})')
  290.                 break
  291.  
  292.             if self.check_win() == 1:
  293.                 self.display_maze(self.maze, path=path, arrow_path=arrow_path, title=fr'Кількість ходів - {len(path)}(min={len_sh_path})')
  294.                 break
  295.        
  296.        
  297.                
  298.  
  299. class QLearningAgent:
  300.     def __init__(self, learning_rate=10**-4, discount_factor=0.9, epsilon=0.5):
  301.         self.learning_rate = learning_rate
  302.         self.discount_factor = discount_factor
  303.         self.epsilon = epsilon
  304.         self.Q_values = {}
  305.         self.rows = None
  306.         self.cols = None
  307.  
  308.     def choose_move(self, game):
  309.         # Вибір дії для наступного кроку
  310.         state = game.get_state()
  311.         move = game.available_moves(state)
  312.         if np.random.uniform(0, 1) < self.epsilon:
  313.             return move[np.random.randint(0, len(move))]
  314.         else:
  315.             # Вибір дії згідно з Q-таблицею
  316.             if state not in self.Q_values:
  317.                 # Якщо стан не існує у Q-таблиці, повертаємо випадкову дію
  318.                 return move[np.random.randint(0, len(move))]
  319.             else:
  320.                 # Вибираємо найкращу дію, враховуючи Q-значення
  321.                 q_values = self.Q_values[state]
  322.                 max_q_value = np.max(q_values)
  323.                 zero_indices = np.where(q_values == max_q_value)
  324.                 best_moves = [(i, j) for i, j in zip(zero_indices[0], zero_indices[1])]
  325.                 return best_moves[np.random.randint(0, len(best_moves))]
  326.  
  327.     def update_Q_values(self, game, state, action, reward, next_state):
  328.         # Оновлення значень Q-таблиці за допомогою формули Q-learning
  329.         if state not in self.Q_values:
  330.             maze_with_inf = np.full((self.rows, self.cols), -np.inf)
  331.             for i in game.available_moves(state):
  332.                 maze_with_inf[i] = 0.0
  333.             self.Q_values[state] = maze_with_inf
  334.            
  335.         if next_state not in self.Q_values:
  336.             maze_with_inf = np.full((self.rows, self.cols), -np.inf)
  337.             for i in game.available_moves(next_state):
  338.                 maze_with_inf[i] = 0.0
  339.             self.Q_values[next_state] = maze_with_inf
  340.  
  341.         # Обчислення нового Q-значення за формулою Q-learning
  342.         old_q_value = self.Q_values[state][action]
  343.         max_next_q_value = np.max(self.Q_values[next_state]) if next_state in self.Q_values else 0
  344.         new_q_value = old_q_value + self.learning_rate * (reward + self.discount_factor * max_next_q_value - old_q_value)
  345.  
  346.         # Оновлення Q-значення в таблиці
  347.         self.Q_values[state][action] = new_q_value
  348.  
  349. def create_small_maze():
  350.     maze = np.zeros((9, 9), dtype=float)
  351.     maze_idx = [i for i, j in np.ndenumerate(np.zeros((9, 9), dtype=int))]
  352.     wall = -4
  353.     maze[0, :] = wall
  354.     maze[-1, :] = wall
  355.     maze[:, 0] = wall
  356.     maze[:, -1] = wall
  357.    
  358.     idx = [
  359.         (2, 2), (3, 2), (4, 4), (5, 6), (7, 6), (7, 2), (3, 5), (2, 6), (3, 7), (3, 6), (3, 4), (5, 2), (6, 2),
  360.         (6, 4), (7, 4), (2, 4)
  361.     ]
  362.    
  363.     for i in idx: maze[i] = wall
  364.  
  365.     # Додавання виходу
  366.     exit = [(5, 8)]
  367.     maze[exit[0]] = -2
  368.    
  369.     # Додавання стартової позиції
  370.     start_pos = (1, 0)
  371.     maze[start_pos] = -1
  372.    
  373.     maze2 = np.copy(maze)
  374.     return maze, maze_idx, maze2, start_pos, exit
  375.            
  376.            
  377. def display_results(arange, len_moves):
  378.     plt.figure(figsize=(15, 6))
  379.     plt.title(f'Залежність кількості ходів від кількості епізодів навчання')
  380.     plt.plot(arange, len_moves)
  381.     plt.xlabel(f'Номер епізоду')
  382.     plt.ylabel(f'Кількість ходів')
  383.     plt.grid()
  384.     plt.show()
  385.  
  386.    
# --- Small 9x9 maze -----------------------------------------------------
# Build the maze, show it, train an agent, replay it, then plot the
# per-episode move counts.
maze_small, maze_idx_small, maze_with_path_small, current_position_small, exit_small = create_small_maze()

game_small = Maze(maze_small, maze_idx_small, maze_with_path_small, current_position_small, exit_small)
game_small.display_maze(game_small.maze)

# Fixed exploration rate for the whole run (use_low_eps=False).
agent_small = QLearningAgent(epsilon=0.2, learning_rate=10**-2)
arange_small, len_moves_small = game_small.train(agent=agent_small, epochs=10000, use_low_eps=False)
# len_sh_path only appears in the title of the replay figure.
game_small.play(agent_small, len_sh_path=14)
display_results(arange_small, len_moves_small)


# --- Large 39x39 maze ---------------------------------------------------
# NOTE: the module-level name `exit` below shadows the Python builtin.
maze, maze_idx, maze_with_path, current_position, exit = create_maze()


game = Maze(maze, maze_idx, maze_with_path, current_position, exit)
# Show the plain maze and then the variant with the highlighted cells.
game.display_maze(game.maze)
game.display_maze(game.maze_with_path)
agent = QLearningAgent(epsilon=0.2, learning_rate=10**-2)
# use_low_eps=True shrinks epsilon during training.
arange, len_moves = game.train(agent, epochs=25000, use_low_eps=True)
game.play(agent)
display_results(arange, len_moves)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement