Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from random import *
- import numpy as np
# Simulation parameters and payoff definitions for a 2x2 matrix game.

num_games = 100  # number of rounds simulated per experiment

# Payoff matrix for player A: rows = A's choice (0/1), columns = B's choice (0/1).
source_matrix = [[2, -3],
                 [-1, 2]]

# Same payoffs keyed by the (choice_A, choice_B) pair; kept in sync with
# source_matrix above (two representations of the same game).
result_of_players_choice = {(0, 0): 2, (0, 1): -3, (1, 0): -1, (1, 1): 2}


def sensor(p):
    """Bernoulli draw: return 0 with probability p, otherwise 1.

    (Was a lambda assigned to a name; PEP 8 prefers def.)
    """
    return 0 if random() < p else 1
def stats_list(stats_matrix, p_a=None, p_b=None, n_games=None):
    """Simulate a series of games and return the payoff of each round.

    stats_matrix: mapping from a (choice_A, choice_B) pair to a payoff.
    p_a, p_b: probabilities that A / B pick option 0. Default to the
        module-level prob_a / prob_b — the original always read those
        globals, silently ignoring any trained probabilities passed to
        run_games(); the parameters make the dependency explicit while
        staying backward compatible.
    n_games: number of rounds; defaults to the module-level num_games.
    """
    if p_a is None:
        p_a = prob_a
    if p_b is None:
        p_b = prob_b
    if n_games is None:
        n_games = num_games
    # One (choice_A, choice_B) pair per round; each `0 if random() < p else 1`
    # is the inlined Bernoulli draw (same call sequence as sensor()).
    games = [
        (0 if random() < p_a else 1, 0 if random() < p_b else 1)
        for _ in range(n_games)
    ]
    return [stats_matrix[game] for game in games]
- ave_ = lambda stats: np.mean(stats) # находим средний выигрыш
def math_exp(matrix, probe_A, probe_B):
    """Theoretical expected payoff for the mixed strategies.

    probe_A / probe_B are the probabilities that players A / B choose
    option 0 (row 0 / column 0 of `matrix`).
    """
    row_probs = (probe_A, 1 - probe_A)
    col_probs = (probe_B, 1 - probe_B)
    expectation = 0
    # Same summation order as the original expression: (0,0), (0,1), (1,0), (1,1).
    for i in range(2):
        for j in range(2):
            expectation += matrix[i][j] * row_probs[i] * col_probs[j]
    return expectation
def variance_f(expected_stats, probe_A, prob_b, payoff=None):
    """Theoretical variance of the game payoff.

    expected_stats: the expected payoff (e.g. the result of math_exp).
    probe_A, prob_b: probabilities that players A and B pick option 0.
    payoff: mapping (choice_A, choice_B) -> payoff. Defaults to the
        module-level result_of_players_choice (backward compatible with
        the original, which hard-coded that global).
    """
    if payoff is None:
        payoff = result_of_players_choice

    def sq_dev(x):
        # Squared deviation of an outcome from the expected payoff.
        return (x - expected_stats) ** 2

    # Same term order as the original expression.
    return (sq_dev(payoff[(0, 0)]) * probe_A * prob_b
            + sq_dev(payoff[(0, 1)]) * probe_A * (1 - prob_b)
            + sq_dev(payoff[(1, 0)]) * (1 - probe_A) * prob_b
            + sq_dev(payoff[(1, 1)]) * (1 - probe_A) * (1 - prob_b))
def run_games(probe_A, probe_B):
    """Simulate one series of games and print empirical vs. theoretical stats.

    probe_A / probe_B are the probabilities of players A / B choosing
    option 0, used for the theoretical expectation and variance.
    """
    # NOTE(review): stats_list() samples with the module-level prob_a/prob_b,
    # not with probe_A/probe_B, so the empirical numbers may not correspond
    # to the probabilities used in the theoretical values — confirm intent.
    outcomes = stats_list(result_of_players_choice)
    mean_gain = ave_(outcomes)
    expected = math_exp(source_matrix, probe_A, probe_B)
    empirical_std = np.std(outcomes)
    theor_variance = variance_f(expected, probe_A, probe_B)
    # Std-dev over the four raw payoff values, ignoring their probabilities.
    theor_std = np.std(list(result_of_players_choice.values()))
    print(f"Средний выигрыш/проигрыш игрока {mean_gain}")
    print(f"Матожидание: {expected}")
    print(f"Среднеквадратичное отклонение: {empirical_std}")
    print(f"Дисперсия: {theor_variance}")
    print(f"Cреднеквадратичное отклонение (теоретическое): {theor_std}")
def reinforcement_learning(num_games=100):
    """Urn-style reinforcement for player A: reward non-negative gains.

    A's mixed strategy is derived from a two-element urn; a non-negative
    payoff adds that payoff to the weight of the option A just played.
    B plays a fixed mixed strategy.
    """
    weights = [10, 10]  # urn weights for A's options 0 and 1 (was red/blue)
    probe_A = weights[0] / sum(weights)
    probe_B = 0.25  # B's fixed probability of choosing option 0
    for _ in range(num_games):
        choice_A, choice_B = sensor(probe_A), sensor(probe_B)
        gain = source_matrix[choice_A][choice_B]
        if gain >= 0:
            # Reinforce whichever option A actually played.
            weights[choice_A] += gain
        probe_A = weights[0] / sum(weights)
    # NOTE(review): sensor() returns 0 (row 0) with probability probe_A,
    # so 1 - probe_A is the probability of the SECOND row — confirm which
    # row the message's "первой строки" is meant to denote.
    print(f"Вероятность выбора первой строки игроком А после обучения: {1 - probe_A}.")
    run_games(probe_A, probe_B)
def learning_with_punishment(num_games=100):
    """Reinforcement with punishment: every gain, positive or negative,
    adjusts the weight of the option A just played.

    Larger initial weights (100 vs. 10) soften the impact of punishments.
    B plays a fixed mixed strategy.
    """
    weights = [100, 100]  # urn weights for A's options 0 and 1 (was red/blue)
    probe_A = weights[0] / sum(weights)
    probe_B = 0.25
    for _ in range(num_games):
        choice_A, choice_B = sensor(probe_A), sensor(probe_B)
        gain = source_matrix[choice_A][choice_B]
        # Unconditional update: losses shrink the played option's weight.
        weights[choice_A] += gain
        probe_A = weights[0] / sum(weights)
    print(f"Вероятность выбора первой строки игроком А после обучения: {1 - probe_A}.")
    run_games(probe_A, probe_B)
def reinforcement_learning_2(num_games=100):
    """Two-sided reinforcement: both players learn from the same game.

    A is rewarded when the payoff is non-negative; otherwise B (whose
    payoff is -gain in this zero-sum setup) is rewarded instead.
    """
    weights_a = [10, 10]  # urn weights for A's options 0 and 1
    weights_b = [10, 10]  # urn weights for B's options 0 and 1
    probe_A = weights_a[0] / sum(weights_a)
    probe_B = weights_b[0] / sum(weights_b)
    for _ in range(num_games):
        choice_A, choice_B = sensor(probe_A), sensor(probe_B)
        gain = source_matrix[choice_A][choice_B]
        if gain >= 0:
            # A won (or tied): reinforce A's played option.
            weights_a[choice_A] += gain
        else:
            # A lost, so B won -gain: reinforce B's played option.
            weights_b[choice_B] += -gain
        probe_A = weights_a[0] / sum(weights_a)
        probe_B = weights_b[0] / sum(weights_b)
    print(f"Вероятность выбора первой строки игроком А после обучения: {1 - probe_A}.")
    print(f"Вероятность выбора первой строки игроком B после обучения: {1 - probe_B}.")
    run_games(probe_A, probe_B)
# Script driver: runs the lab tasks in sequence.
print("\nЗадание 2\n")
# NOTE: prob_a / prob_b are module-level globals that stats_list() reads
# whenever run_games() simulates a series — they are NOT just local inputs
# to run_games(), and keep their last assigned values (0.5, 0.25) for all
# later learning experiments.
prob_a = 0.5
prob_b = 0.5
run_games(prob_a, prob_b)
print("\nЗадание 3\n")
prob_a = 0.5
prob_b = 0.25
run_games(prob_a, prob_b)
print("\nОбучение с подкреплением:\n")
reinforcement_learning()
print("\n")
print("Обучение с наказанием:\n")
learning_with_punishment()
print("\n")
print("Обучение с подкреплением 2:\n")
reinforcement_learning_2()
Add Comment
Please, Sign In to add comment