-
Notifications
You must be signed in to change notification settings - Fork 29
/
evaluation_local.py
103 lines (78 loc) · 3.03 KB
/
evaluation_local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import torch
import random
from agent.rl.submission import agent, get_observations
from env.chooseenv import make
from tabulate import tabulate
import argparse
from torch.distributions import Categorical
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
def get_actions(state, algo, indexs):
    """Choose actions for one team of snakes.

    Args:
        state: Environment state forwarded to ``get_observations``.
        algo: ``'rl'`` queries the imported ``agent``; any other value keeps
            the uniformly random actions.
        indexs: Indices of the snakes controlled by this team.

    Returns:
        A numpy array of action ids: three random ids in ``[0, 4)`` for the
        random policy, or one sampled id per row of the agent output.
    """
    # The random draw happens for every call, including 'rl', so the NumPy
    # random stream advances the same way regardless of the policy used.
    sampled = np.random.randint(4, size=3)
    if algo != 'rl':
        return sampled

    observations = get_observations(state, indexs, obs_dim=26, height=10, width=20)
    agent_output = torch.Tensor(agent.choose_action(observations))
    # NOTE(review): Categorical's first positional argument is `probs`, so
    # choose_action presumably returns probabilities, not raw logits - confirm.
    chosen = []
    for row in agent_output:
        chosen.append(Categorical(row).sample().item())
    return np.array(chosen)
def get_join_actions(obs, algo_list):
    """Build the six-entry joint action for both teams.

    Args:
        obs: Indexable observation container; only ``obs[0]`` is used and it
            is passed to both teams.
        algo_list: Two policy names; ``algo_list[0]`` drives snakes 0-2 and
            ``algo_list[1]`` drives snakes 3-5.

    Returns:
        A float numpy array of length 6: first team's actions, then the
        second team's.
    """
    shared_state = obs[0]
    snake_ids = list(range(6))

    # The first team is queried before the second; both calls consume the
    # NumPy random stream, so the order matters for reproducibility.
    team_a = get_actions(shared_state, algo_list[0], snake_ids[:3])
    team_b = get_actions(shared_state, algo_list[1], snake_ids[3:])

    joint = np.zeros(6)
    joint[:3] = team_a
    joint[3:] = team_b
    return joint
def run_game(env, algo_list, episode, verbose=False):
    """Play ``episode`` games between two policies and print a summary table.

    Args:
        env: Environment object providing ``reset()``, ``encode(action)`` and
            ``step(encoded_action)``; ``step`` must return a 5-tuple whose
            second and third items are the rewards and the done flag.
        algo_list: Two policy names; the first controls reward slots 0-2 and
            the second controls reward slots 3-5.
        episode: Number of episodes to play (an integer).
        verbose: When false, a progress dot is printed after each episode,
            with a line break every 100 episodes and after the last one.

    Returns:
        None. The mean per-slot reward and a score/win table are printed.
    """
    total_reward = np.zeros(6)
    # Index 0: first team wins, 1: second team wins, 2: draws.
    num_win = np.zeros(3)

    for i in range(1, episode + 1):
        episode_reward = np.zeros(6)
        state = env.reset()

        done = False
        while not done:
            joint_action = get_join_actions(state, algo_list)
            next_state, reward, done, _, _ = env.step(env.encode(joint_action))
            episode_reward += np.array(reward)
            state = next_state

        # Compute each team's episode score once and reuse it below.
        first_score = np.sum(episode_reward[:3])
        second_score = np.sum(episode_reward[3:])
        if first_score > second_score:
            num_win[0] += 1
        elif first_score < second_score:
            num_win[1] += 1
        else:
            num_win[2] += 1

        if not verbose:
            # flush so the dot shows up immediately; without it a
            # line-buffered terminal holds the dots back until the newline.
            print('.', end='', flush=True)
            if i % 100 == 0 or i == episode:
                print()

        total_reward += episode_reward

    # calculate results
    total_reward /= episode
    print("total_reward: ", total_reward)
    print(f'\nResult base on {episode} ', end='')
    print('episode:' if episode == 1 else 'episodes:')

    header = ['Name', algo_list[0], algo_list[1]]
    data = [
        ['score',
         np.round(np.sum(total_reward[:3]), 2),
         np.round(np.sum(total_reward[3:]), 2)],
        ['win', num_win[0], num_win[1]],
    ]
    print(tabulate(data, headers=header, tablefmt='pretty'))
if __name__ == "__main__":
    # Parse the command line first so that --help and usage errors return
    # before the environment is constructed.
    parser = argparse.ArgumentParser()
    parser.add_argument("--my_ai", default="rl", help="rl/random")
    parser.add_argument("--opponent", default="random", help="rl/random")
    # type=int is required: without it a value given on the command line
    # stays a string and run_game's range(1, episode + 1) raises TypeError.
    parser.add_argument("--episode", type=int, default=100,
                        help="number of episodes to play")
    args = parser.parse_args()

    env_type = 'snakes_3v3'
    game = make(env_type, conf=None)

    # Fixed seeds for torch, NumPy and the stdlib RNG to make runs repeatable.
    torch.manual_seed(1)
    np.random.seed(1)
    random.seed(1)

    agent_list = [args.my_ai, args.opponent]
    run_game(game, algo_list=agent_list, episode=args.episode, verbose=False)