Remove reward variance from training logs
ronaldosvieira committed Apr 23, 2020
1 parent 101b322 commit 4cd1b18
Showing 1 changed file with 7 additions and 23 deletions.
30 changes: 7 additions & 23 deletions gym_locm/util/interleaved-self-play.py
@@ -691,7 +691,7 @@ def interleaved_self_play(params):
     model1.save(model_path1 + '/0-episodes')
     model2.save(model_path2 + '/0-episodes')
 
-    results = [[[], []], [[], []]]
+    results = [[], []]
 
     # calculate utilities
     eval_every_ep = train_episodes / num_evals
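For context, a sketch of the data-structure change this hunk makes (the numeric values are illustrative, not taken from the repository):

    # Before: one [means, stds] pair of lists per player order.
    results = [[[], []], [[], []]]
    results[0][0].append(0.62)  # mean reward, first-player evaluations
    results[0][1].append(0.11)  # reward std dev, first-player evaluations (dropped by this commit)

    # After: only mean rewards are kept, one flat list per player order.
    results = [[], []]
    results[0].append(0.62)     # mean reward, first-player evaluations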
@@ -791,12 +791,7 @@ def callback2(_locals, _globals):
         print(f"Done: {mean2}")
         print()
 
-        if optimize_for == PlayerOrder.SECOND:
-            results[0][0].append(mean2)
-            results[0][1].append(std2)
-        else:
-            results[1][0].append(mean2)
-            results[1][1].append(std2)
+        results[1 if optimize_for == PlayerOrder.FIRST else 0].append(mean2)
 
         model2.last_eval = episodes_so_far
         model2.next_eval += eval_every_ep
@@ -817,12 +812,7 @@ def callback(_locals, _globals):
         print(f"Done: {mean1}")
         print()
 
-        if optimize_for == PlayerOrder.FIRST:
-            results[0][0].append(mean1)
-            results[0][1].append(std1)
-        else:
-            results[1][0].append(mean1)
-            results[1][1].append(std1)
+        results[0 if optimize_for == PlayerOrder.FIRST else 1].append(mean1)
 
         model1.last_eval = episodes_so_far
         model1.next_eval += eval_every_ep
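Both callback hunks apply the same refactor: with the std-dev lists gone, the four-line branch collapses into a conditional index. A standalone sketch of the equivalence, using a stand-in PlayerOrder enum (the real enum lives elsewhere in gym_locm; its shape here is assumed):

    from enum import Enum

    class PlayerOrder(Enum):  # stand-in, assumed shape
        FIRST = 0
        SECOND = 1

    results = [[], []]
    optimize_for = PlayerOrder.FIRST
    mean1 = 0.62  # illustrative evaluation result

    # Old form (minus the std-dev appends this commit removes):
    if optimize_for == PlayerOrder.FIRST:
        results[0].append(mean1)
    else:
        results[1].append(mean1)

    # New form: the branch becomes a conditional index.
    results[0 if optimize_for == PlayerOrder.FIRST else 1].append(mean1)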
@@ -855,12 +845,9 @@ def callback(_locals, _globals):
 
     if optimize_for == PlayerOrder.SECOND:
         mean_reward1, mean_reward2 = mean_reward2, mean_reward1
-        std_reward1, std_reward2 = std_reward2, std_reward1
 
-    results[0][0].append(mean_reward1)
-    results[1][0].append(mean_reward2)
-    results[0][1].append(std_reward1)
-    results[1][1].append(std_reward2)
+    results[0].append(mean_reward1)
+    results[1].append(mean_reward2)
 
     # train the first player model
    model1.learn(total_timesteps=1000000000, callback=callback)
@@ -882,12 +869,9 @@ def callback(_locals, _globals):
 
     if optimize_for == PlayerOrder.SECOND:
         mean_reward1, mean_reward2 = mean_reward2, mean_reward1
-        std_reward1, std_reward2 = std_reward2, std_reward1
 
-    results[0][0].append(mean_reward1)
-    results[1][0].append(mean_reward2)
-    results[0][1].append(std_reward1)
-    results[1][1].append(std_reward2)
+    results[0].append(mean_reward1)
+    results[1].append(mean_reward2)
 
     # save the final models
     model1.save(model_path1 + '/final')
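The two closing hunks are identical bookkeeping around the final evaluations. A condensed sketch, assuming mean_reward1 and mean_reward2 come from the evaluation step just above in the full script; the swap appears to keep results[0] tracking whichever player order is being optimized for, mirroring the conditional index in the callbacks:

    # Swap so results[0] keeps tracking the optimized player order.
    if optimize_for == PlayerOrder.SECOND:
        mean_reward1, mean_reward2 = mean_reward2, mean_reward1

    results[0].append(mean_reward1)  # optimized player order
    results[1].append(mean_reward2)  # opposing player order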