statistics.py
from tensorboardX import SummaryWriter
import numpy as np


class Memory:
    """
    This class stores the data for Tensorboard summaries and terminal logs.
    The length of the stored data equals the SUMMARY_FREQ used while training.
    Data of length BUFFER_SIZE is crunched to a single value before being
    stored in this class.
    """

    def __init__(self, RUN_ID):
        self.base_tb_dir = "./training_data/summaries/" + RUN_ID
        self.writer = SummaryWriter(self.base_tb_dir)
        # Lists to store data of length SUMMARY_FREQ.
        self.rewards = []
        self.episode_lens = []
        self.actor_losses = []
        self.critic_losses = []
        self.advantages = []
        self.actor_lrs = []  # actor learning rate
        self.critic_lrs = []  # critic learning rate

    def add_data(
        self,
        reward,
        episode_len,
        actor_loss,
        critic_loss,
        advantage,
        actor_lr,
        critic_lr,
    ):
        """Add data for tensorboard and terminal logging."""
        self.rewards.append(reward)
        self.episode_lens.append(episode_len)
        self.actor_losses.append(actor_loss)
        self.critic_losses.append(critic_loss)
        self.advantages.append(advantage)
        self.actor_lrs.append(actor_lr)
        self.critic_lrs.append(critic_lr)

    def clear_memory(self):
        """Clear the collected data."""
        self.rewards.clear()
        self.episode_lens.clear()
        self.actor_losses.clear()
        self.critic_losses.clear()
        self.advantages.clear()
        self.actor_lrs.clear()
        self.critic_lrs.clear()

    def terminal_logs(self, step):
        """Display logs on the terminal."""
        if len(self.rewards) == 0:
            # Avoid calling np.mean/np.std on an empty list.
            self.rewards.append(0)
        print(
            "[INFO]\tSteps: {}\tMean Reward: {:0.3f}\tStd of Reward: {:0.3f}".format(
                step, np.mean(self.rewards), np.std(self.rewards)
            )
        )

    def tensorboard_logs(self, step):
        """Store the logs for tensorboard visualisation."""
        self.writer.add_scalar(
            "Environment/Cumulative_reward", np.mean(self.rewards), step
        )
        self.writer.add_scalar(
            "Environment/Episode_length", np.mean(self.episode_lens), step
        )
        self.writer.add_scalar(
            "Learning_rate/Actor_model", np.mean(self.actor_lrs), step
        )
        self.writer.add_scalar(
            "Learning_rate/Critic_model", np.mean(self.critic_lrs), step
        )
        self.writer.add_scalar("Loss/Policy_loss", np.mean(self.actor_losses), step)
        self.writer.add_scalar("Loss/Value_loss", np.mean(self.critic_losses), step)
        self.writer.add_scalar("Policy/Value_estimate", np.mean(self.advantages), step)
        self.clear_memory()
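

# A minimal usage sketch (not part of the original file): how a training loop
# might feed this class. RUN_ID, SUMMARY_FREQ, and the metric values below are
# illustrative placeholders, not values taken from the actual trainer.
if __name__ == "__main__":
    SUMMARY_FREQ = 100
    memory = Memory("example_run")
    for step in range(1, 1001):
        # In a real actor-critic trainer these would come from the environment
        # and the optimisation step; here they are dummy numbers.
        memory.add_data(
            reward=np.random.rand(),
            episode_len=np.random.randint(10, 200),
            actor_loss=np.random.rand(),
            critic_loss=np.random.rand(),
            advantage=np.random.randn(),
            actor_lr=3e-4,
            critic_lr=1e-3,
        )
        if step % SUMMARY_FREQ == 0:
            memory.terminal_logs(step)
            memory.tensorboard_logs(step)  # also clears the collected data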