From e47853e6b5b8cae0e5bec50667dd4bf2cefc5a6d Mon Sep 17 00:00:00 2001
From: kwangjinoh
Date: Sat, 24 Mar 2018 11:15:18 +0900
Subject: [PATCH 1/2] Fixed 12_1_rnn_basics.py bugs

---
 12_1_rnn_basics.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/12_1_rnn_basics.py b/12_1_rnn_basics.py
index 2ec10a2..fc39d3f 100644
--- a/12_1_rnn_basics.py
+++ b/12_1_rnn_basics.py
@@ -11,8 +11,7 @@
 # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
 cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
 
-# (num_layers * num_directions, batch, hidden_size)
-# (batch, num_layers * num_directions, hidden_size) for batch_first=True
+# (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
 hidden = (Variable(torch.randn(1, 1, 2)))
 
 # Propagate input through RNN
@@ -32,6 +31,9 @@
 out, hidden = cell(inputs, hidden)
 print("sequence input size", inputs.size(), "out size", out.size())
 
+# hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
+hidden = Variable(torch.randn(1, 3, 2))
+
 # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
 # 3 batches 'hello', 'eolll', 'lleel'
 # rank = (3, 5, 4)
@@ -50,7 +52,7 @@
 cell = nn.RNN(input_size=4, hidden_size=2)
 
 # The given dimensions dim0 and dim1 are swapped.
-inputs = inputs.transpose(3, dim1=1, dim2=2)
+inputs = inputs.transpose(dim0=0, dim1=1)
 # Propagate input through RNN
 # Input: (seq_len, batch_size, input_size) when batch_first=False (default)
 # S x B x I
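Note on the convention behind these fixes: for nn.RNN the hidden state is always laid out as (num_layers * num_directions, batch, hidden_size); batch_first only changes the input/output layout. The standalone sketch below (sizes mirror the 3-batch 'hello' example above; it is illustrative only and not part of the patched file) checks both cases, including the corrected transpose call:

import torch
import torch.nn as nn
from torch.autograd import Variable

# hidden is (num_layers * num_directions, batch, hidden_size) in both cases
hidden = Variable(torch.randn(1, 3, 2))

# batch_first=True -> input is (batch, seq_len, input_size)
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
inputs = Variable(torch.randn(3, 5, 4))
out, _ = cell(inputs, hidden)
print(out.size())      # torch.Size([3, 5, 2])

# batch_first=False (default) -> input is (seq_len, batch, input_size),
# so the batch-first tensor only needs dim0 and dim1 swapped
cell = nn.RNN(input_size=4, hidden_size=2)
seq_first = inputs.transpose(dim0=0, dim1=1)
out, _ = cell(seq_first, hidden)
print(out.size())      # torch.Size([5, 3, 2])
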
From 11f238eae60aef00202a447229989c6b279e8256 Mon Sep 17 00:00:00 2001
From: kwangjinoh
Date: Sat, 24 Mar 2018 13:11:24 +0900
Subject: [PATCH 2/2] Fixed RNN hidden shape bugs

---
 12_2_hello_rnn.py     | 6 +++---
 12_3_hello_rnn_seq.py | 6 +++---
 12_4_hello_rnn_emb.py | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/12_2_hello_rnn.py b/12_2_hello_rnn.py
index 848a43a..c0364e3 100644
--- a/12_2_hello_rnn.py
+++ b/12_2_hello_rnn.py
@@ -44,14 +44,14 @@ def forward(self, hidden, x):
 
         # Propagate input through RNN
         # Input: (batch, seq_len, input_size)
-        # hidden: (batch, num_layers * num_directions, hidden_size)
+        # hidden: (num_layers * num_directions, batch, hidden_size)
         out, hidden = self.rnn(x, hidden)
         return hidden, out.view(-1, num_classes)
 
     def init_hidden(self):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
-        return Variable(torch.zeros(batch_size, num_layers, hidden_size))
+        # (num_layers * num_directions, batch, hidden_size)
+        return Variable(torch.zeros(num_layers, batch_size, hidden_size))
 
 
 # Instantiate RNN model
diff --git a/12_3_hello_rnn_seq.py b/12_3_hello_rnn_seq.py
index 29f90d5..5a0c2f2 100644
--- a/12_3_hello_rnn_seq.py
+++ b/12_3_hello_rnn_seq.py
@@ -46,16 +46,16 @@ def __init__(self, num_classes, input_size, hidden_size, num_layers):
 
     def forward(self, x):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
+        # (num_layers * num_directions, batch, hidden_size) for batch_first=True
         h_0 = Variable(torch.zeros(
-            x.size(0), self.num_layers, self.hidden_size))
+            self.num_layers, x.size(0), self.hidden_size))
 
         # Reshape input
         x.view(x.size(0), self.sequence_length, self.input_size)
 
         # Propagate input through RNN
         # Input: (batch, seq_len, input_size)
-        # h_0: (batch, num_layers * num_directions, hidden_size)
+        # h_0: (num_layers * num_directions, batch, hidden_size)
         out, _ = self.rnn(x, h_0)
         return out.view(-1, num_classes)
 
diff --git a/12_4_hello_rnn_emb.py b/12_4_hello_rnn_emb.py
index 71fa85c..3ab2098 100644
--- a/12_4_hello_rnn_emb.py
+++ b/12_4_hello_rnn_emb.py
@@ -36,16 +36,16 @@ def __init__(self):
 
     def forward(self, x):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
+        # (num_layers * num_directions, batch, hidden_size)
         h_0 = Variable(torch.zeros(
-            x.size(0), num_layers, hidden_size))
+            num_layers, x.size(0), hidden_size))
 
         emb = self.embedding(x)
         emb = emb.view(batch_size, sequence_length, -1)
 
         # Propagate embedding through RNN
         # Input: (batch, seq_len, embedding_size)
-        # h_0: (batch, num_layers * num_directions, hidden_size)
+        # h_0: (num_layers * num_directions, batch, hidden_size)
         out, _ = self.rnn(emb, h_0)
         return self.fc(out.view(-1, num_classes))
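The same rule applies when h_0 is created inside forward, as in these three hello-RNN scripts: it is allocated as (num_layers, batch, hidden_size) even though the input itself is batch-first. A minimal standalone check (constants chosen to match the scripts, otherwise illustrative):

import torch
import torch.nn as nn
from torch.autograd import Variable

num_layers = 1
batch_size = 1
sequence_length = 6
input_size = 5
hidden_size = 5

rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)

# h_0: (num_layers * num_directions, batch, hidden_size), independent of batch_first
h_0 = Variable(torch.zeros(num_layers, batch_size, hidden_size))
# input: (batch, seq_len, input_size) because batch_first=True
x = Variable(torch.randn(batch_size, sequence_length, input_size))

out, h_n = rnn(x, h_0)
print(out.size())  # torch.Size([1, 6, 5])
print(h_n.size())  # torch.Size([1, 1, 5])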