-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmodel.go
186 lines (152 loc) · 5.5 KB
/
model.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
package lstm
import (
"github.com/gorgonia/parser"
G "gorgonia.org/gorgonia"
"gorgonia.org/tensor"
)
// Model holds the tensor of the model
type Model struct {
wi []float32
ui []float32
biasI []float32
wf []float32
uf []float32
biasF []float32
wo []float32
uo []float32
biasO []float32
wc []float32
uc []float32
biasC []float32
wy []float32
biasY []float32
inputSize int
outputSize int
hiddenSize int
}
// lstm represent a single cell of the RNN
// each LSTM owns its own ExprGraph
type lstm struct {
g *G.ExprGraph
wi *G.Node
ui *G.Node
biasI *G.Node
wf *G.Node
uf *G.Node
biasF *G.Node
wo *G.Node
uo *G.Node
biasO *G.Node
wc *G.Node
uc *G.Node
biasC *G.Node
wy *G.Node
biasY *G.Node
parser *parser.Parser
inputSize int
outputSize int
hiddenSize int
//inputVector *G.Node
prevHidden *G.Node
prevCell *G.Node
}
func (m *Model) newLSTM(hiddenT, cellT tensor.Tensor) *lstm {
lstm := new(lstm)
g := G.NewGraph()
lstm.g = g
p := parser.NewParser(g)
lstm.parser = p
lstm.hiddenSize = m.hiddenSize
lstm.inputSize = m.inputSize
lstm.outputSize = m.outputSize
prevSize := m.inputSize
hiddenSize := m.hiddenSize
outputSize := m.outputSize
// Create the tensor first
// input gate weights
wiT := tensor.New(tensor.WithShape(hiddenSize, prevSize), tensor.WithBacking(m.wi))
uiT := tensor.New(tensor.WithShape(hiddenSize, hiddenSize), tensor.WithBacking(m.ui))
biasIT := tensor.New(tensor.WithBacking(m.biasI), tensor.WithShape(hiddenSize))
// output gate weights
woT := tensor.New(tensor.WithShape(hiddenSize, prevSize), tensor.WithBacking(m.wo))
uoT := tensor.New(tensor.WithShape(hiddenSize, hiddenSize), tensor.WithBacking(m.uo))
biasOT := tensor.New(tensor.WithBacking(m.biasO), tensor.WithShape(hiddenSize))
// forget gate weights
wfT := tensor.New(tensor.WithShape(hiddenSize, prevSize), tensor.WithBacking(m.wf))
ufT := tensor.New(tensor.WithShape(hiddenSize, hiddenSize), tensor.WithBacking(m.uf))
biasFT := tensor.New(tensor.WithBacking(m.biasF), tensor.WithShape(hiddenSize))
// cell write
wcT := tensor.New(tensor.WithShape(hiddenSize, prevSize), tensor.WithBacking(m.wc))
ucT := tensor.New(tensor.WithShape(hiddenSize, hiddenSize), tensor.WithBacking(m.uc))
biasCT := tensor.New(tensor.WithBacking(m.biasC), tensor.WithShape(hiddenSize))
// Output vector
wyT := tensor.New(tensor.WithShape(outputSize, hiddenSize), tensor.WithBacking(m.wy))
biasYT := tensor.New(tensor.WithBacking(m.biasY), tensor.WithShape(outputSize))
// input gate weights
lstm.wi = G.NewMatrix(g, tensor.Float32, G.WithName("Wᵢ"), G.WithShape(hiddenSize, prevSize), G.WithValue(wiT))
lstm.ui = G.NewMatrix(g, tensor.Float32, G.WithName("Uᵢ"), G.WithShape(hiddenSize, hiddenSize), G.WithValue(uiT))
lstm.biasI = G.NewVector(g, tensor.Float32, G.WithName("Bᵢ"), G.WithShape(hiddenSize), G.WithValue(biasIT))
p.Set(`Wᵢ`, lstm.wi)
p.Set(`Uᵢ`, lstm.ui)
p.Set(`Bᵢ`, lstm.biasI)
// output gate weights
lstm.wo = G.NewMatrix(g, tensor.Float32, G.WithName("Wₒ"), G.WithShape(hiddenSize, prevSize), G.WithValue(woT))
lstm.uo = G.NewMatrix(g, tensor.Float32, G.WithName("Uₒ"), G.WithShape(hiddenSize, hiddenSize), G.WithValue(uoT))
lstm.biasO = G.NewVector(g, tensor.Float32, G.WithName("Bₒ"), G.WithShape(hiddenSize), G.WithValue(biasOT))
p.Set(`Wₒ`, lstm.wo)
p.Set(`Uₒ`, lstm.uo)
p.Set(`Bₒ`, lstm.biasO)
// forget gate weights
lstm.wf = G.NewMatrix(g, tensor.Float32, G.WithName("Wf"), G.WithShape(hiddenSize, prevSize), G.WithValue(wfT))
lstm.uf = G.NewMatrix(g, tensor.Float32, G.WithName("Uf"), G.WithShape(hiddenSize, hiddenSize), G.WithValue(ufT))
lstm.biasF = G.NewVector(g, tensor.Float32, G.WithName("Bf"), G.WithShape(hiddenSize), G.WithValue(biasFT))
p.Set(`Wf`, lstm.wf)
p.Set(`Uf`, lstm.uf)
p.Set(`Bf`, lstm.biasF)
// cell write
lstm.wc = G.NewMatrix(g, tensor.Float32, G.WithName("Wc"), G.WithShape(hiddenSize, prevSize), G.WithValue(wcT))
lstm.uc = G.NewMatrix(g, tensor.Float32, G.WithName("Uc"), G.WithShape(hiddenSize, hiddenSize), G.WithValue(ucT))
lstm.biasC = G.NewVector(g, tensor.Float32, G.WithName("bc"), G.WithShape(hiddenSize), G.WithValue(biasCT))
p.Set(`Wc`, lstm.wc)
p.Set(`Uc`, lstm.uc)
p.Set(`Bc`, lstm.biasC)
// Output vector
lstm.wy = G.NewMatrix(g, tensor.Float32, G.WithName("Wy"), G.WithShape(outputSize, hiddenSize), G.WithValue(wyT))
lstm.biasY = G.NewVector(g, tensor.Float32, G.WithName("by"), G.WithShape(outputSize), G.WithValue(biasYT))
p.Set(`Wy`, lstm.wy)
p.Set(`By`, lstm.biasY)
// this is to simulate a default "previous" state
lstm.prevHidden = G.NewVector(g, tensor.Float32, G.WithName("hₜ₋₁"), G.WithShape(hiddenSize), G.WithValue(hiddenT))
lstm.prevCell = G.NewVector(g, tensor.Float32, G.WithName("Cₜ₋₁"), G.WithShape(hiddenSize), G.WithValue(cellT))
return lstm
}
func newModelFromBackends(back *backends) *Model {
m := new(Model)
m.hiddenSize = back.HiddenSize
m.inputSize = back.InputSize
m.outputSize = back.OutputSize
// input gate weights
m.wi = back.Wi
m.ui = back.Ui
m.biasI = back.BiasI
// output gate weights
m.wo = back.Wo
m.uo = back.Uo
m.biasO = back.BiasO
// forget gate weights
m.wf = back.Wf
m.uf = back.Uf
m.biasF = back.BiasF
// cell write
m.wc = back.Wc
m.uc = back.Uc
m.biasC = back.BiasC
// Output vector
m.wy = back.Wy
m.biasY = back.BiasY
return m
}
// NewModel creates a new model
func NewModel(inputSize, outputSize int, hiddenSize int) *Model {
return newModelFromBackends(initBackends(inputSize, outputSize, hiddenSize))
}