forked from pytorch/glow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
char-rnn.cpp
304 lines (252 loc) · 10.6 KB
/
char-rnn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/**
* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Support/Support.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Timer.h"
#include <glog/logging.h>
#include <string>
//----------------------------------------------------------------------------//
// This is a small program that's based on Andrej's char-rnn generator. This is
// a small RNN-based neural network that's used to generate random text after
// analyzing some other text. The network is described here:
// http://karpathy.github.io/2015/05/21/rnn-effectiveness/
//----------------------------------------------------------------------------//
using namespace glow;
using llvm::format;
namespace {
/// Category grouping all char-rnn command line options in --help output.
llvm::cl::OptionCategory category("char-rnn Options");
/// Positional argument: path of the training text. The default "-" reads
/// the text from stdin (see loadFile).
static llvm::cl::opt<std::string> inputFilename(llvm::cl::desc("input file"),
llvm::cl::init("-"),
llvm::cl::Positional,
llvm::cl::cat(category));
/// Name of the Glow backend to compile for (e.g. Interpreter, CPU, OpenCL).
llvm::cl::opt<std::string> executionBackend(
"backend",
llvm::cl::desc("Backend to use, e.g., Interpreter, CPU, OpenCL:"),
llvm::cl::Optional, llvm::cl::init("Interpreter"), llvm::cl::cat(category));
/// Number of training epochs; each epoch trains on the whole input and then
/// generates a sample of text.
llvm::cl::opt<unsigned> numEpochs("epochs",
llvm::cl::desc("Process the input N times."),
llvm::cl::init(4), llvm::cl::value_desc("N"),
llvm::cl::cat(category));
/// Number of characters to generate after each training epoch.
llvm::cl::opt<unsigned>
generateChars("chars", llvm::cl::desc("Generate this number of chars."),
llvm::cl::init(10), llvm::cl::value_desc("N"),
llvm::cl::cat(category));
} // namespace
/// Clip the value \p c to the range 0..127, which is standard ascii.
/// Clip the value \p c to the range 0..127, which is standard ascii.
/// Note: on platforms where char is signed, negative chars convert to very
/// large size_t values and are therefore also clamped to 127.
static size_t clipASCII(char c) {
  const size_t code = c;
  return (code > 127) ? 127 : code;
}
/// Load text into \p inputText that has the format [B, S, 128], where B is
/// the batch size, S is the length of the sentence, and 128 is the one-hot
/// representation of the text (https://en.wikipedia.org/wiki/One-hot).
/// Load the expected index into \p nextChar that has the format [B, S], where
/// each element is the softmax index of the next char. If \p train is false
/// then only load the first slice of inputText.
/// Load text into \p inputText that has the format [B, S, 128], where B is
/// the batch size, S is the length of the sentence, and 128 is the one-hot
/// representation of the text (https://en.wikipedia.org/wiki/One-hot).
/// Load the expected index into \p nextChar that has the format [B, S], where
/// each element is the softmax index of the next char. If \p train is false
/// then only load the first slice of inputText.
static void loadText(Tensor &inputText, Tensor &nextChar, llvm::StringRef text,
                     bool train) {
  DCHECK_GT(text.size(), 2) << "The buffer must contain at least two chars";
  inputText.zero();
  nextChar.zero();
  const auto inputDims = inputText.dims();
  DCHECK_EQ(inputDims.size(), 3) << "invalid input tensor";
  const auto numSlices = inputDims[0];
  const auto seqLength = inputDims[1];
  auto inputHandle = inputText.getHandle();
  auto nextHandle = nextChar.getHandle<int64_t>();

  // Fill the tensor with slices from the sentence with an offset of 1.
  // Example:
  //  |Hell|o| World
  //  |ello| |World
  //  |llo |W|orld
  //  |lo W|o|rld
  for (dim_t slice = 0; slice < numSlices; slice++) {
    for (dim_t pos = 0; pos < seqLength; pos++) {
      // One-hot encode the current character.
      dim_t code = clipASCII(text[slice + pos]);
      inputHandle.at({slice, pos, code}) = 1.0;
      // In training mode also record the index of the following character.
      if (train) {
        size_t nextCode = clipASCII(text[slice + pos + 1]);
        nextHandle.at({slice, pos}) = nextCode;
      }
    }
    // Only load the first slice in the batch when in inference mode.
    if (!train) {
      return;
    }
  }
}
/// \returns the process-wide pseudo random number generator (lazily
/// constructed on first use).
PseudoRNG &getRNG() {
  static PseudoRNG generator;
  return generator;
}
/// This method selects a random number based on a softmax distribution. One
/// property of this distribution is that the sum of all probabilities is equal
/// to one. The algorithm that we use here picks a random number between zero
/// and one. Then, we scan the tensor and accumulate the probabilities. We stop
/// and pick the index when sum is greater than the selected random number.
/// This method selects a random number based on a softmax distribution. One
/// property of this distribution is that the sum of all probabilities is equal
/// to one. The algorithm that we use here picks a random number between zero
/// and one. Then, we scan the tensor and accumulate the probabilities. We stop
/// and pick the index when sum is greater than the selected random number.
static char getPredictedChar(Tensor &inputText, dim_t slice, dim_t word) {
  auto handle = inputText.getHandle();
  // Pick a random number between zero and one.
  const double threshold = std::abs(getRNG().nextRand());
  // Accumulate the probabilities and return the index at which the running
  // sum first crosses the threshold.
  double cumulative = 0;
  for (dim_t idx = 0; idx < 128; idx++) {
    cumulative += handle.at({slice, word, idx});
    if (cumulative > threshold) {
      return idx;
    }
  }
  // Fall back to the last ascii code if rounding kept us under the threshold.
  return 127;
}
/// Loads the content of a file or stdin to a memory buffer.
/// The default filename of "-" reads from stdin.
/// Loads the content of a file or stdin to a memory buffer.
/// The default filename of "-" reads from stdin.
static std::unique_ptr<llvm::MemoryBuffer> loadFile(llvm::StringRef filename) {
  auto bufferOrErr = llvm::MemoryBuffer::getFileOrSTDIN(filename);
  if (!bufferOrErr) {
    // Report the failure and the underlying OS error, then bail out.
    LOG(ERROR) << "Error! Failed to open file: " << filename.str() << "\n";
    LOG(ERROR) << bufferOrErr.getError().message() << "\n";
    exit(-1);
  }
  return std::move(bufferOrErr.get());
}
/// Creates a new RNN network. The network answers the question, given N chars
/// of input, what is the character following each one of these chars.
/// Creates a new RNN network. The network answers the question, given N chars
/// of input, what is the character following each one of these chars.
static Function *createNetwork(Module &mod, PlaceholderBindings &bindings,
                               dim_t minibatchSize, dim_t numSteps,
                               dim_t hiddenSize) {
  Function *fn = mod.createFunction("main");

  // One-hot encoded input text: [batch, steps, 128 ascii codes].
  auto *input = mod.createPlaceholder(
      ElemKind::FloatTy, {minibatchSize, numSteps, 128}, "input", false);
  bindings.allocate(input);
  // Softmax index of the expected next char for every step: [batch, steps].
  auto *expected = mod.createPlaceholder(
      ElemKind::Int64ITy, {minibatchSize, numSteps}, "expected", false);
  bindings.allocate(expected);

  // Cut the input and the expected labels into one slice per time step.
  std::vector<NodeValue> inputSlices;
  std::vector<Node *> labelSlices;
  for (unsigned step = 0; step < numSteps; step++) {
    auto inputName = "X." + std::to_string(step);
    auto *inputSlice = fn->createSlice(inputName, input, {0, step, 0},
                                       {minibatchSize, step + 1, 128});
    inputSlices.push_back(inputSlice);
    auto labelName = "Y." + std::to_string(step);
    auto *labelSlice =
        fn->createSlice(labelName, expected, {0, step}, {minibatchSize, step + 1});
    labelSlices.push_back(labelSlice);
  }

  // The recurrent core: an LSTM over the per-step slices.
  std::vector<NodeValue> lstmOutputs;
  fn->createLSTM(bindings, "rnn", inputSlices, minibatchSize, hiddenSize, 128,
                 lstmOutputs);

  // Attach a softmax (with its per-step label selector) to every LSTM output
  // and reshape so the steps can be concatenated along dimension 1.
  std::vector<NodeValue> stepResults;
  for (unsigned step = 0; step < numSteps; step++) {
    auto *selector = fn->createReshape("reshapeSelector", labelSlices[step],
                                       {minibatchSize, 1});
    auto *softmax = fn->createSoftMax("softmax", lstmOutputs[step], selector);
    auto *result =
        fn->createReshape("reshapeSM", softmax, {minibatchSize, 1, 128});
    stepResults.push_back(result);
  }

  Node *concatenated = fn->createConcat("output", stepResults, 1);
  auto *save = fn->createSave("result", concatenated);
  bindings.allocate(save->getPlaceholder());
  return fn;
}
int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv, " The char-rnn test\n\n");
  auto mb = loadFile(inputFilename);
  auto text = mb->getBuffer();
  LOG(INFO) << "Loaded " << text.size() << " chars.\n";
  PlaceholderBindings inferBindings, trainingBindings;

  // Network/training hyper-parameters.
  const dim_t numSteps = 50;
  const dim_t minibatchSize = 32;
  const dim_t hiddenSize = 256;
  // Validate the input length *before* computing batchSize: text.size() and
  // numSteps are unsigned, so the subtraction below would silently wrap
  // around if the text were too short.
  CHECK_GT(text.size(), numSteps) << "Text is too short";
  const dim_t batchSize = text.size() - numSteps;

  TrainingConfig TC;
  ExecutionEngine EET(executionBackend);
  TC.learningRate = 0.001;
  TC.momentum = 0.9;
  TC.batchSize = minibatchSize;
  auto &modT = EET.getModule();

  //// Train the network ////
  Function *F2 = createNetwork(modT, trainingBindings, minibatchSize, numSteps,
                               hiddenSize);
  differentiate(F2, TC);
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(modT.getPlaceholders());

  auto *XT = modT.getPlaceholderByName("input");
  auto *YT = modT.getPlaceholderByName("expected");

  // One-hot inputs and expected next-char indices for the full training set.
  Tensor thisCharTrain(ElemKind::FloatTy, {batchSize, numSteps, 128});
  Tensor nextCharTrain(ElemKind::Int64ITy, {batchSize, numSteps});
  loadText(thisCharTrain, nextCharTrain, text, true);

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  // Run this number of iterations over the input. On each iteration: train the
  // network on the whole input and then generate some sample text.
  for (unsigned epoch = 0; epoch < numEpochs; epoch++) {
    // Train the network on the whole input.
    LOG(INFO) << "Iteration " << epoch + 1 << "/" << numEpochs;
    runBatch(EET, trainingBindings, batchSize / minibatchSize, sampleCounter,
             {XT, YT}, {&thisCharTrain, &nextCharTrain});

    // Build a fresh inference network and copy the freshly trained weights
    // into it.
    ExecutionEngine EEO(executionBackend);
    inferBindings.clear();
    auto &mod = EEO.getModule();
    auto OF =
        createNetwork(mod, inferBindings, minibatchSize, numSteps, hiddenSize);
    auto *X = mod.getPlaceholderByName("input");
    inferBindings.allocate(mod.getPlaceholders());
    trainingBindings.copyTrainableWeightsTo(inferBindings);

    //// Use the trained network to generate some text ////
    auto *res =
        llvm::cast<SaveNode>(OF->getNodeByName("result"))->getPlaceholder();
    // Promote placeholders to constants.
    ::glow::convertPlaceholdersToConstants(OF, inferBindings, {X, res});
    EEO.compile(CompilationMode::Infer);

    // Load a few characters to start the text that we generate.
    Tensor currCharInfer(ElemKind::FloatTy, {minibatchSize, numSteps, 128});
    Tensor nextCharInfer(ElemKind::Int64ITy, {minibatchSize, numSteps});
    loadText(currCharInfer, nextCharInfer, text.slice(0, 128), false);

    auto *T = inferBindings.get(res);
    std::string result;
    std::string input;
    input.insert(input.begin(), text.begin(), text.begin() + numSteps);
    result = input;

    // Generate a sentence by running inference over and over again. Note:
    // this loop variable must not be named 'i' — it would shadow the epoch
    // counter above.
    for (unsigned gen = 0; gen < generateChars; gen++) {
      // Generate a char:
      updateInputPlaceholders(inferBindings, {X}, {&currCharInfer});
      EEO.run(inferBindings);
      // Pick a char at random from the softmax distribution.
      char c = getPredictedChar(*T, 0, numSteps - 1);

      // Update the inputs for the next iteration: slide the window forward
      // by one character.
      result.push_back(c);
      input.push_back(c);
      input.erase(input.begin());
      loadText(currCharInfer, nextCharInfer, input, false);
    }

    llvm::outs() << "Generated output:\n" << result << "\n";
  }

  return 0;
}