-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrainer.ts
94 lines (88 loc) · 2.09 KB
/
trainer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import * as use from "@tensorflow-models/universal-sentence-encoder";
import * as tf from "@tensorflow/tfjs-node";
/** Tab-separated training split of the Korean UnSmile dataset. */
const TRAIN_DATA_URL =
  "https://raw.githubusercontent.com/smilegate-ai/korean_unsmile_dataset/main/unsmile_train_v1.0.tsv";
/** Tab-separated validation split of the Korean UnSmile dataset. */
const VALID_DATA_URL =
  "https://raw.githubusercontent.com/smilegate-ai/korean_unsmile_dataset/main/unsmile_valid_v1.0.tsv";
/** Width of one flattened sentence embedding; must match the first layer's `inputDim`. */
const EMBEDDING_DIM = 512;
/** Number of examples per training/validation batch. */
const BATCH_SIZE = 32;
/** Number of individual examples held in the shuffle buffer (32 batches' worth). */
const SHUFFLE_BUFFER_SIZE = 32 * BATCH_SIZE;
/** Number of passes over the training set. */
const EPOCHS = 5;

/**
 * Shape of one CSV element once a label column is configured: features are
 * grouped under `xs` and label columns under `ys`.
 */
type LabeledRow = {
  xs: { 문장: string };
  ys: Record<string, number>;
};

/**
 * Trains a small dense binary classifier on sentence embeddings.
 *
 * Each row's `문장` column is embedded with the Universal Sentence Encoder and
 * the `clean` column is used as the binary label (presumably 1 = clean
 * sentence; confirm against the dataset description). The returned promise
 * settles only once training has finished, so callers can observe failures.
 *
 * @returns A promise that resolves when all epochs have completed.
 * @throws Rejects if the encoder cannot be loaded, a dataset cannot be
 *   fetched/parsed, or training fails.
 */
async function main(): Promise<void> {
  const encoder = await use.load();

  // Builds the (embedding, label) pipeline for one dataset split.
  // Kept as a closure so it can share the already-loaded encoder.
  const loadEmbeddedDataset = (url: string, shuffle: boolean) => {
    const examples = tf.data
      .csv(url, {
        delimiter: "\t",
        hasHeader: true,
        configuredColumnsOnly: true,
        columnConfigs: {
          clean: {
            dtype: "int32",
            isLabel: true,
          },
          문장: {
            dtype: "string",
          },
        },
      })
      .mapAsync(async (raw: unknown) => {
        const row = raw as LabeledRow;
        const embedding = await encoder.embed(row.xs["문장"]);
        const features = embedding.flatten();
        // `flatten` returns a new tensor handle; release the intermediate 2-D
        // tensor, otherwise one handle leaks for every row of the dataset.
        embedding.dispose();
        return {
          xs: features,
          ys: Object.values(row.ys),
        };
      });

    // Shuffle individual examples *before* batching so that batch composition
    // changes between epochs; shuffling after batching only reorders batches.
    const ordered = shuffle ? examples.shuffle(SHUFFLE_BUFFER_SIZE) : examples;
    return ordered.batch(BATCH_SIZE);
  };

  const trainData = loadEmbeddedDataset(TRAIN_DATA_URL, true);
  // Validation metrics do not depend on order, so the validation split is not shuffled.
  const valData = loadEmbeddedDataset(VALID_DATA_URL, false);

  const model = tf.sequential({
    layers: [
      tf.layers.dense({
        inputDim: EMBEDDING_DIM,
        units: 512,
        activation: "relu",
      }),
      tf.layers.batchNormalization(),
      tf.layers.dense({
        units: 512,
        activation: "relu",
      }),
      tf.layers.batchNormalization(),
      tf.layers.dense({
        units: 1,
        activation: "sigmoid",
      }),
    ],
  });

  model.compile({
    optimizer: tf.train.adam(),
    // The output layer already applies a sigmoid, so the loss must operate on
    // probabilities. `tf.losses.sigmoidCrossEntropy` expects raw logits and
    // would apply the sigmoid a second time.
    loss: "binaryCrossentropy",
    metrics: [tf.metrics.binaryAccuracy],
  });

  // Await training so that `main` only resolves when it is done and any
  // training error propagates to the caller instead of being dropped.
  await model.fitDataset(trainData, {
    epochs: EPOCHS,
    validationData: valData,
  });
}
// Script entry point: run training, and on failure log the error and exit
// with a non-zero status instead of leaving an unhandled promise rejection.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});