function [modelFile, trainLoss] = train_pinn_model_4(sampleFile, trainParams)
% PINN
% A physics-informed neural network (PINN) is a type of neural network
% architecture designed to incorporate physical principles or equations
% into the learning process. It combines deep learning techniques with
% domain-specific knowledge, making it particularly suitable for problems
% governed by physics.
% In addition to standard data-driven training, PINNs use terms in the
% loss function to enforce consistency with known physical laws, equations,
% and constraints.
% https://en.wikipedia.org/wiki/Physics-informed_neural_networks
% https://benmoseley.blog/my-research/so-what-is-a-physics-informed-neural-network/
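%
% Example usage (illustrative only -- the field values below are hypothetical,
% as is the sample file name, but the field names are the ones this file
% actually reads from trainParams):
%
%   global trainParams                  % modelLoss below reads trainParams via a global
%   trainParams.type             = "pinn";
%   trainParams.initTimeStep     = 0.5;
%   trainParams.numLayers        = 2;
%   trainParams.numNeurons       = 64;
%   trainParams.numEpochs        = 50;
%   trainParams.miniBatchSize    = 256;
%   trainParams.initLearningRate = 1e-3;
%   trainParams.stopLearningRate = 1e-5;
%   trainParams.lrDropEpoch      = 10;
%   trainParams.lrDropFactor     = 0.5;
%   trainParams.alpha            = 0.8;  % weight on the data loss vs. the physics loss
%   [modelFile, trainLoss] = train_pinn_model_4("samples.mat", trainParams);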
% load samples and prepare training dataset
ds = load(sampleFile);
numSamples = length(ds.samples);
modelFile = "model\"+trainParams.type+"_"+num2str(numSamples)+".mat";
% generate data
% feature data: 4-D initial state x0 = [q1; q2; q1dot; q2dot] plus the elapsed time
% label data: the state x = [q1; q2; q1dot; q2dot] at that elapsed time
initTimes = 1:trainParams.initTimeStep:5; % initial times from 1 s to 5 s, spaced by trainParams.initTimeStep
tTrain = [];
xTrain = [];
yTrain = [];
for i = 1:numSamples
data = load(ds.samples{i,1}).state;
t = data(1,:);
x = data(2:5, :); % q1,q2,q1_dot,q2_dot
for tInit = initTimes
initIdx = find(t >= tInit, 1, 'first');
x0 = x(:, initIdx); % Initial state
t0 = t(initIdx); % Start time
for j = initIdx+1 : length(t)
tTrain = [tTrain, t(j)-t0];
xTrain = [xTrain, x0];
yTrain = [yTrain, x(:,j)];
end
end
end
disp(num2str(size(xTrain, 2)) + " samples are generated for training.");
% Create neural network
numStates = 4;
layers = [
featureInputLayer(numStates+1, "Name", "input")
];
for i = 1:trainParams.numLayers
layers = [
layers
fullyConnectedLayer(trainParams.numNeurons)
reluLayer
];
end
layers = [
layers
fullyConnectedLayer(numStates, "Name", "output")
];
% convert the layer array to a dlnetwork object
net = dlnetwork(layers);
net = dlupdate(@double, net);
% plot(net)
% analyzeNetwork(net);
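% For example, with trainParams.numLayers = 2 and trainParams.numNeurons = 64
% (hypothetical values), the network is:
%   featureInputLayer(5) -> FC(64) -> ReLU -> FC(64) -> ReLU -> FC(4)
% i.e. the input is [x0; dt] and the output is the predicted state at t0+dt.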
% training options
monitor = trainingProgressMonitor;
monitor.Metrics = "Loss";
monitor.Info = ["LearnRate" ...
"IterationPerEpoch" ...
"MaximumIteration" ...
"Epoch" ...
"Iteration" ...
"GradientsNorm"...
"StepNorm"];
monitor.XLabel = "Iteration";
net = train_adam_update(net, tTrain, xTrain, yTrain, trainParams, monitor);
trainLoss = monitor.MetricData.Loss(:,2);
save(modelFile, 'net');
end
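%
% Once trained, the saved network can be queried by stacking an initial state
% and an elapsed time into a single 5-element feature vector (illustrative
% snippet; x0, dt, and the file name are placeholders):
%
%   md = load(modelFile);                      % contains 'net'
%   x0 = [0.5; -0.2; 0; 0];                    % [q1; q2; q1dot; q2dot] at t0
%   dt = 1.5;                                  % elapsed time since t0, in seconds
%   xPred = predict(md.net, dlarray([x0; dt], "CB"));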
%%
function net = train_adam_update(net, tTrain, xTrain, yTrain, trainParams, monitor)
% mini-batch training with the Adam optimizer
miniBatchSize = trainParams.miniBatchSize;
lrRate = trainParams.initLearningRate;
dataSize = length(tTrain);
numBatches = floor(dataSize/miniBatchSize);
numIterations = trainParams.numEpochs * numBatches;
accFcn = dlaccelerate(@modelLoss);
avgGrad = [];
avgSqGrad = [];
iter = 0;
epoch = 0;
while epoch < trainParams.numEpochs && ~monitor.Stop
epoch = epoch + 1;
% Shuffle data.
idx = randperm(dataSize);
tAll = tTrain(:, idx);
XAll = xTrain(:, idx);
YAll = yTrain(:, idx);
for j = 1 : numBatches
iter = iter + 1;
startIdx = (j-1)*miniBatchSize + 1;
endIdx = min(j*miniBatchSize, dataSize);
tBatch = tAll(:, startIdx:endIdx);
xBatch = XAll(:, startIdx:endIdx);
yBatch = YAll(:, startIdx:endIdx);
T = gpuArray(dlarray(tBatch, "CB"));
X = gpuArray(dlarray(xBatch, "CB"));
Y = gpuArray(dlarray(yBatch, "CB"));
% Evaluate the model loss and gradients using dlfeval and the
% modelLoss function.
[loss, gradients] = dlfeval(accFcn, net, T, X, Y);
% Update the network parameters using the ADAM optimizer.
[net, avgGrad, avgSqGrad] = adamupdate(net, gradients, avgGrad, avgSqGrad, iter, lrRate);
recordMetrics(monitor, iter, Loss=loss);
if mod(iter, trainParams.numEpochs) == 0
monitor.Progress = 100*iter/numIterations;
updateInfo(monitor, ...
LearnRate = lrRate, ...
Epoch = epoch, ...
Iteration = iter, ...
MaximumIteration = numIterations, ...
IterationPerEpoch = numBatches);
end
end
% learning-rate schedule: every lrDropEpoch epochs, drop the rate by a factor
% of lrDropFactor until it reaches stopLearningRate
if mod(epoch,trainParams.lrDropEpoch) == 0
if lrRate > trainParams.stopLearningRate
lrRate = lrRate*trainParams.lrDropFactor;
else
lrRate = trainParams.stopLearningRate;
end
end
end
end
%% loss function
function [loss, gradients] = modelLoss(net, T, X, Y)
% make prediction
[Z, ~] = forward(net, [X;T]);
dataLoss = mse(Z, Y);
% time derivatives of the prediction via automatic differentiation
q1 = Z(1,:);
q2 = Z(2,:);
q1d = Z(3,:);
q2d = Z(4,:);
% accelerations as time derivatives of the predicted velocities;
% 'EnableHigherDerivatives' is required because these derivatives are
% differentiated again when taking the gradient of the loss with respect
% to the network parameters
q1dd = dlgradient(sum(q1d, 'all'), T, 'EnableHigherDerivatives', true);
q2dd = dlgradient(sum(q2d, 'all'), T, 'EnableHigherDerivatives', true);
% velocities recomputed as time derivatives of the predicted positions
q1d = dlgradient(sum(q1, 'all'), T, 'EnableHigherDerivatives', true);
q2d = dlgradient(sum(q2, 'all'), T, 'EnableHigherDerivatives', true);
% physics residual: zero when the predicted trajectory satisfies the
% equations of motion
fY = physics_law([q1;q2], [q1d;q2d], [q1dd;q2dd]);
fT = zeros(size(fY), 'like', fY);
physicLoss = mse(fY, fT);
% initial constraints
% T0 = zeros(size(T), 'like', T);
% [Z0, ~] = forward(net, [X;T0]);
% initLoss = mse(Z0, X);
% total loss: weighted combination of the data loss and the physics loss
% (note: trainParams is read through a global here, so the caller must have
% declared trainParams as global before assigning its fields)
global trainParams
loss = trainParams.alpha*dataLoss + (1-trainParams.alpha)*physicLoss;
gradients = dlgradient(loss, net.Learnables);
end