AC+PID.txt
clear;
clc;
% GLOBAL PARAMETERS
% Parameter values
num_episodes = 1024;
numValidationExperiments = 20;
% Buck Boost Converter Parameters
V_source_value = 48;
L_inductance = 10e-6;
C_capacitance = 40e-3;
R_load = 100;
% Controller Parameters
gain_K = 100;
integral_I = 350000;
derivative_D = 1000;
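% PWM timing: periodVal is the switching period (s) and pw_percent the pulse
% width (%); assumed to feed the pulse generator block in the Simulink model.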
periodVal = 0.00001;
pw_percent = 50;
% Signal Processing Parameters
prev_time = 0;
init_action = 1;
stopping_criterion = 1000;
threshold1 = 0.4;
threshold2 = 1;
error_threshold = 0.02;
Ts = 0.0001;
Tf = 0.3;
V_ref = 110;
% RL Parameters
miniBatch_percent = 0.8;
learnRateActor = 0.05;
learnRateCritic = 0.05;
criticLayerSizes = [256 256];
actorLayerSizes = [256 256];
discountFactor = 0.995;
max_steps = ceil(Tf/Ts);
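% PPO-style hyperparameters (experience horizon, clip factor, entropy weight,
% mini-batch size); not referenced by the actor-critic agent options further below.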
ExperienceHorizonLength = 10;
ClipFactorVal = 0.2;
EntropyLossWeightVal = 0.05;
MiniBatchSizeVal = ceil(ExperienceHorizonLength*miniBatch_percent);
NumEpochsVal = 5;
DiscountFactorVal = 0.99;
% RL Agent
mdl = 'DCDC_BBC_hybrid1';
open_system(mdl)
agentblk = [mdl '/RL Agent'];
numObs = 3; % [v0, e, de/dt]
observationInfo = rlNumericSpec([numObs,1],...
    'LowerLimit',[-inf -inf 0]',...
    'UpperLimit',[0.1 V_ref inf]');
observationInfo.Name = 'observations';
observationInfo.Description = 'output voltage, voltage error, and error derivative';
numObservations = observationInfo.Dimension(1);
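% Discrete action set {0, 1}; assumed to be the on/off switching command
% applied to the converter in the Simulink model.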
a = [0;1];
actionInfo = rlFiniteSetSpec(a);
env = rlSimulinkEnv(mdl,agentblk,observationInfo,actionInfo);
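% Reset the model variable init_action to 1 at the start of every episode.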
env.ResetFcn = @(in) setVariable(in,'init_action',1);
num_inputs = numObs;
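% Critic network: estimates the state value from the three observations
% using two 256-unit hidden layers.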
criticNetwork = [
    imageInputLayer([num_inputs 1 1],'Normalization','none','Name','state')
    fullyConnectedLayer(criticLayerSizes(1),'Name','CriticFC1')
    reluLayer('Name','CriticRelu1')
    fullyConnectedLayer(criticLayerSizes(2),'Name','CriticFC2')
    reluLayer('Name','CriticRelu2')
    fullyConnectedLayer(1,'Name','CriticOutput')
];
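% Actor network: maps the observations to a softmax probability over the
% two discrete actions.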
actorNetwork = [
    imageInputLayer([numObs 1 1],'Normalization','none','Name','observation')
    fullyConnectedLayer(actorLayerSizes(1),'Name','ActorFC1')
    reluLayer('Name','ActorRelu1')
    fullyConnectedLayer(actorLayerSizes(2),'Name','ActorFC2')
    reluLayer('Name','ActorRelu2')
    fullyConnectedLayer(2,'Name','Action')
    softmaxLayer('Name','actionProbability')
];
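% Wrap the networks in actor and critic representations; gradients are
% clipped at 1 for both.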
actorOpts = rlRepresentationOptions('LearnRate',learnRateActor,'GradientThreshold',1);
actor = rlStochasticActorRepresentation(actorNetwork,observationInfo,actionInfo,...
    'Observation',{'observation'},actorOpts);
criticOpts = rlRepresentationOptions('LearnRate',learnRateCritic,'GradientThreshold',1);
critic = rlValueRepresentation(criticNetwork,observationInfo,'Observation',{'state'},criticOpts);
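% Assemble the actor-critic (AC) agent; it acts every Ts seconds with
% discount factor discountFactor.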
agentOpts = rlACAgentOptions('SampleTime', Ts, 'DiscountFactor', discountFactor);
agent = rlACAgent(actor,critic,agentOpts);
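% Training options: up to num_episodes episodes of at most max_steps steps;
% early stopping is effectively disabled (average-reward threshold of inf),
% and any agent whose episode reward exceeds 50000 is saved to disk.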
trainOpts = rlTrainingOptions(...
    'MaxEpisodes',num_episodes,...
    'MaxStepsPerEpisode',max_steps,...
    'Verbose',true,...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',inf,...
    'ScoreAveragingWindowLength',50,...
    'SaveAgentCriteria',"EpisodeReward",...
    'SaveAgentValue',50000);
% Train Agent
trainingStats = train(agent,env,trainOpts);
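% Post-training check, a minimal sketch (assumed; not defined elsewhere in this
% script): replay the trained agent against the Simulink environment for the
% numValidationExperiments episodes declared above and collect the experiences.
simOpts = rlSimulationOptions('MaxSteps',max_steps,'NumSimulations',numValidationExperiments);
validationExperiences = sim(env,agent,simOpts);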