MATLAB R2021a RL SAC Error
I got this error; does anyone have an idea what the problem is?
Caused by:
    Error using rl.env.SimulinkEnvWithAgent>localHandleSimoutErrors (line 667)
    Invalid input argument type or size such as observation, reward, isdone or loggedSignals.
    Error using rl.env.SimulinkEnvWithAgent>localHandleSimoutErrors (line 667)
    One input argument can have dimension labels only when the other input argument is an unformatted scalar. Use .* for element-wise multiplication.
-------------------------------------------
numObs = 21;  % Number of observations
numAct = 2;   % Number of actions
obsInfo = rlNumericSpec([numObs 1]);
actInfo = rlNumericSpec([numAct 1]);
actInfo.LowerLimit = -1;
actInfo.UpperLimit = 1;
mdl = 'RLagentsimlSAC';
agentblk = [mdl '/CarMaker/VehicleControl/CreateBus VhclCtrl/RL Agent'];
env = rlSimulinkEnv(mdl,agentblk,obsInfo,actInfo);
env.ResetFcn = @(in) setVariable(in , 'x0',rand());
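% Optional sanity check (suggestion only, uncomment to run): validateEnvironment
% runs a short simulation and reports mismatches between the Simulink
% observation/reward/isdone signals and the specs above, which is one common
% cause of the "Invalid input argument type or size" error.
% validateEnvironment(env)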
rng(0)
% open_system(mdl)
Ts = 0.1;
Tf = 150;
cnet = [
    featureInputLayer(numObs,"Normalization","none","Name","observation")
    fullyConnectedLayer(128,"Name","fc1")
    concatenationLayer(1,2,"Name","concat")
    reluLayer("Name","relu1")
    fullyConnectedLayer(64,"Name","fc3")
    reluLayer("Name","relu2")
    fullyConnectedLayer(32,"Name","fc4")
    reluLayer("Name","relu3")
    fullyConnectedLayer(1,"Name","CriticOutput")];
actionPath = [
    featureInputLayer(numAct,"Normalization","none","Name","action")
    fullyConnectedLayer(128,"Name","fc2")];
criticNetwork = layerGraph(cnet);
criticNetwork = addLayers(criticNetwork, actionPath);
criticNetwork = connectLayers(criticNetwork,"fc2","concat/in2");
% plot(criticNetwork)
criticdlnet = dlnetwork(criticNetwork,'Initialize',false);
criticdlnet1 = initialize(criticdlnet);
criticdlnet2 = initialize(criticdlnet);
criticOptions = rlRepresentationOptions('LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',1e-4);
critic1 = rlQValueRepresentation(criticdlnet1,obsInfo,actInfo, ...
    'Observation',{'observation'},'Action',{'action'},criticOptions);
critic2 = rlQValueRepresentation(criticdlnet2,obsInfo,actInfo, ...
    'Observation',{'observation'},'Action',{'action'},criticOptions);
% Create the actor network layers.
statePath = [
    featureInputLayer(numObs,"Normalization","none","Name","observation")
    fullyConnectedLayer(128,"Name","fc1")
    reluLayer("Name","relu1")
    fullyConnectedLayer(64,"Name","fc2")
    reluLayer("Name","relu2")];
meanPath = [
    fullyConnectedLayer(32,"Name","MeanFC1")
    reluLayer("Name","relu3")
    fullyConnectedLayer(numAct,"Name","Mean")];
stdPath = [
    fullyConnectedLayer(numAct,"Name","StdFC")
    reluLayer("Name","relu4")
    softplusLayer("Name","StandardDeviation")];
concatPath = concatenationLayer(1,2,'Name','GaussianParameters');
% Connect the layers.
actorNetwork = layerGraph(statePath);
actorNetwork = addLayers(actorNetwork,meanPath);
actorNetwork = addLayers(actorNetwork,stdPath);
actorNetwork = addLayers(actorNetwork,concatPath);
actorNetwork = connectLayers(actorNetwork,'relu2','MeanFC1/in');
actorNetwork = connectLayers(actorNetwork,'relu2','StdFC/in');
actorNetwork = connectLayers(actorNetwork,'Mean','GaussianParameters/in1');
actorNetwork = connectLayers(actorNetwork,'StandardDeviation','GaussianParameters/in2');
% plot(actorNetwork)
actordlnet = dlnetwork(actorNetwork);
actorOptions = rlRepresentationOptions('Optimizer','adam','LearnRate',1e-3,...
                                       'GradientThreshold',1,'L2RegularizationFactor',1e-5);
actor = rlStochasticActorRepresentation(actordlnet,obsInfo,actInfo,actorOptions,...
    'Observation',{'observation'});
agentOpts = rlSACAgentOptions( ...
    "SampleTime",Ts, ...
    "TargetSmoothFactor",1e-3, ...    
    "ExperienceBufferLength",1e6, ...
    "MiniBatchSize",512, ...
    "NumWarmStartSteps",1000, ...
    "DiscountFactor",0.99);
% agentOpts.ActorOptimizerOptions.Algorithm = 'adam';
% agentOpts.ActorOptimizerOptions.LearnRate = 1e-4;
% agentOpts.ActorOptimizerOptions.GradientThreshold = 1;
% 
% for ct = 1:2
%     agentOpts.CriticOptimizerOptions(ct).Algorithm = "adam";
%     agentOpts.CriticOptimizerOptions(ct).LearnRate = 1e-4;
%     agentOpts.CriticOptimizerOptions(ct).GradientThreshold = 1;
% end
agent = rlSACAgent(actor,[critic1,critic2],agentOpts);
trainOpts = rlTrainingOptions(...
    "MaxEpisodes", 5000, ...
    "MaxStepsPerEpisode", floor(Tf/Ts), ...
    "ScoreAveragingWindowLength", 100, ...
    "Plots", "training-progress", ...
    "StopTrainingCriteria", "AverageReward", ...
    "StopTrainingValue", 100, ...
    "UseParallel", false);
if trainOpts.UseParallel
    trainOpts.ParallelizationOptions.AttachedFiles = [pwd,filesep] + ...
        ["bracelet_with_vision_link.STL";
        "half_arm_2_link.STL";
        "end_effector_link.STL";
        "shoulder_link.STL";
        "base_link.STL";
        "forearm_link.STL";
        "spherical_wrist_1_link.STL";
        "bracelet_no_vision_link.STL";
        "half_arm_1_link.STL";
        "spherical_wrist_2_link.STL"];
end
doTraining = true;
if doTraining
    stats = train(agent,env,trainOpts);
else
    load("kinovaBallBalanceAgent.mat")       
end
---------------------------------------------------------------
3 Comments
Emmanouil Tzorakoleftherakis on 31 Jan 2023
If you had this example working with TD3, the error is likely due to how you structured your actor and critic for SAC. I would use the default agent feature, which gives you an initial policy architecture, and then compare it to what you have to find the error. Without reproducible code, it is not easy to identify the problem.
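As a rough sketch of that suggestion (assuming R2021a syntax; the hidden-unit count is illustrative and the exact getCritic/getModel calls may vary by release), a default SAC agent can be built directly from the specs and its networks inspected for comparison:
initOpts = rlAgentInitializationOptions('NumHiddenUnit',128);   % illustrative size
defaultAgent = rlSACAgent(obsInfo,actInfo,initOpts,agentOpts);  % default actor/critic built from the specs
% Extract the generated networks and compare their layer graphs with the custom ones.
critics = getCritic(defaultAgent);
plot(layerGraph(getModel(critics(1))))
plot(layerGraph(getModel(getActor(defaultAgent))))
Differences in how the mean/standard-deviation paths are wired, or in where the action path joins the critic, tend to stand out quickly in that comparison.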
Answers (0)