Set value function input and target once

Previously, the value function/critic input and target in VPG and VAC were re-set on every training iteration within a single call to `Step`. This was unnecessary and increased compute time. Now these values are set only once per call to `Step`, before the training loop runs.
samuelfneumann · Sep 24, 2021 · ea10135 · ea10135
1 parent f025b86
commit ea10135
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -714,3 +714,5 @@ sequential runs of hyperparameter setting `m` of the `Agent` in the
 * [ ] Move GAEBuffer and ExpReplay to a new `buffer` package - in which case GAE buffer needs a public API
 
 * [ ] Rename `FifoRemove1ExpReplay` to `Default` and document what default means
+
+* [ ] Agents should have a Close() method, or create an agent.Closer interface, and check if agent is a Closer before closing at the end of main
diff --git a/agent/nonlinear/continuous/vanillaac/VanillaAC.go b/agent/nonlinear/continuous/vanillaac/VanillaAC.go
@@ -453,12 +453,11 @@ func (v *VAC) Step() error {
 	v.trainPolicyVM.Reset()
 
 	// === === Value Function Train === ===
+	err = v.vTrainValueFn.SetInput(S)
+	if err != nil {
+		return fmt.Errorf("step: could not set critic input state: %v", err)
+	}
 	for i := 0; i < v.valueGradSteps; i++ {
-		err = v.vTrainValueFn.SetInput(S)
-		if err != nil {
-			return fmt.Errorf("step: could not set critic input state on "+
-				"training iteration %d: %v", i, err)
-		}
 		err = v.vTrainValueFnVM.RunAll()
 		if err != nil {
 			return fmt.Errorf("step: could not run critic vm on training "+

diff --git a/agent/nonlinear/continuous/vanillapg/VanillaPG.go b/agent/nonlinear/continuous/vanillapg/VanillaPG.go
@@ -323,23 +323,24 @@ func (v *VPG) Step() error {
 	}
 	v.trainPolicyVM.Reset()
 
-	// Value function update
-	for i := 0; i < v.valueGradSteps; i++ {
-		if err := v.vTrainValueFn.SetInput(obs); err != nil {
-			return fmt.Errorf("step: could not set value function input "+
-				"at training iteration %d: %v", i, err)
-		}
+	// Set value function input
+	if err := v.vTrainValueFn.SetInput(obs); err != nil {
+		return fmt.Errorf("step: could not set value function input: %v", err)
+	}
 
-		trainValueFnTargetsTensor := tensor.NewDense(
-			tensor.Float64,
-			v.vTrainValueFnTargets.Shape(),
-			tensor.WithBacking(ret),
-		)
-		err = G.Let(v.vTrainValueFnTargets, trainValueFnTargetsTensor)
-		if err != nil {
-			return fmt.Errorf("step: could not set value function target "+
-				"at training iteration %d: %v", i, err)
-		}
+	// Set value function target
+	trainValueFnTargetsTensor := tensor.NewDense(
+		tensor.Float64,
+		v.vTrainValueFnTargets.Shape(),
+		tensor.WithBacking(ret),
+	)
+	err = G.Let(v.vTrainValueFnTargets, trainValueFnTargetsTensor)
+	if err != nil {
+		return fmt.Errorf("step: could not set value function target: %v", err)
+	}
+
+	// Update value function
+	for i := 0; i < v.valueGradSteps; i++ {
 		if err := v.vTrainValueFnVM.RunAll(); err != nil {
 			return fmt.Errorf("step: could not run value function vm "+
 				"at training iteration %d: %v", i, err)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -714,3 +714,5 @@ sequential runs of hyperparameter setting `m` of the `Agent` in the
		* [ ] Move GAEBuffer and ExpReplay to a new `buffer` package - in which case GAE buffer needs a public API

		* [ ] Rename `FifoRemove1ExpReplay` to `Default` and document what default means

		* [ ] Agents should have a Close() method, or create an agent.Closer interface, and check if agent is a Closer before closing at the end of main