Add prints for debugging

samuelfneumann · Nov 2, 2021 · ab8bf97 · ab8bf97
1 parent 3e88818
commit ab8bf97
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -712,3 +712,5 @@ sequential runs of hyperparameter setting `m` of the `Agent` in the
 * [ ] Add `TimeLimit` to `gym` package so that time limits can be altered
 
 * [ ] All input nodes should have unique names. Use `gop.Unique()`.
+
+* [ ] VAC still gets NaNs. The problem could be with multi-dimensional actions in `GaussianTreeMLP`. Does VPG also get NaNs?
diff --git a/agent/nonlinear/continuous/vanillaac/VanillaAC.go b/agent/nonlinear/continuous/vanillaac/VanillaAC.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	"github.com/samuelfneumann/golearn/agent"
+	"github.com/samuelfneumann/golearn/agent/nonlinear/continuous/policy"
 	"github.com/samuelfneumann/golearn/buffer/expreplay"
 	env "github.com/samuelfneumann/golearn/environment"
 	"github.com/samuelfneumann/golearn/network"
@@ -255,8 +256,10 @@ func New(e env.Environment, c agent.Config, seed int64) (agent.Agent, error) {
 // SelectAction returns an action for the timestep t
 func (v *VAC) SelectAction(t ts.TimeStep) *mat.VecDense {
 	a := v.behaviour.SelectAction(t)
+
+	// ! Standard deviation is taking off to infinity. Maybe use gop.Clamp()
 	fmt.Println()
-	fmt.Println(a)
+	fmt.Println(v.trainPolicy.(*policy.GaussianTreeMLP).StdDev())
 	return a
 }
 
@@ -286,7 +289,7 @@ func (v *VAC) ObserveFirst(t ts.TimeStep) error {
 // Observe stores an action taken in the environment and the next
 // time step as a result of taking that action
 func (v *VAC) Observe(action mat.Vector, nextStep ts.TimeStep) error {
-	fmt.Println(action)
+	//fmt.Println(action)
 	if !nextStep.First() {
 		nextAction := mat.NewVecDense(v.actionDims, nil)
 		transition := ts.NewTransition(v.prevStep, action.(*mat.VecDense),
Original file line number	Diff line number	Diff line change
Expand Up		@@ -712,3 +712,5 @@ sequential runs of hyperparameter setting `m` of the `Agent` in the
		* [ ] Add `TimeLimit` to `gym` package so that time limits can be altered

		* [ ] All input nodes should have unique names. Use `gop.Unique()`.

		* [ ] VAC still gets NaNs. The problem could be with multi-dimensional actions in `GaussianTreeMLP`. Does VPG also get NaNs?