Skip to content
This repository has been archived by the owner on Nov 10, 2023. It is now read-only.

Commit

Permalink
Offset categorical probabilities
Browse files Browse the repository at this point in the history
Offset the categorical probabilities by a small positive constant for
numerical stability.
  • Loading branch information
samuelfneumann committed Sep 24, 2021
1 parent 30b5bd6 commit 3babb20
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
6 changes: 6 additions & 0 deletions agent/nonlinear/continuous/policy/CategoricalMLP.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"gorgonia.org/tensor"
)

// minProb is the small positive constant added to the categorical
// probabilities for numerical stability.
const minProb = float64(1e-5)

// CategoricalMLP implements a categorical policy using an MLP to
// predict action logits in each state. Given an environment with N
// actions in each state, the probabilities of selecting any action
Expand Down Expand Up @@ -117,6 +119,10 @@ func NewCategoricalMLP(env environment.Environment, batchForLogProb int,
logits = G.Must(G.BroadcastSub(logits, max, nil, []byte{1}))
probs := G.Must(G.Exp(logits))

// Offset the probabilities by minProb, a small positive constant, for
// numerical stability.
offset := G.NewConstant(minProb, G.WithShape())
probs = G.Must(G.Add(probs, offset))

// Compute the log probability of actions that are input by an
// external source using the LogProbOf() method.
actionIndices := G.NewMatrix(
Expand Down
4 changes: 2 additions & 2 deletions expconfig/VPG_Categorical_Cartpole.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"Beta1": 0.9,
"Beta2": 0.999,
"Batch": 1,
"Clip": -1.0
"Clip": 0.5
}
}
],
Expand All @@ -89,7 +89,7 @@
"Beta1": 0.9,
"Beta2": 0.999,
"Batch": 1,
"Clip": -1.0
"Clip": 0.5
}
}
],
Expand Down

0 comments on commit 3babb20

Please sign in to comment.