Skip to content

Commit

Permalink
Added missing Python docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Eugenio Bargiacchi committed Apr 6, 2016
1 parent af1ec0c commit 864548b
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 9 deletions.
73 changes: 70 additions & 3 deletions src/Python/MDP/Policies/EpsilonPolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,76 @@ void exportEpsilonPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<EpsilonPolicy, bases<AIToolbox::PolicyInterface<size_t>>>{"EpsilonPolicy", init<const AIToolbox::PolicyInterface<size_t> &, optional<double>>()}
.def("setEpsilon", &EpsilonPolicy::setEpsilon)
.def("getEpsilon", &EpsilonPolicy::getEpsilon);
// Python bindings for the MDP EpsilonPolicy wrapper.
//
// NOTE(review): per the docstrings below, this library's epsilon convention is
// inverted w.r.t. the usual one: the underlying policy is followed with
// probability epsilon, and a random action is taken with probability
// (1 - epsilon). Verify against EpsilonPolicy.hpp before changing any wording.
class_<EpsilonPolicy, bases<AIToolbox::PolicyInterface<size_t>>>{"EpsilonPolicy",

"This class is a policy wrapper for epsilon action choice.\n"
"\n"
"This class is used to wrap already existing policies to implement\n"
"automatic exploratory behaviour (e.g. epsilon-greedy policies).\n"
"\n"
"An epsilon-greedy policy is a policy that takes a greedy action a\n"
"certain percentage of the time, and otherwise takes a random action.\n"
"They are useful to force the agent to explore an unknown model, in order\n"
"to gain new information to refine it and thus gain more reward.\n"
"\n"
"Please note that to obtain an epsilon-greedy policy the wrapped\n"
"policy needs to already be greedy with respect to the model.", no_init}

// no_init above + this documented init<> is the only constructor Python sees.
.def(init<const AIToolbox::PolicyInterface<size_t> &, optional<double>>(
"Basic constructor.\n"
"\n"
"This constructor saves the input policy and the epsilon\n"
"parameter for later use.\n"
"\n"
"The epsilon parameter must be >= 0.0 and <= 1.0,\n"
"otherwise the constructor will throw an std::invalid_argument.\n"
"\n"
"@param p The policy that is being extended.\n"
"@param epsilon The parameter that controls the amount of exploration."
, (arg("self"), "p", "epsilon")))

.def("sampleAction", &EpsilonPolicy::sampleAction,
"This function chooses a random action for state s, following the policy distribution and epsilon.\n"
"\n"
"This function has a probability of (1 - epsilon) of selecting\n"
"a random action. Otherwise, it selects an action according\n"
"to the distribution specified by the wrapped policy.\n"
"\n"
"@param s The sampled state of the policy.\n"
"\n"
"@return The chosen action."
, (arg("self"), "s"))

.def("getActionProbability", &EpsilonPolicy::getActionProbability,
"This function returns the probability of taking the specified action in the specified state.\n"
"\n"
"This function takes into account parameter epsilon\n"
"while computing the final probability.\n"
"\n"
"@param s The selected state.\n"
"@param a The selected action.\n"
"\n"
"@return The probability of taking the selected action in the specified state."
, (arg("self"), "s", "a"))

// Fixed docstring grammar below: "going to be selected" and "will throw".
.def("setEpsilon", &EpsilonPolicy::setEpsilon,
"This function sets the epsilon parameter.\n"
"\n"
"The epsilon parameter determines the amount of exploration this\n"
"policy will enforce when selecting actions. In particular\n"
"actions are going to be selected randomly with probability\n"
"(1-epsilon), and are going to be selected following the\n"
"underlying policy with probability epsilon.\n"
"\n"
"The epsilon parameter must be >= 0.0 and <= 1.0,\n"
"otherwise the function will throw std::invalid_argument.\n"
"\n"
"@param e The new epsilon parameter."
, (arg("self"), "e"))

.def("getEpsilon", &EpsilonPolicy::getEpsilon,
"This function will return the currently set epsilon parameter."
, (arg("self")));
}


52 changes: 49 additions & 3 deletions src/Python/MDP/Policies/Policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,55 @@ void exportPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<Policy, bases<AIToolbox::PolicyInterface<size_t>>>{"Policy", init<const AIToolbox::PolicyInterface<size_t> &>()}
.def(init<size_t, size_t, const ValueFunction &>())
.def("getPolicyTable", &Policy::getPolicyTable, return_internal_reference<>());
// Python bindings for the MDP Policy class (a concrete state->action
// probability table). Exposed with no_init so only the two documented
// init<> overloads below are reachable from Python.
class_<Policy, bases<AIToolbox::PolicyInterface<size_t>>>{"Policy",

"This class represents an MDP Policy.\n"
"\n"
"This class is one of the many ways to represent an MDP Policy. In\n"
"particular, it maintains a 2 dimensional table of probabilities\n"
"determining the probability of choosing an action in a given state.\n"
"\n"
"The class offers facilities to sample from these distributions, so\n"
"that you can directly embed it into a decision-making process.\n"
"\n"
"Building this object is somewhat expensive, so it should be done\n"
"mostly when it is known that the final solution won't change again.\n"
"Otherwise you may want to build a wrapper around some data to\n"
"extract the policy dynamically.", no_init}

// Constructor #1: snapshot-copy any other PolicyInterface.
.def(init<const AIToolbox::PolicyInterface<size_t> &>(
"Basic constructor.\n"
"\n"
"This constructor simply copies policy probability values\n"
"from any other compatible PolicyInterface, and stores them\n"
"internally. This is probably the main way you may want to use\n"
"this class.\n"
"\n"
"This may be a useful thing to do in case the policy that is\n"
"being copied is very costly to use (for example, QGreedyPolicy)\n"
"and it is known that it will not change anymore.\n"
"\n"
"@param p The policy which is being copied."
, (arg("self"), "p")))

// Constructor #2: derive a (deterministic) policy from a ValueFunction.
.def(init<size_t, size_t, const ValueFunction &>(
"Basic constructor.\n"
"\n"
"This constructor copies the implied policy contained in a ValueFunction.\n"
"Keep in mind that the policy stored within a ValueFunction is\n"
"non-stochastic in nature, since for each state it can only\n"
"save a single action.\n"
"\n"
"@param s The number of states of the world.\n"
"@param a The number of actions available to the agent.\n"
"@param v The ValueFunction used as a basis for the Policy."
, (arg("self"), "s", "a", "v")))

// return_internal_reference<> ties the returned table's lifetime to the
// Policy object on the Python side, avoiding a copy and a dangling ref.
.def("getPolicyTable", &Policy::getPolicyTable, return_internal_reference<>(),
"This function enables inspection of the internal policy.\n"
"\n"
"@return A constant reference to the internal policy."
, (arg("self")));
}


14 changes: 12 additions & 2 deletions src/Python/MDP/Policies/QGreedyPolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ void exportQGreedyPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<QGreedyPolicy, bases<QPolicyInterface>>{"QGreedyPolicy", init<const QFunction &>()};
}
// Python bindings for QGreedyPolicy. no_init on the class makes the
// documented init<const QFunction &> below the only visible constructor.
class_<QGreedyPolicy, bases<QPolicyInterface>>{"QGreedyPolicy",

"This class models a greedy policy through a QFunction.\n"
"\n"
"This class allows you to select effortlessly the best greedy actions\n"
"from a given QFunction.", no_init}

// Constructor: binds the policy to the QFunction it reads from.
.def(init<const QFunction &>(
"Basic constructor.\n"
"\n"
"@param q The QFunction this policy is linked with."
, (arg("self"), "q")));
}
12 changes: 11 additions & 1 deletion src/Python/MDP/Policies/QPolicyInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ void exportQPolicyInterface() {

using P = AIToolbox::PolicyInterface<size_t>;

class_<QPolicyInterface, bases<P>, boost::noncopyable>{"QPolicyInterface", no_init};
// Python bindings for the abstract QPolicyInterface. Exposed as
// noncopyable with no_init since it is an interface: Python code only
// ever receives concrete derived instances (e.g. QGreedyPolicy).
// Fixed docstring typo: "Qfunction" -> "QFunction", consistent with the
// class name used elsewhere in the same string.
class_<QPolicyInterface, bases<P>, boost::noncopyable>{"QPolicyInterface",
"This class is an interface to specify a policy through a QFunction.\n"
"\n"
"This class provides a way to sample actions without the\n"
"need to compute a full Policy from a QFunction. This is useful\n"
"because often many methods need to modify small parts of a QFunction\n"
"for progressive improvement, and computing a full Policy at each\n"
"step can become too expensive to do.\n"
"\n"
"The type of policy obtained from such sampling is left to the implementation,\n"
"since there are many ways in which such a policy may be formed.", no_init};
}


Expand Down

0 comments on commit 864548b

Please sign in to comment.