Skip to content

Commit

Permalink
Added missing Python docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Eugenio Bargiacchi committed Apr 6, 2016
1 parent af1ec0c commit 864548b
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 9 deletions.
73 changes: 70 additions & 3 deletions src/Python/MDP/Policies/EpsilonPolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,76 @@ void exportEpsilonPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<EpsilonPolicy, bases<AIToolbox::PolicyInterface<size_t>>>{"EpsilonPolicy", init<const AIToolbox::PolicyInterface<size_t> &, optional<double>>()}
.def("setEpsilon", &EpsilonPolicy::setEpsilon)
.def("getEpsilon", &EpsilonPolicy::getEpsilon);
// Python bindings for the MDP EpsilonPolicy wrapper.
//
// NOTE(review): per the docstrings below, this library's epsilon convention is
// inverted w.r.t. the usual one: the underlying policy is followed with
// probability epsilon, and a random action is taken with probability
// (1 - epsilon). Verify against EpsilonPolicy.hpp before changing any wording.
class_<EpsilonPolicy, bases<AIToolbox::PolicyInterface<size_t>>>{"EpsilonPolicy",

"This class is a policy wrapper for epsilon action choice.\n"
"\n"
"This class is used to wrap already existing policies to implement\n"
"automatic exploratory behaviour (e.g. epsilon-greedy policies).\n"
"\n"
"An epsilon-greedy policy is a policy that takes a greedy action a\n"
"certain percentage of the time, and otherwise takes a random action.\n"
"They are useful to force the agent to explore an unknown model, in order\n"
"to gain new information to refine it and thus gain more reward.\n"
"\n"
"Please note that to obtain an epsilon-greedy policy the wrapped\n"
"policy needs to already be greedy with respect to the model.", no_init}

// no_init above + this documented init<> is the only constructor Python sees.
.def(init<const AIToolbox::PolicyInterface<size_t> &, optional<double>>(
"Basic constructor.\n"
"\n"
"This constructor saves the input policy and the epsilon\n"
"parameter for later use.\n"
"\n"
"The epsilon parameter must be >= 0.0 and <= 1.0,\n"
"otherwise the constructor will throw an std::invalid_argument.\n"
"\n"
"@param p The policy that is being extended.\n"
"@param epsilon The parameter that controls the amount of exploration."
, (arg("self"), "p", "epsilon")))

.def("sampleAction", &EpsilonPolicy::sampleAction,
"This function chooses a random action for state s, following the policy distribution and epsilon.\n"
"\n"
"This function has a probability of (1 - epsilon) of selecting\n"
"a random action. Otherwise, it selects an action according\n"
"to the distribution specified by the wrapped policy.\n"
"\n"
"@param s The sampled state of the policy.\n"
"\n"
"@return The chosen action."
, (arg("self"), "s"))

.def("getActionProbability", &EpsilonPolicy::getActionProbability,
"This function returns the probability of taking the specified action in the specified state.\n"
"\n"
"This function takes into account parameter epsilon\n"
"while computing the final probability.\n"
"\n"
"@param s The selected state.\n"
"@param a The selected action.\n"
"\n"
"@return The probability of taking the selected action in the specified state."
, (arg("self"), "s", "a"))

// Fixed docstring grammar below: "going to be selected" and "will throw".
.def("setEpsilon", &EpsilonPolicy::setEpsilon,
"This function sets the epsilon parameter.\n"
"\n"
"The epsilon parameter determines the amount of exploration this\n"
"policy will enforce when selecting actions. In particular\n"
"actions are going to be selected randomly with probability\n"
"(1-epsilon), and are going to be selected following the\n"
"underlying policy with probability epsilon.\n"
"\n"
"The epsilon parameter must be >= 0.0 and <= 1.0,\n"
"otherwise the function will throw std::invalid_argument.\n"
"\n"
"@param e The new epsilon parameter."
, (arg("self"), "e"))

.def("getEpsilon", &EpsilonPolicy::getEpsilon,
"This function will return the currently set epsilon parameter."
, (arg("self")));
}


52 changes: 49 additions & 3 deletions src/Python/MDP/Policies/Policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,55 @@ void exportPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<Policy, bases<AIToolbox::PolicyInterface<size_t>>>{"Policy", init<const AIToolbox::PolicyInterface<size_t> &>()}
.def(init<size_t, size_t, const ValueFunction &>())
.def("getPolicyTable", &Policy::getPolicyTable, return_internal_reference<>());
// Python bindings for the MDP Policy class (a concrete state->action
// probability table). Exposed with no_init so only the two documented
// init<> overloads below are reachable from Python.
class_<Policy, bases<AIToolbox::PolicyInterface<size_t>>>{"Policy",

"This class represents an MDP Policy.\n"
"\n"
"This class is one of the many ways to represent an MDP Policy. In\n"
"particular, it maintains a 2 dimensional table of probabilities\n"
"determining the probability of choosing an action in a given state.\n"
"\n"
"The class offers facilities to sample from these distributions, so\n"
"that you can directly embed it into a decision-making process.\n"
"\n"
"Building this object is somewhat expensive, so it should be done\n"
"mostly when it is known that the final solution won't change again.\n"
"Otherwise you may want to build a wrapper around some data to\n"
"extract the policy dynamically.", no_init}

// Constructor #1: snapshot-copy any other PolicyInterface.
.def(init<const AIToolbox::PolicyInterface<size_t> &>(
"Basic constructor.\n"
"\n"
"This constructor simply copies policy probability values\n"
"from any other compatible PolicyInterface, and stores them\n"
"internally. This is probably the main way you may want to use\n"
"this class.\n"
"\n"
"This may be a useful thing to do in case the policy that is\n"
"being copied is very costly to use (for example, QGreedyPolicy)\n"
"and it is known that it will not change anymore.\n"
"\n"
"@param p The policy which is being copied."
, (arg("self"), "p")))

// Constructor #2: derive a (deterministic) policy from a ValueFunction.
.def(init<size_t, size_t, const ValueFunction &>(
"Basic constructor.\n"
"\n"
"This constructor copies the implied policy contained in a ValueFunction.\n"
"Keep in mind that the policy stored within a ValueFunction is\n"
"non-stochastic in nature, since for each state it can only\n"
"save a single action.\n"
"\n"
"@param s The number of states of the world.\n"
"@param a The number of actions available to the agent.\n"
"@param v The ValueFunction used as a basis for the Policy."
, (arg("self"), "s", "a", "v")))

// return_internal_reference<> ties the returned table's lifetime to the
// Policy object on the Python side, avoiding a copy and a dangling ref.
.def("getPolicyTable", &Policy::getPolicyTable, return_internal_reference<>(),
"This function enables inspection of the internal policy.\n"
"\n"
"@return A constant reference to the internal policy."
, (arg("self")));
}


14 changes: 12 additions & 2 deletions src/Python/MDP/Policies/QGreedyPolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ void exportQGreedyPolicy() {
using namespace AIToolbox::MDP;
using namespace boost::python;

class_<QGreedyPolicy, bases<QPolicyInterface>>{"QGreedyPolicy", init<const QFunction &>()};
}
// Python bindings for QGreedyPolicy. no_init on the class makes the
// documented init<const QFunction &> below the only visible constructor.
class_<QGreedyPolicy, bases<QPolicyInterface>>{"QGreedyPolicy",

"This class models a greedy policy through a QFunction.\n"
"\n"
"This class allows you to select effortlessly the best greedy actions\n"
"from a given QFunction.", no_init}

// Constructor: binds the policy to the QFunction it reads from.
.def(init<const QFunction &>(
"Basic constructor.\n"
"\n"
"@param q The QFunction this policy is linked with."
, (arg("self"), "q")));
}
12 changes: 11 additions & 1 deletion src/Python/MDP/Policies/QPolicyInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ void exportQPolicyInterface() {

using P = AIToolbox::PolicyInterface<size_t>;

class_<QPolicyInterface, bases<P>, boost::noncopyable>{"QPolicyInterface", no_init};
// Python bindings for the abstract QPolicyInterface. Exposed as
// noncopyable with no_init since it is an interface: Python code only
// ever receives concrete derived instances (e.g. QGreedyPolicy).
// Fixed docstring typo: "Qfunction" -> "QFunction", consistent with the
// class name used elsewhere in the same string.
class_<QPolicyInterface, bases<P>, boost::noncopyable>{"QPolicyInterface",
"This class is an interface to specify a policy through a QFunction.\n"
"\n"
"This class provides a way to sample actions without the\n"
"need to compute a full Policy from a QFunction. This is useful\n"
"because often many methods need to modify small parts of a QFunction\n"
"for progressive improvement, and computing a full Policy at each\n"
"step can become too expensive to do.\n"
"\n"
"The type of policy obtained from such sampling is left to the implementation,\n"
"since there are many ways in which such a policy may be formed.", no_init};
}


Expand Down

0 comments on commit 864548b

Please sign in to comment.