maxent: OOP

xuh5156 · Jul 7, 2013 · c822611 · c822611
1 parent 410a2a0
commit c822611
Show file tree

Hide file tree

Showing 18 changed files with 758 additions and 506 deletions.
diff --git a/mltk/.DS_Store b/mltk/.DS_Store
diff --git a/mltk/common/.vocabulary.h.swp b/mltk/common/.vocabulary.h.swp
diff --git a/mltk/common/CMakeLists.txt b/mltk/common/CMakeLists.txt
@@ -2,12 +2,14 @@
 SET(LIBRARY_OUTPUT_PATH ${MLTK_SOURCE_DIR}/lib)
 SET(EXECUTABLE_OUTPUT_PATH ${MLTK_SOURCE_DIR}/bin/mltk/common)
 
-ADD_LIBRARY(model_data SHARED model_data.cc)
-SET_TARGET_PROPERTIES(model_data PROPERTIES CLEAN_DIRECT_OUTPUT 1)
+SET(SRC_LIST model_data.cc)
 
-ADD_LIBRARY(model_data_static STATIC model_data.cc)
-SET_TARGET_PROPERTIES(model_data_static PROPERTIES OUTPUT_NAME "model_data")
-SET_TARGET_PROPERTIES(model_data_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
+ADD_LIBRARY(mltk_common SHARED ${SRC_LIST})
+SET_TARGET_PROPERTIES(mltk_common PROPERTIES CLEAN_DIRECT_OUTPUT 1)
+
+ADD_LIBRARY(mltk_common_static STATIC ${SRC_LIST})
+SET_TARGET_PROPERTIES(mltk_common_static PROPERTIES OUTPUT_NAME "mltk_common")
+SET_TARGET_PROPERTIES(mltk_common_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
 
 IF (test)
     INCLUDE_DIRECTORIES($ENV{GTEST_ROOT}/include)

diff --git a/mltk/common/model_data.cc b/mltk/common/model_data.cc
@@ -114,6 +114,47 @@ void ModelData::FormatInstance(const Instance& instance,
   }
 }
 
+// Computes the conditional distribution over class labels for `mem_instance`
+// under the current lambdas.
+//
+// For every feature of the instance, lambda * feature value is added to the
+// exponent of the label that feature belongs to; the exponents are then
+// exponentiated and normalized.
+//
+// mem_instance: the instance to score.
+// prob_dist:    output; entry `label_id` receives the probability of that
+//               label. It is grown to NumClasses() entries if it is smaller;
+//               a larger vector keeps its extra entries untouched.
+//
+// Returns the label id with the highest probability (the lowest id wins
+// ties), or 0 when the model has no classes.
+int32_t ModelData::CalcConditionalProbability(
+    const MemInstance& mem_instance, std::vector<double>* prob_dist) const {
+  const int32_t num_classes = NumClasses();
+  if (num_classes <= 0) {
+    return 0;  // nothing to score; avoids dereferencing an empty range below
+  }
+  if (prob_dist->size() < static_cast<size_t>(num_classes)) {
+    prob_dist->resize(num_classes, 0.0);
+  }
+
+  // powv[label] = sum over active features of lambda * value, i.e. w * x.
+  std::vector<double> powv(num_classes, 0.0);
+  for (MemInstance::ConstIterator citer(mem_instance);
+       !citer.Done(); citer.Next()) {
+    const std::vector<int32_t>& feature_ids = FeatureIds(citer.FeatureNameId());
+    for (size_t i = 0; i < feature_ids.size(); ++i) {
+      const int32_t feature_id = feature_ids[i];
+      powv[FeatureAt(feature_id).LabelId()]
+          += lambdas_[feature_id] * citer.FeatureValue();
+    }
+  }
+
+  // Shift every exponent by the largest one. The largest term then becomes
+  // exp(0) == 1, so exp() cannot overflow and the sum cannot collapse to 0.
+  // Smaller terms may still underflow to exactly 0, which is a legitimate
+  // probability of 0 and not an error.
+  const double max_pow = *std::max_element(powv.begin(), powv.end());
+  double sum = 0.0;
+  for (int32_t label_id = 0; label_id < num_classes; ++label_id) {
+    const double prod = exp(powv[label_id] - max_pow);  // exp(w * x - max)
+    (*prob_dist)[label_id] = prod;
+    sum += prod;
+  }
+
+  int32_t max_label = 0;
+  // `sum` is >= 1 for finite exponents; the guard only skips normalization
+  // when non-finite lambdas or feature values produced a NaN sum.
+  if (sum > 0.0) {
+    for (int32_t label_id = 0; label_id < num_classes; ++label_id) {
+      (*prob_dist)[label_id] /= sum;
+      if ((*prob_dist)[label_id] > (*prob_dist)[max_label]) {
+        max_label = label_id;
+      }
+    }
+  }
+
+  return max_label;
+}
+
 }  // namespace common
 }  // namespace mltk
 
diff --git a/mltk/common/model_data.h b/mltk/common/model_data.h
@@ -101,6 +101,9 @@ class ModelData {
     return num_active;
   }
 
+  // Scores `mem_instance` against the current lambdas: writes one value per
+  // class into `*prob_dist` (normalized to sum to 1 when the total is
+  // positive) and returns the label id with the largest value.
+  int32_t CalcConditionalProbability(const MemInstance& mem_instance,
+                                     std::vector<double>* prob_dist) const;
+
  private:
   void InitAllFeatures() {
     for (int32_t feature_name_id = 0;

diff --git a/mltk/maxent/CMakeLists.txt b/mltk/maxent/CMakeLists.txt
@@ -2,16 +2,16 @@
 SET(LIBRARY_OUTPUT_PATH ${MLTK_SOURCE_DIR}/lib)
 SET(EXECUTABLE_OUTPUT_PATH ${MLTK_SOURCE_DIR}/bin/mltk/maxent)
 
-SET(SRC_LIST maxent.cc lbfgs.cc owlqn.cc sgd.cc)
+SET(SRC_LIST maxent.cc optimizer.cc lbfgs.cc owlqn.cc sgd.cc)
 
 ADD_LIBRARY(maxent SHARED ${SRC_LIST})
 SET_TARGET_PROPERTIES(maxent PROPERTIES CLEAN_DIRECT_OUTPUT 1)
-TARGET_LINK_LIBRARIES(maxent model_data)
+TARGET_LINK_LIBRARIES(maxent mltk_common)
 
 ADD_LIBRARY(maxent_static STATIC ${SRC_LIST})
 SET_TARGET_PROPERTIES(maxent_static PROPERTIES OUTPUT_NAME "maxent")
 SET_TARGET_PROPERTIES(maxent_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
-TARGET_LINK_LIBRARIES(maxent_static model_data)
+TARGET_LINK_LIBRARIES(maxent_static mltk_common)
 
 IF (test)
     INCLUDE_DIRECTORIES($ENV{GTEST_ROOT}/include)

diff --git a/mltk/maxent/lbfgs.cc b/mltk/maxent/lbfgs.cc
@@ -1,23 +1,23 @@
 // Copyright (c) 2013 MLTK Project.
 // Author: Lifeng Wang ([email protected])
-//
-// Implementation of LBFGS algorithm.
-//
-// Pls refer to 'Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited
-// Storage", Mathematics of Computation, 1980.'
 
-#include "mltk/maxent/maxent.h"
+#include "mltk/maxent/lbfgs.h"
 
+#include <assert.h>
 #include <math.h>
 #include <iostream>
 #include <vector>
 
 #include "mltk/common/double_vector.h"
+#include "mltk/common/instance.h"
+#include "mltk/common/model_data.h"
 
 namespace mltk {
 namespace maxent {
 
 using mltk::common::DoubleVector;
+using mltk::common::Instance;
+using mltk::common::ModelData;
 
 const static int32_t LBFGS_M = 10;
 const static double LINE_SEARCH_ALPHA = 0.1;
@@ -27,43 +27,29 @@ const static double LINE_SEARCH_BETA = 0.5;
 const static int32_t LBFGS_MAX_ITER = 300;
 const static double MIN_GRAD_NORM = 0.0001;
 
-DoubleVector ApproximateHg(const int32_t iter,
-                           const DoubleVector& grad,
-                           const DoubleVector s[],
-                           const DoubleVector y[],
-                           const double z[]) {
-  int32_t offset, bound;
-  if (iter <= LBFGS_M) {
-    offset = 0;
-    bound = iter;
-  }
-  else {
-    offset = iter - LBFGS_M;
-    bound = LBFGS_M;
+// Trains `model_data` on `instances` with L-BFGS.
+//
+// instances:   training instances, handed to InitFromInstances().
+// num_heldout: forwarded to InitFromInstances() together with `instances`.
+// model_data:  model whose lambdas are used as the starting point and are
+//              overwritten with the optimized values.
+//
+// Terminates the process with exit(1) when an L1 regularizer is configured
+// (l1reg_ > 0), since this optimizer does not support it.
+void LBFGS::EstimateParamater(const std::vector<Instance>& instances,
+                              int32_t num_heldout,
+                              ModelData* model_data) {
+  std::cerr << "performing LBFGS" << std::endl;
+  if (l1reg_ > 0) {
+    // The two literals are concatenated on output, so the first one must end
+    // with a space to keep the sentence readable.
+    std::cerr << "error: L1 regularization is not supported in LBFGS, "
+        << "you can use OWLQN method instead." << std::endl;
+    exit(1);
   }
 
-  DoubleVector q = grad;
-  double alpha[LBFGS_M], beta[LBFGS_M];
-  for (int32_t i = bound - 1; i >= 0; --i) {
-    const int32_t j = (i + offset) % LBFGS_M;
-    alpha[i] = z[j] * DotProduct(s[j], q);
-    q += -alpha[i] * y[j];
-  }
-  if (iter > 0) {
-    const int32_t j = (iter - 1) % LBFGS_M;
-    const double gamma = ((1.0 / z[j]) / DotProduct(y[j], y[j]));
-    q *= gamma;
-  }
-  for (int32_t i = 0; i <= bound - 1; ++i) {
-    const int32_t j = (i + offset) % LBFGS_M;
-    beta[i] = z[j] * DotProduct(y[j], q);
-    q += s[j] * (alpha[i] - beta[i]);
+  // NOTE(review): model_data_ is presumably set from `model_data` inside
+  // InitFromInstances() -- confirm in optimizer.cc.
+  InitFromInstances(instances, num_heldout, model_data);
+
+  // Bind by const reference: this avoids a full copy when Lambdas() returns a
+  // reference and is still valid (lifetime-extended) if it returns by value.
+  const std::vector<double>& lambdas = model_data_->Lambdas();
+  // Start the search from the model's current lambdas.
+  std::vector<double> x0(model_data_->NumFeatures());
+  for (int32_t i = 0; i < model_data_->NumFeatures(); ++i) {
+    x0[i] = lambdas[i];
   }
 
-  return q;
+  std::vector<double> x = PerformLBFGS(x0);
+  model_data_->UpdateLambdas(x);
 }
 
-std::vector<double> MaxEnt::PerformLBFGS(const std::vector<double>& x0) {
+std::vector<double> LBFGS::PerformLBFGS(const std::vector<double>& x0) {
   const size_t dim = x0.size();
   DoubleVector x(x0);
   DoubleVector grad(dim), dx(dim);
@@ -79,7 +65,7 @@ std::vector<double> MaxEnt::PerformLBFGS(const std::vector<double>& x0) {
         << ", obj(err) = " << f
         << ", accuracy = " << train_accuracy_ << std::endl;
 
-    if (heldout_.size() > 0) {
+    if (heldout_data_.size() > 0) {
       const double heldout_logl = CalcHeldoutLikelihood();
       std::cerr << "\theldout_logl(err) = " << -1 * heldout_logl
           << ", accuracy = " << heldout_accuracy_ << std::endl;
@@ -103,12 +89,48 @@ std::vector<double> MaxEnt::PerformLBFGS(const std::vector<double>& x0) {
   return x.STLVector();
 }
 
-double MaxEnt::BacktrackingLineSearch(const DoubleVector& x0,
-                                      const DoubleVector& grad0,
-                                      const double f0,
-                                      const DoubleVector& dx,
-                                      DoubleVector* x,
-                                      DoubleVector* grad1) {
+// Applies the limited-memory quasi-Newton approximation to `grad` and returns
+// the resulting vector, using at most the LBFGS_M most recent correction
+// pairs.
+//
+// iter: number of iterations completed so far; it selects how many stored
+//       pairs are used and where the oldest one sits in the ring buffers.
+// grad: vector the approximation is applied to.
+// s, y: ring buffers of LBFGS_M correction vectors, indexed by
+//       iteration % LBFGS_M.
+// z:    per-pair scalars stored alongside s and y (presumably 1 / (s . y) --
+//       TODO confirm against PerformLBFGS).
+DoubleVector LBFGS::ApproximateHg(const int32_t iter,
+                                  const DoubleVector& grad,
+                                  const DoubleVector s[],
+                                  const DoubleVector y[],
+                                  const double z[]) {
+  // Use every stored pair until the ring buffer is full, then only the last
+  // LBFGS_M; `oldest` is the iteration index of the first pair used.
+  const int32_t num_pairs = (iter <= LBFGS_M) ? iter : LBFGS_M;
+  const int32_t oldest = iter - num_pairs;
+
+  DoubleVector result = grad;
+  double alpha[LBFGS_M];
+
+  // Backward pass: newest pair first.
+  for (int32_t k = num_pairs; k-- > 0; ) {
+    const int32_t slot = (oldest + k) % LBFGS_M;
+    alpha[k] = z[slot] * DotProduct(s[slot], result);
+    result += -alpha[k] * y[slot];
+  }
+
+  // Scale by a factor derived from the most recent pair.
+  if (iter > 0) {
+    const int32_t newest = (iter - 1) % LBFGS_M;
+    const double gamma =
+        ((1.0 / z[newest]) / DotProduct(y[newest], y[newest]));
+    result *= gamma;
+  }
+
+  // Forward pass: oldest pair first, reusing the alphas from the first pass.
+  for (int32_t k = 0; k < num_pairs; ++k) {
+    const int32_t slot = (oldest + k) % LBFGS_M;
+    const double beta = z[slot] * DotProduct(y[slot], result);
+    result += s[slot] * (alpha[k] - beta);
+  }
+
+  return result;
+}
+
+double LBFGS::BacktrackingLineSearch(const DoubleVector& x0,
+                                     const DoubleVector& grad0,
+                                     const double f0,
+                                     const DoubleVector& dx,
+                                     DoubleVector* x,
+                                     DoubleVector* grad1) {
   double t = 1.0 / LINE_SEARCH_BETA;
   double f;
 

diff --git a/mltk/maxent/lbfgs.h b/mltk/maxent/lbfgs.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2013 MLTK Project.
+// Author: Lifeng Wang ([email protected])
+//
+// Implementation of LBFGS algorithm.
+//
+// Please refer to: Jorge Nocedal, "Updating Quasi-Newton Matrices With
+// Limited Storage", Mathematics of Computation, 1980.
+
+#ifndef MLTK_MAXENT_LBFGS_H_
+#define MLTK_MAXENT_LBFGS_H_
+
+#include "mltk/maxent/optimizer.h"
+
+#include <vector>
+
+namespace mltk {
+
+namespace common {
+class DoubleVector;
+class Instance;
+class ModelData;
+}  // namespace common
+
+namespace maxent {
+
+// Optimizer that estimates model parameters with the L-BFGS method.
+class LBFGS : public Optimizer {
+ public:
+  LBFGS() {}
+  virtual ~LBFGS() {}
+
+  // Trains `model_data` on `instances`: starts from the model's current
+  // lambdas, runs L-BFGS and writes the result back into the model.
+  // `num_heldout` is forwarded to the base-class initialization together with
+  // `instances`. Exits the process if L1 regularization is enabled.
+  virtual void EstimateParamater(
+      const std::vector<common::Instance>& instances,
+      int32_t num_heldout,
+      common::ModelData* model_data);
+
+ private:
+  // Runs the optimization starting from `x0` and returns the final point.
+  std::vector<double> PerformLBFGS(const std::vector<double>& x0);
+
+  // Applies the limited-memory quasi-Newton approximation built from the
+  // stored correction pairs (s, y) and scalars z to `grad`.
+  common::DoubleVector ApproximateHg(
+      int32_t iter,
+      const common::DoubleVector& grad,
+      const common::DoubleVector s[],
+      const common::DoubleVector y[],
+      const double z[]);
+
+  // Line search along `dx` from `x0`, where `f0` and `grad0` are the objective
+  // value and gradient at `x0`. NOTE(review): `x` and `grad1` look like
+  // outputs for the accepted point and its gradient, and the return value like
+  // the objective there -- confirm against lbfgs.cc.
+  double BacktrackingLineSearch(
+      const common::DoubleVector& x0,
+      const common::DoubleVector& grad0,
+      double f0,
+      const common::DoubleVector& dx,
+      common::DoubleVector* x,
+      common::DoubleVector* grad1);
+};
+
+}  // namespace maxent
+}  // namespace mltk
+
+#endif  // MLTK_MAXENT_LBFGS_H_
+