Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
eisber committed May 24, 2016
2 parents 9774499 + d3aa940 commit 92071ff
Show file tree
Hide file tree
Showing 14 changed files with 385 additions and 229 deletions.
6 changes: 5 additions & 1 deletion test/RunTests
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ echo "" | {VW}
# Test 68: Search for multiclass classification
{VW} -k -c -d train-sets/multiclass.sch --passes 20 --search_task multiclasstask --search 10 --search_alpha 1e-4 --holdout_off
train-sets/ref/search_multiclass.stderr
train-sets/ref/search_multiclass.stderr
# Test 69: (see Test 43/Test 44): search sequence labeling, with selective branching
{VW} -d train-sets/sequence_data -t -i models/sequence_data.model -p sequence_data.nonldf.beam.test.predict --search_metatask selective_branching --search_max_branch 10 --search_kbest 10
Expand Down Expand Up @@ -1429,3 +1429,7 @@ printf '3 |f a b c |e x y z\n2 |f a y c |e x\n' | \
{VW} train-sets/cb_eval2 -i mwt.model -p cb_eval2.preds
train-sets/ref/cb_eval2.stderr
pred-sets/ref/cb_eval2.preds
# Test 125: arc-eager transition-based dependency parser
{VW} -k -c -d train-sets/wsj_small.dparser.vw.gz -b 20 --search_task dep_parser --search 26 --search_alpha 1e-5 --search_rollin mix_per_roll --search_rollout oracle --one_learner --search_history_length 3 --root_label 8 --transition_system 2 --passes 8
train-sets/ref/search_dep_parser_arceager.stderr
8 changes: 0 additions & 8 deletions test/pred-sets/ref/cb_eval.preds
Original file line number Diff line number Diff line change
@@ -1,8 +0,0 @@
0 0
0 1
0 0.666667
0 1
0 0.8
0 1
0 0.857143
0 1
4 changes: 0 additions & 4 deletions test/pred-sets/ref/cb_eval1.preds
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
0 0
0 1
0 0.666667
0 1
4 changes: 0 additions & 4 deletions test/pred-sets/ref/cb_eval2.preds
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
0 0.8
0 1
0 0.857143
0 1
14 changes: 5 additions & 9 deletions test/train-sets/ref/cb_eval.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,11 @@ Reading datafile = train-sets/cb_eval
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3
0.000000 0.000000 8 8.0 unknown 0 3

finished run
number of examples per pass = 8
number of examples per pass = 0
passes used = 1
weighted example sum = 8.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 24
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
13 changes: 5 additions & 8 deletions test/train-sets/ref/cb_eval1.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@ Reading datafile = train-sets/cb_eval1
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3

finished run
number of examples per pass = 4
number of examples per pass = 0
passes used = 1
weighted example sum = 4.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 12
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
13 changes: 5 additions & 8 deletions test/train-sets/ref/cb_eval2.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@ Reading datafile = train-sets/cb_eval2
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3

finished run
number of examples per pass = 4
number of examples per pass = 0
passes used = 1
weighted example sum = 4.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 12
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
10 changes: 5 additions & 5 deletions test/train-sets/ref/search_dep_parser.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ num sources = 1
average since instance current true current predicted cur cur predic cache examples
loss last counter output prefix output prefix pass pol made hits gener beta
88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.014199
48.000000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [0:8 1:4 2:4 3:4 1:4 ] 0 0 157 0 156 0.015381
34.000000 20.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:4 3:3 ] 1 0 319 0 312 0.030623
25.500000 17.000000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:2 0:8 3:2 3:3 ] 3 0 642 0 624 0.060402
48.000000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [0:8 1:1 2:1 3:3 1:4 ] 0 0 157 0 156 0.015381
28.750000 9.500000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 319 0 312 0.030623
14.625000 0.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 642 0 624 0.060402

finished run
number of examples per pass = 2
passes used = 6
weighted example sum = 12
weighted label sum = 0
average loss = 22.4167
total feature number = 275075
average loss = 9.91667
total feature number = 275092
22 changes: 22 additions & 0 deletions test/train-sets/ref/search_dep_parser_arceager.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Num weight bits = 20
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
creating cache_file = train-sets/wsj_small.dparser.vw.gz.cache
Reading datafile = train-sets/wsj_small.dparser.vw.gz
num sources = 1
average since instance current true current predicted cur cur predic cache examples
loss last counter output prefix output prefix pass pol made hits gener beta
97.000000 97.000000 1 [43:1 5:2 5:2 5:2 1..] [0:1 0:1 0:1 0:1 0:..] 0 0 97 0 97 0.000960
52.500000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 4:2 2:3 2:4 ] 0 0 106 0 106 0.001049
26.250000 0.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 115 0 212 0.002108
13.125000 0.000000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 124 0 424 0.004221

finished run
number of examples per pass = 2
passes used = 4
weighted example sum = 8
weighted label sum = 0
average loss = undefined (no holdout)
total feature number = 32302
6 changes: 3 additions & 3 deletions test/train-sets/ref/search_dep_parser_cost_to_go.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ loss last counter output prefix output prefix p
88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.000000
47.500000 7.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 2:4 2:4 2:4 ] 0 0 156 0 156 0.001439
29.250000 11.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 312 0 312 0.002996
15.875000 2.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 624 0 624 0.006101
16.375000 3.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 624 0 624 0.006101

finished run
number of examples per pass = 2
passes used = 6
weighted example sum = 12
weighted label sum = 0
average loss = 10.5833
total feature number = 276589
average loss = 10.9167
total feature number = 276631
2 changes: 2 additions & 0 deletions vowpalwabbit/gd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,8 @@ base_learner* setup(vw& all)
stride = set_learn<true>(all, feature_mask_off, g);
else
stride = set_learn<false>(all, feature_mask_off, g);
if (!all.training)
stride = 1;
all.reg.stride_shift = (uint32_t)ceil_log_2(stride-1);

learner<gd>& ret = init_learner(&g, g.learn, ((uint64_t)1 << all.reg.stride_shift));
Expand Down
33 changes: 19 additions & 14 deletions vowpalwabbit/mwt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace MWT {
v_array<uint64_t> policies;
double total;
uint32_t num_classes;
bool learn;

v_array<namespace_index> indices;// excluded namespaces
features feature_space[256];
Expand Down Expand Up @@ -149,19 +150,22 @@ namespace MWT {

void finish_example(vw& all, mwt& c, example& ec)
{
float loss = 0.;
if (c.observation != nullptr)
loss = get_unbiased_cost(c.observation, ec.pred.scalars[0]);

all.sd->update(ec.test_only, loss, 1.f, ec.num_features);

for (int sink : all.final_prediction_sink)
print_scalars(sink, ec.pred.scalars, ec.tag);

v_array<float> temp = ec.pred.scalars;
ec.pred.multiclass = (uint32_t)temp[0];
CB::print_update(all, c.observation != nullptr, ec, nullptr, false);
ec.pred.scalars = temp;
if (c.learn)
{
float loss = 0.;
if (c.observation != nullptr)
loss = get_unbiased_cost(c.observation, ec.pred.scalars[0]);

all.sd->update(ec.test_only, loss, 1.f, ec.num_features);

for (int sink : all.final_prediction_sink)
print_scalars(sink, ec.pred.scalars, ec.tag);

v_array<float> temp = ec.pred.scalars;
ec.pred.multiclass = (uint32_t)temp[0];
CB::print_update(all, c.observation != nullptr, ec, nullptr, false);
ec.pred.scalars = temp;
}
VW::finish_example(all, &ec);
}

Expand Down Expand Up @@ -241,6 +245,7 @@ base_learner* mwt_setup(vw& all)
if (all.vm.count("learn"))
{
c.num_classes = all.vm["learn"].as<uint32_t>();
c.learn = true;

if (count(all.args.begin(), all.args.end(),"--cb") == 0)
{ all.args.push_back("--cb");
Expand All @@ -251,7 +256,7 @@ base_learner* mwt_setup(vw& all)
}

learner<mwt>* l;
if (all.vm.count("learn"))
if (c.learn)
if (all.vm.count("exclude_eval"))
l = &init_learner(&c, setup_base(all), predict_or_learn<true, true, true>, predict_or_learn<true, true, false>, 1);
else
Expand Down
85 changes: 48 additions & 37 deletions vowpalwabbit/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ struct action_repr
{ action a;
features *repr;
action_repr(action _a, features* _repr) : a(_a)
{ if(_repr!=NULL)
{ if(_repr!=nullptr)
{ repr = new features();
repr->deep_copy_from(*_repr);
}
else
repr = NULL;
repr = nullptr;
}
action_repr(action _a) : a(_a), repr(NULL) {}
action_repr(action _a) : a(_a), repr(nullptr) {}
};

struct action_cache
Expand Down Expand Up @@ -549,11 +549,9 @@ void reset_search_structure(search_private& priv)
if (priv.beta > 1) priv.beta = 1;
}
for (Search::action_repr& ar : priv.ptag_to_action)
{ if(ar.repr !=NULL)
{ ar.repr->values.delete_v();
ar.repr->indicies.delete_v();
ar.repr->space_names.delete_v();
cdbg << "delete_v" << endl;
{ if(ar.repr !=nullptr)
{ ar.repr->delete_v();
delete ar.repr;
}
}
priv.ptag_to_action.erase();
Expand Down Expand Up @@ -640,26 +638,29 @@ void add_example_conditioning(search_private& priv, example& ec, size_t conditio
}

if (priv.acset.use_passthrough_repr)
{ cdbg << "BEGIN adding passthrough features" << endl;
for (size_t i=0; i<I; i++)
{
features& fs = *(condition_on_actions[i].repr);
char name = condition_on_names[i];
for (size_t k=0; k<fs.size(); k++)
if ((fs.values[k] > 1e-10) || (fs.values[k] < -1e-10))
{ uint64_t fid = 84913 + 48371803 * (extra_offset + 8392817 * name) + 840137 * (4891 + fs.indicies[k]);
if (priv.all->audit)
{ priv.dat_new_feature_audit_ss.str("");
priv.dat_new_feature_audit_ss.clear();
priv.dat_new_feature_audit_ss << "passthrough_repr_" << i << '_' << k;
}

priv.dat_new_feature_ec = &ec;
priv.dat_new_feature_idx = fid;
priv.dat_new_feature_namespace = conditioning_namespace;
priv.dat_new_feature_value = fs.values[k];
add_new_feature(priv, 1., 4398201 << priv.all->reg.stride_shift);
}
}
{ if (condition_on_actions[i].repr == nullptr) continue;
features& fs = *(condition_on_actions[i].repr);
char name = condition_on_names[i];
for (size_t k=0; k<fs.size(); k++)
if ((fs.values[k] > 1e-10) || (fs.values[k] < -1e-10))
{ uint64_t fid = 84913 + 48371803 * (extra_offset + 8392817 * name) + 840137 * (4891 + fs.indicies[k]);
if (priv.all->audit)
{ priv.dat_new_feature_audit_ss.str("");
priv.dat_new_feature_audit_ss.clear();
priv.dat_new_feature_audit_ss << "passthrough_repr_" << i << '_' << k;
}

priv.dat_new_feature_ec = &ec;
priv.dat_new_feature_idx = fid;
priv.dat_new_feature_namespace = conditioning_namespace;
priv.dat_new_feature_value = fs.values[k];
add_new_feature(priv, 1., 4398201 << priv.all->reg.stride_shift);
}
}
cdbg << "END adding passthrough features" << endl;
}

features& con_fs = ec.feature_space[conditioning_namespace];
if ((con_fs.size() > 0) && (con_fs.sum_feat_sq > 0.))
Expand Down Expand Up @@ -785,7 +786,7 @@ void allowed_actions_to_label(search_private& priv, size_t ec_cnt, const action*
}
}
else // non-LDF, no action costs
{ if ((allowed_actions == NULL) || (allowed_actions_cnt == 0)) // any action is allowed
{ if ((allowed_actions == nullptr) || (allowed_actions_cnt == 0)) // any action is allowed
{ bool set_to_one = false;
if (cs_get_costs_size(isCB, lab) != priv.A)
{ cs_costs_erase(isCB, lab);
Expand Down Expand Up @@ -830,6 +831,7 @@ template<class T> void push_at(v_array<T>& v, T item, size_t pos)
else
{ if (v.end_array > v.begin() + pos)
{ // there's enough memory, just not enough filler
memset(v.end(), 0, sizeof(T) * (pos - v.size()));
v.begin()[pos] = item;
v.end() = v.begin() + pos + 1;
}
Expand Down Expand Up @@ -1968,8 +1970,12 @@ void search_finish(search& sch)

priv.train_trajectory.delete_v();
for (Search::action_repr& ar : priv.ptag_to_action)
if(ar.repr != NULL)
ar.repr->delete_v();
{ if(ar.repr !=nullptr)
{ ar.repr->delete_v();
delete ar.repr;
cdbg << "delete_v" << endl;
}
}
priv.ptag_to_action.delete_v();
clear_memo_foreach_action(priv);
priv.memo_foreach_action.delete_v();
Expand Down Expand Up @@ -2389,14 +2395,17 @@ action search::predict(example& ec, ptag mytag, const action* oracle_actions, si
if (mytag != 0)
{ if (mytag < priv->ptag_to_action.size())
{ cdbg << "delete_v at " << mytag << endl;
if(priv->ptag_to_action[mytag].repr != NULL)
priv->ptag_to_action[mytag].repr->delete_v();
if(priv->ptag_to_action[mytag].repr != nullptr)
{ priv->ptag_to_action[mytag].repr->delete_v();
delete priv->ptag_to_action[mytag].repr;
}
}
if (priv->acset.use_passthrough_repr)
{ assert((mytag >= priv->ptag_to_action.size()) || (priv->ptag_to_action[mytag].repr == nullptr));
push_at(priv->ptag_to_action, action_repr(a, &(priv->last_action_repr)), mytag);
else
push_at(priv->ptag_to_action, action_repr(a, (features*)NULL), mytag);
cdbg << "push_at " << mytag << endl;
} else
push_at(priv->ptag_to_action, action_repr(a, (features*)nullptr), mytag);
cdbg << "push_at " << mytag << endl;
}
if (priv->auto_hamming_loss)
loss( priv->use_action_costs
Expand All @@ -2414,8 +2423,10 @@ action search::predictLDF(example* ecs, size_t ec_cnt, ptag mytag, const action*
if ((mytag != 0) && ecs[a].l.cs.costs.size() > 0)
{ if (mytag < priv->ptag_to_action.size())
{ cdbg << "delete_v at " << mytag << endl;
if(priv->ptag_to_action[mytag].repr != NULL)
priv->ptag_to_action[mytag].repr->delete_v();
if(priv->ptag_to_action[mytag].repr != nullptr)
{ priv->ptag_to_action[mytag].repr->delete_v();
delete priv->ptag_to_action[mytag].repr;
}
}
push_at(priv->ptag_to_action, action_repr(ecs[a].l.cs.costs[0].class_index, &(priv->last_action_repr)), mytag);
}
Expand Down
Loading

0 comments on commit 92071ff

Please sign in to comment.