Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
eisber committed May 24, 2016
2 parents 9774499 + d3aa940 commit 92071ff
Show file tree
Hide file tree
Showing 14 changed files with 385 additions and 229 deletions.
6 changes: 5 additions & 1 deletion test/RunTests
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ echo "" | {VW}
# Test 68: Search for multiclass classification
{VW} -k -c -d train-sets/multiclass.sch --passes 20 --search_task multiclasstask --search 10 --search_alpha 1e-4 --holdout_off
train-sets/ref/search_multiclass.stderr
train-sets/ref/search_multiclass.stderr
# Test 69: (see Test 43/Test 44): search sequence labeling, with selective branching
{VW} -d train-sets/sequence_data -t -i models/sequence_data.model -p sequence_data.nonldf.beam.test.predict --search_metatask selective_branching --search_max_branch 10 --search_kbest 10
Expand Down Expand Up @@ -1429,3 +1429,7 @@ printf '3 |f a b c |e x y z\n2 |f a y c |e x\n' | \
{VW} train-sets/cb_eval2 -i mwt.model -p cb_eval2.preds
train-sets/ref/cb_eval2.stderr
pred-sets/ref/cb_eval2.preds
# Test 125: arc-eager transition-based dependency parser
{VW} -k -c -d train-sets/wsj_small.dparser.vw.gz -b 20 --search_task dep_parser --search 26 --search_alpha 1e-5 --search_rollin mix_per_roll --search_rollout oracle --one_learner --search_history_length 3 --root_label 8 --transition_system 2 --passes 8
train-sets/ref/search_dep_parser_arceager.stderr
8 changes: 0 additions & 8 deletions test/pred-sets/ref/cb_eval.preds
Original file line number Diff line number Diff line change
@@ -1,8 +0,0 @@
0 0
0 1
0 0.666667
0 1
0 0.8
0 1
0 0.857143
0 1
4 changes: 0 additions & 4 deletions test/pred-sets/ref/cb_eval1.preds
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
0 0
0 1
0 0.666667
0 1
4 changes: 0 additions & 4 deletions test/pred-sets/ref/cb_eval2.preds
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
0 0.8
0 1
0 0.857143
0 1
14 changes: 5 additions & 9 deletions test/train-sets/ref/cb_eval.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,11 @@ Reading datafile = train-sets/cb_eval
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3
0.000000 0.000000 8 8.0 unknown 0 3

finished run
number of examples per pass = 8
number of examples per pass = 0
passes used = 1
weighted example sum = 8.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 24
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
13 changes: 5 additions & 8 deletions test/train-sets/ref/cb_eval1.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@ Reading datafile = train-sets/cb_eval1
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3

finished run
number of examples per pass = 4
number of examples per pass = 0
passes used = 1
weighted example sum = 4.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 12
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
13 changes: 5 additions & 8 deletions test/train-sets/ref/cb_eval2.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@ Reading datafile = train-sets/cb_eval2
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 unknown 0 3
0.000000 0.000000 2 2.0 unknown 0 3
0.000000 0.000000 4 4.0 unknown 0 3

finished run
number of examples per pass = 4
number of examples per pass = 0
passes used = 1
weighted example sum = 4.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 12
weighted example sum = 0
weighted label sum = 0
average loss = -nan
total feature number = 0
10 changes: 5 additions & 5 deletions test/train-sets/ref/search_dep_parser.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ num sources = 1
average since instance current true current predicted cur cur predic cache examples
loss last counter output prefix output prefix pass pol made hits gener beta
88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.014199
48.000000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [0:8 1:4 2:4 3:4 1:4 ] 0 0 157 0 156 0.015381
34.000000 20.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:4 3:3 ] 1 0 319 0 312 0.030623
25.500000 17.000000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:2 0:8 3:2 3:3 ] 3 0 642 0 624 0.060402
48.000000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [0:8 1:1 2:1 3:3 1:4 ] 0 0 157 0 156 0.015381
28.750000 9.500000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 319 0 312 0.030623
14.625000 0.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 642 0 624 0.060402

finished run
number of examples per pass = 2
passes used = 6
weighted example sum = 12
weighted label sum = 0
average loss = 22.4167
total feature number = 275075
average loss = 9.91667
total feature number = 275092
22 changes: 22 additions & 0 deletions test/train-sets/ref/search_dep_parser_arceager.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Num weight bits = 20
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
creating cache_file = train-sets/wsj_small.dparser.vw.gz.cache
Reading datafile = train-sets/wsj_small.dparser.vw.gz
num sources = 1
average since instance current true current predicted cur cur predic cache examples
loss last counter output prefix output prefix pass pol made hits gener beta
97.000000 97.000000 1 [43:1 5:2 5:2 5:2 1..] [0:1 0:1 0:1 0:1 0:..] 0 0 97 0 97 0.000960
52.500000 8.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 4:2 2:3 2:4 ] 0 0 106 0 106 0.001049
26.250000 0.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 115 0 212 0.002108
13.125000 0.000000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 124 0 424 0.004221

finished run
number of examples per pass = 2
passes used = 4
weighted example sum = 8
weighted label sum = 0
average loss = undefined (no holdout)
total feature number = 32302
6 changes: 3 additions & 3 deletions test/train-sets/ref/search_dep_parser_cost_to_go.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ loss last counter output prefix output prefix p
88.000000 88.000000 1 [43:1 5:2 5:2 5:2 1..] [0:8 1:1 2:1 3:1 4:..] 0 0 144 0 144 0.000000
47.500000 7.000000 2 [2:2 3:5 0:8 3:7 3:4 ] [2:2 0:8 2:4 2:4 2:4 ] 0 0 156 0 156 0.001439
29.250000 11.000000 4 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 1 0 312 0 312 0.002996
15.875000 2.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 624 0 624 0.006101
16.375000 3.500000 8 [2:2 3:5 0:8 3:7 3:4 ] [2:2 3:5 0:8 3:7 3:4 ] 3 0 624 0 624 0.006101

finished run
number of examples per pass = 2
passes used = 6
weighted example sum = 12
weighted label sum = 0
average loss = 10.5833
total feature number = 276589
average loss = 10.9167
total feature number = 276631
2 changes: 2 additions & 0 deletions vowpalwabbit/gd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,8 @@ base_learner* setup(vw& all)
stride = set_learn<true>(all, feature_mask_off, g);
else
stride = set_learn<false>(all, feature_mask_off, g);
if (!all.training)
stride = 1;
all.reg.stride_shift = (uint32_t)ceil_log_2(stride-1);

learner<gd>& ret = init_learner(&g, g.learn, ((uint64_t)1 << all.reg.stride_shift));
Expand Down
33 changes: 19 additions & 14 deletions vowpalwabbit/mwt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace MWT {
v_array<uint64_t> policies;
double total;
uint32_t num_classes;
bool learn;

v_array<namespace_index> indices;// excluded namespaces
features feature_space[256];
Expand Down Expand Up @@ -149,19 +150,22 @@ namespace MWT {

void finish_example(vw& all, mwt& c, example& ec)
{
float loss = 0.;
if (c.observation != nullptr)
loss = get_unbiased_cost(c.observation, ec.pred.scalars[0]);

all.sd->update(ec.test_only, loss, 1.f, ec.num_features);

for (int sink : all.final_prediction_sink)
print_scalars(sink, ec.pred.scalars, ec.tag);

v_array<float> temp = ec.pred.scalars;
ec.pred.multiclass = (uint32_t)temp[0];
CB::print_update(all, c.observation != nullptr, ec, nullptr, false);
ec.pred.scalars = temp;
if (c.learn)
{
float loss = 0.;
if (c.observation != nullptr)
loss = get_unbiased_cost(c.observation, ec.pred.scalars[0]);

all.sd->update(ec.test_only, loss, 1.f, ec.num_features);

for (int sink : all.final_prediction_sink)
print_scalars(sink, ec.pred.scalars, ec.tag);

v_array<float> temp = ec.pred.scalars;
ec.pred.multiclass = (uint32_t)temp[0];
CB::print_update(all, c.observation != nullptr, ec, nullptr, false);
ec.pred.scalars = temp;
}
VW::finish_example(all, &ec);
}

Expand Down Expand Up @@ -241,6 +245,7 @@ base_learner* mwt_setup(vw& all)
if (all.vm.count("learn"))
{
c.num_classes = all.vm["learn"].as<uint32_t>();
c.learn = true;

if (count(all.args.begin(), all.args.end(),"--cb") == 0)
{ all.args.push_back("--cb");
Expand All @@ -251,7 +256,7 @@ base_learner* mwt_setup(vw& all)
}

learner<mwt>* l;
if (all.vm.count("learn"))
if (c.learn)
if (all.vm.count("exclude_eval"))
l = &init_learner(&c, setup_base(all), predict_or_learn<true, true, true>, predict_or_learn<true, true, false>, 1);
else
Expand Down
85 changes: 48 additions & 37 deletions vowpalwabbit/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ struct action_repr
{ action a;
features *repr;
action_repr(action _a, features* _repr) : a(_a)
{ if(_repr!=NULL)
{ if(_repr!=nullptr)
{ repr = new features();
repr->deep_copy_from(*_repr);
}
else
repr = NULL;
repr = nullptr;
}
action_repr(action _a) : a(_a), repr(NULL) {}
action_repr(action _a) : a(_a), repr(nullptr) {}
};

struct action_cache
Expand Down Expand Up @@ -549,11 +549,9 @@ void reset_search_structure(search_private& priv)
if (priv.beta > 1) priv.beta = 1;
}
for (Search::action_repr& ar : priv.ptag_to_action)
{ if(ar.repr !=NULL)
{ ar.repr->values.delete_v();
ar.repr->indicies.delete_v();
ar.repr->space_names.delete_v();
cdbg << "delete_v" << endl;
{ if(ar.repr !=nullptr)
{ ar.repr->delete_v();
delete ar.repr;
}
}
priv.ptag_to_action.erase();
Expand Down Expand Up @@ -640,26 +638,29 @@ void add_example_conditioning(search_private& priv, example& ec, size_t conditio
}

if (priv.acset.use_passthrough_repr)
{ cdbg << "BEGIN adding passthrough features" << endl;
for (size_t i=0; i<I; i++)
{
features& fs = *(condition_on_actions[i].repr);
char name = condition_on_names[i];
for (size_t k=0; k<fs.size(); k++)
if ((fs.values[k] > 1e-10) || (fs.values[k] < -1e-10))
{ uint64_t fid = 84913 + 48371803 * (extra_offset + 8392817 * name) + 840137 * (4891 + fs.indicies[k]);
if (priv.all->audit)
{ priv.dat_new_feature_audit_ss.str("");
priv.dat_new_feature_audit_ss.clear();
priv.dat_new_feature_audit_ss << "passthrough_repr_" << i << '_' << k;
}

priv.dat_new_feature_ec = &ec;
priv.dat_new_feature_idx = fid;
priv.dat_new_feature_namespace = conditioning_namespace;
priv.dat_new_feature_value = fs.values[k];
add_new_feature(priv, 1., 4398201 << priv.all->reg.stride_shift);
}
}
{ if (condition_on_actions[i].repr == nullptr) continue;
features& fs = *(condition_on_actions[i].repr);
char name = condition_on_names[i];
for (size_t k=0; k<fs.size(); k++)
if ((fs.values[k] > 1e-10) || (fs.values[k] < -1e-10))
{ uint64_t fid = 84913 + 48371803 * (extra_offset + 8392817 * name) + 840137 * (4891 + fs.indicies[k]);
if (priv.all->audit)
{ priv.dat_new_feature_audit_ss.str("");
priv.dat_new_feature_audit_ss.clear();
priv.dat_new_feature_audit_ss << "passthrough_repr_" << i << '_' << k;
}

priv.dat_new_feature_ec = &ec;
priv.dat_new_feature_idx = fid;
priv.dat_new_feature_namespace = conditioning_namespace;
priv.dat_new_feature_value = fs.values[k];
add_new_feature(priv, 1., 4398201 << priv.all->reg.stride_shift);
}
}
cdbg << "END adding passthrough features" << endl;
}

features& con_fs = ec.feature_space[conditioning_namespace];
if ((con_fs.size() > 0) && (con_fs.sum_feat_sq > 0.))
Expand Down Expand Up @@ -785,7 +786,7 @@ void allowed_actions_to_label(search_private& priv, size_t ec_cnt, const action*
}
}
else // non-LDF, no action costs
{ if ((allowed_actions == NULL) || (allowed_actions_cnt == 0)) // any action is allowed
{ if ((allowed_actions == nullptr) || (allowed_actions_cnt == 0)) // any action is allowed
{ bool set_to_one = false;
if (cs_get_costs_size(isCB, lab) != priv.A)
{ cs_costs_erase(isCB, lab);
Expand Down Expand Up @@ -830,6 +831,7 @@ template<class T> void push_at(v_array<T>& v, T item, size_t pos)
else
{ if (v.end_array > v.begin() + pos)
{ // there's enough memory, just not enough filler
memset(v.end(), 0, sizeof(T) * (pos - v.size()));
v.begin()[pos] = item;
v.end() = v.begin() + pos + 1;
}
Expand Down Expand Up @@ -1968,8 +1970,12 @@ void search_finish(search& sch)

priv.train_trajectory.delete_v();
for (Search::action_repr& ar : priv.ptag_to_action)
if(ar.repr != NULL)
ar.repr->delete_v();
{ if(ar.repr !=nullptr)
{ ar.repr->delete_v();
delete ar.repr;
cdbg << "delete_v" << endl;
}
}
priv.ptag_to_action.delete_v();
clear_memo_foreach_action(priv);
priv.memo_foreach_action.delete_v();
Expand Down Expand Up @@ -2389,14 +2395,17 @@ action search::predict(example& ec, ptag mytag, const action* oracle_actions, si
if (mytag != 0)
{ if (mytag < priv->ptag_to_action.size())
{ cdbg << "delete_v at " << mytag << endl;
if(priv->ptag_to_action[mytag].repr != NULL)
priv->ptag_to_action[mytag].repr->delete_v();
if(priv->ptag_to_action[mytag].repr != nullptr)
{ priv->ptag_to_action[mytag].repr->delete_v();
delete priv->ptag_to_action[mytag].repr;
}
}
if (priv->acset.use_passthrough_repr)
{ assert((mytag >= priv->ptag_to_action.size()) || (priv->ptag_to_action[mytag].repr == nullptr));
push_at(priv->ptag_to_action, action_repr(a, &(priv->last_action_repr)), mytag);
else
push_at(priv->ptag_to_action, action_repr(a, (features*)NULL), mytag);
cdbg << "push_at " << mytag << endl;
} else
push_at(priv->ptag_to_action, action_repr(a, (features*)nullptr), mytag);
cdbg << "push_at " << mytag << endl;
}
if (priv->auto_hamming_loss)
loss( priv->use_action_costs
Expand All @@ -2414,8 +2423,10 @@ action search::predictLDF(example* ecs, size_t ec_cnt, ptag mytag, const action*
if ((mytag != 0) && ecs[a].l.cs.costs.size() > 0)
{ if (mytag < priv->ptag_to_action.size())
{ cdbg << "delete_v at " << mytag << endl;
if(priv->ptag_to_action[mytag].repr != NULL)
priv->ptag_to_action[mytag].repr->delete_v();
if(priv->ptag_to_action[mytag].repr != nullptr)
{ priv->ptag_to_action[mytag].repr->delete_v();
delete priv->ptag_to_action[mytag].repr;
}
}
push_at(priv->ptag_to_action, action_repr(ecs[a].l.cs.costs[0].class_index, &(priv->last_action_repr)), mytag);
}
Expand Down
Loading

0 comments on commit 92071ff

Please sign in to comment.