diff --git a/LightCTR/common/avx.h b/LightCTR/common/avx.h
index aa4e9c6..9aef337 100644
--- a/LightCTR/common/avx.h
+++ b/LightCTR/common/avx.h
@@ -206,7 +206,7 @@ inline void avx_vecRcp(const float* x, float *res, size_t len) {
     }
 }
 
-inline void avx_vecScale(const float* x, float *res, size_t len, float scalar) {
+inline void avx_vecScale(const float* x, float *res, size_t len, const float scalar) {
     const __m256 _scalar = _mm256_broadcast_ss(&scalar);
     if (len > 7) {
         for (; len > 7; len -= 8) {
diff --git a/LightCTR/dl_algo_abst.h b/LightCTR/dl_algo_abst.h
index 30ced93..4e3a7e1 100644
--- a/LightCTR/dl_algo_abst.h
+++ b/LightCTR/dl_algo_abst.h
@@ -64,7 +64,7 @@ class DL_Algo_Abst {
             vector<float> pred = Predict(rid, dataSet);
 
             assert(pred.size() == multiclass_output_cnt);
-            outputActivFun.forward(&pred);
+            outputActivFun.forward(pred.data(), pred.size());
 
             // init threadLocal var
             vector<float>& grad = *tl_grad;
@@ -84,11 +84,12 @@ class DL_Algo_Abst {
             } else {
                 onehot[label[rid]] = 1; // label should begin from 0
             }
-            lossFun.gradient(&pred, &onehot, &grad);
+            lossFun.gradient(pred.data(), onehot.data(), grad.data(), pred.size());
             if (multiclass_output_cnt > 1) {
                 // Notice when LossFunction is Logistic annotation next line,
                 // otherwise run this line like square + softmax
-                outputActivFun.backward(&grad, &pred, &grad);
+                outputActivFun.backward(grad.data(), pred.data(),
+                                        grad.data(), grad.size());
             }
             grad_Matrix->loadDataPtr(&grad);
             wrapper[0] = grad_Matrix;
@@ -112,13 +113,13 @@ class DL_Algo_Abst {
             GradientUpdater::__global_bTraining = false;
 
             // Validate Loss
-            std::atomic<float> loss(0.0f);
-            std::atomic<int> correct(0);
+            float loss = 0.0f;
+            int correct = 0;
             for (size_t rid = 0; rid < dataRow_cnt; rid++) {
                 auto task = [&, rid]() {
                     vector<float> pred = Predict(rid, dataSet);
-                    outputActivFun.forward(&pred);
+                    outputActivFun.forward(pred.data(), pred.size());
 
                     // init threadLocal var
                     vector<float>& grad = *tl_grad;
@@ -137,7 +138,7 @@ class DL_Algo_Abst {
                     } else {
                         onehot[label[rid]] = 1; // label should begin from 0
                     }
-                    loss = loss + lossFun.loss(&pred, &onehot);
+                    loss += lossFun.loss(pred.data(), onehot.data(), pred.size());
                 };
                 if (dl_algo == RNN) {
                     task();
@@ -147,7 +148,7 @@ class DL_Algo_Abst {
             }
             threadpool->wait();
             printf("Epoch %zu Loss = %f correct = %.3f\n",
-                   p, loss.load(), 1.0f * correct / dataRow_cnt);
+                   p, loss, 1.0f * correct / dataRow_cnt);
         }
     }
 }
diff --git a/LightCTR/predict/gbm_predict.cpp b/LightCTR/predict/gbm_predict.cpp
index 8807264..bc0d1aa 100644
--- a/LightCTR/predict/gbm_predict.cpp
+++ b/LightCTR/predict/gbm_predict.cpp
@@ -39,8 +39,8 @@ void GBM_Predict::Predict(string savePath) {
             pCTR = sigmoid.forward(tmp[0]);
             pLabel.emplace_back(pCTR > 0.5 ? 1 : 0);
         } else {
-            softmax.forward(&tmp);
-            size_t idx = softmax.forward_max(&tmp);
+            softmax.forward(tmp.data(), tmp.size());
+            size_t idx = softmax.forward_max(tmp.data(), tmp.size());
             pCTR = tmp[idx];
             pLabel.emplace_back(idx);
         }
diff --git a/LightCTR/train/layer/convLayer.h b/LightCTR/train/layer/convLayer.h
index 3c1cc9d..a2ffa31 100644
--- a/LightCTR/train/layer/convLayer.h
+++ b/LightCTR/train/layer/convLayer.h
@@ -155,7 +155,7 @@ class Conv_Layer : public Layer_Base {
             // apply Activation Function
             m_ptr->operate([this](vector<float>* matrix) {
                 assert(matrix);
-                this->getActiveFun().forward(matrix);
+                this->getActiveFun().forward(matrix->data(), matrix->size());
             });
             output_act[filid] = m_ptr;
         }
@@ -196,8 +196,9 @@ class Conv_Layer : public Layer_Base {
                 }
             }
             m_ptr->operate([&, i](vector<float>* matrix) {
-                this->prevLayer->getActiveFun().backward(matrix,
-                    this->prevLayer->output()[i]->pointer(), matrix);
+                this->prevLayer->getActiveFun().backward(matrix->data(),
+                    this->prevLayer->output()[i]->pointer()->data(),
+                    matrix->data(), matrix->size());
             });
             input_delta[i] = m_ptr;
         }
diff --git a/LightCTR/train/layer/fullyconnLayer.h b/LightCTR/train/layer/fullyconnLayer.h
index 9bc6518..e6e766c 100644
--- a/LightCTR/train/layer/fullyconnLayer.h
+++ b/LightCTR/train/layer/fullyconnLayer.h
@@ -108,7 +108,7 @@ class Fully_Conn_Layer : public Layer_Base {
         wrapper.resize(1);
 
         if (this->nextLayer) {
-            this->getActiveFun().forward(output_act.pointer());
+            this->getActiveFun().forward(output_act.pointer()->data(), output_act.size());
             wrapper[0] = &output_act;
             return this->nextLayer->forward(wrapper);
         } else {
@@ -150,7 +150,10 @@ class Fully_Conn_Layer : public Layer_Base {
            prev_output_act = this->prevLayer->output()[0]->pointer();
            assert(prev_output_act && prev_output_act->size() == this->input_dimension);
 
-            this->prevLayer->getActiveFun().backward(input_delta.pointer(), prev_output_act, input_delta.pointer());
+            this->prevLayer->getActiveFun().backward(input_delta.pointer()->data(),
+                                                     prev_output_act->data(),
+                                                     input_delta.pointer()->data(),
+                                                     input_delta.size());
             assert(input_delta.pointer()->size() == this->input_dimension);
 
             vector<Matrix*>& wrapper = *tl_wrapper;
diff --git a/LightCTR/train/layer/sampleLayer.h b/LightCTR/train/layer/sampleLayer.h
index b21b6f9..a47d1b5 100644
--- a/LightCTR/train/layer/sampleLayer.h
+++ b/LightCTR/train/layer/sampleLayer.h
@@ -99,7 +99,10 @@ class Sample_Layer : public Layer_Base {
             assert(!isinf(*sigmaPtr));
         }
 
-        this->prevLayer->getActiveFun().backward(input_delta.pointer(), prev_output_act, input_delta.pointer());
+        this->prevLayer->getActiveFun().backward(input_delta.pointer()->data(),
+                                                 prev_output_act->data(),
+                                                 input_delta.pointer()->data(),
+                                                 input_delta.size());
 
         vector<Matrix*>& wrapper = *tl_wrapper;
         wrapper[0] = &input_delta;
diff --git a/LightCTR/train/train_gbm_algo.cpp b/LightCTR/train/train_gbm_algo.cpp
index 4854f70..3da83f1 100644
--- a/LightCTR/train/train_gbm_algo.cpp
+++ b/LightCTR/train/train_gbm_algo.cpp
@@ -77,7 +77,7 @@ void Train_GBM_Algo::flash(RegTreeNode *root, size_t inClass) { // run per gbm t
         tmp->assign(&dataSet_Pred[rid * multiclass], &dataSet_Pred[rid * multiclass + multiclass]);
         assert(tmp->size() == multiclass);
 
-        softmax.forward(tmp);
+        softmax.forward(tmp->data(), tmp->size());
         for (size_t c = 0; c < multiclass; c++) {
             float grad_t = tmp->at(c);
             float hess_t = grad_t * (1.0 - grad_t) * 2.0;
diff --git a/LightCTR/train/train_rnn_algo.h b/LightCTR/train/train_rnn_algo.h
index f84e083..23031b6 100644
--- a/LightCTR/train/train_rnn_algo.h
+++ b/LightCTR/train/train_rnn_algo.h
@@ -47,11 +47,9 @@ class Train_RNN_Algo : public DL_Algo_Abst {
     vector<float>& Predict(size_t rid, vector<vector<float> >& dataRow) {
         static Matrix* dataRow_Matrix = new Matrix(1, 28);
         static Matrix* dataRow_Matrix_fc = new Matrix(1, hidden_size, 0);
-        static vector<Matrix*> *tmp = new vector<Matrix*>();
-        tmp->resize(1);
-
-        vector<float> *pred = NULL;
-        tmp->at(0) = dataRow_Matrix;
+        static vector<Matrix*> tmp;
+        tmp.resize(1);
+        tmp[0] = dataRow_Matrix;
 
         auto begin = dataRow[rid].begin();
         auto end = begin;
@@ -59,17 +57,17 @@ class Train_RNN_Algo : public DL_Algo_Abst {
             dataRow_Matrix->pointer()->assign(begin, end);
-            pred = this->inputLayer->forward(*tmp);
+            this->inputLayer->forward(tmp);
         }
         assert(end == dataRow[rid].end());
 
         // Attention Unit
-        pred = attentionLayer->forward(*inputLayer->seq_output());
+        vector<float> pred = attentionLayer->forward(inputLayer->seq_output());
 
-        assert(pred && pred->size() == hidden_size);
-        dataRow_Matrix_fc->loadDataPtr(pred);
-        tmp->at(0) = dataRow_Matrix_fc;
-        return this->fcLayer->forward(*tmp);
+        assert(pred.size() == hidden_size);
+        dataRow_Matrix_fc->loadDataPtr(&pred);
+        tmp[0] = dataRow_Matrix_fc;
+        return this->fcLayer->forward(tmp);
     }
 
     void BP(size_t rid, const vector<float>& grad) {
diff --git a/LightCTR/train/train_vae_algo.h b/LightCTR/train/train_vae_algo.h
index fa0f901..1ddc644 100644
--- a/LightCTR/train/train_vae_algo.h
+++ b/LightCTR/train/train_vae_algo.h
@@ -68,11 +68,11 @@ class Train_VAE_Algo {
                 dataRow_Matrix->loadDataPtr(&dataSet[rid]);
                 tmp[0] = dataRow_Matrix;
                 vector<float>& pred = this->encodeLayer->forward(tmp);
-                outputActivFun.forward(&pred);
+                outputActivFun.forward(pred.data(), pred.size());
                 assert(pred.size() == feature_cnt);
                 grad.resize(pred.size());
-                lossFun.gradient(&pred, &dataSet[rid], &grad);
-                outputActivFun.backward(&grad, &pred, &grad);
+                lossFun.gradient(pred.data(), dataSet[rid].data(), grad.data(), grad.size());
+                outputActivFun.backward(grad.data(), pred.data(), grad.data(), grad.size());
                 // if LossFunction is Logistic, annotation last line
                 grad_Matrix->loadDataPtr(&grad);
                 tmp[0] = grad_Matrix;
@@ -91,19 +91,10 @@ class Train_VAE_Algo {
                 dataRow_Matrix->loadDataPtr(&dataSet[rid]);
                 tmp[0] = dataRow_Matrix;
                 vector<float> pred = this->encodeLayer->forward(tmp);
-                outputActivFun.forward(&pred);
-                loss += lossFun.loss(&pred, &dataSet[rid]);
-                if (rid == 4 || rid == 8) { // look like number 4 or 5
-                    for (size_t i = 0; i < feature_cnt; i++) {
-                        cout.width(3);
-                        cout << int(pred[i] * 255) << ",";
-                        if ((i + 1) % 28 == 0) {
-                            cout << endl;
-                        }
-                    }
-                }
+                outputActivFun.forward(pred.data(), pred.size());
+                loss += lossFun.loss(pred.data(), dataSet[rid].data(), pred.size());
             }
-            printf("\nepoch %zu Loss = %f\n", p, loss);
+            printf("Epoch %zu Loss = %f\n", p, loss);
         }
     }
 
@@ -171,21 +162,6 @@ class Train_VAE_Algo {
         }
         this->dataRow_cnt = this->dataSet.size();
         assert(this->dataRow_cnt > 0);
-
-        for (size_t i = 0; i < feature_cnt; i++) {
-            cout.width(3);
-            cout << int(dataSet[4][i] * 255) << ",";
-            if ((i + 1) % 28 == 0) {
-                cout << endl;
-            }
-        }
-        for (size_t i = 0; i < feature_cnt; i++) {
-            cout.width(3);
-            cout << int(dataSet[8][i] * 255) << ",";
-            if ((i + 1) % 28 == 0) {
-                cout << endl;
-            }
-        }
     }
 
 private:
diff --git a/LightCTR/train/unit/attention_unit.h b/LightCTR/train/unit/attention_unit.h
index f57b9bb..9ec0d51 100644
--- a/LightCTR/train/unit/attention_unit.h
+++ b/LightCTR/train/unit/attention_unit.h
@@ -65,7 +65,7 @@ class Attention_Unit : public Layer_Base {
             *fc_output_act->getEle(0, idx) = res[0];
         }
         // Softmax normalization
-        softmax.forward(fc_output_act->pointer());
+        softmax.forward(fc_output_act->pointer()->data(), fc_output_act->size());
 
         attentionOutput->zeroInit();
         FOR(idx, prevLOutputMatrix.size()) {
@@ -103,7 +103,8 @@ class Attention_Unit : public Layer_Base {
             assert(res->size() == 1);
             scaleDelta[idx] = *cache_bp->getEle(0, 0);
         }
-        softmax.backward(&scaleDelta, fc_output_act->pointer(), &scaleDelta);
+        softmax.backward(scaleDelta.data(), fc_output_act->pointer()->data(),
+                         scaleDelta.data(), scaleDelta.size());
         // update transformFunc
         FOR(idx, input.size()) {
             *cache_bp->getEle(0, 0) = scaleDelta[idx];
diff --git a/LightCTR/train/unit/lstm_unit.h b/LightCTR/train/unit/lstm_unit.h
index d0a44a3..14cbb90 100644
--- a/LightCTR/train/unit/lstm_unit.h
+++ b/LightCTR/train/unit/lstm_unit.h
@@ -33,9 +33,9 @@ using namespace std;
                   x##_h_grad_w->zeroInit(); \
                   x##_grad_b->zeroInit();
 
-#define UPDATE(x) updater_##x##_b.update(0, hidden_size, x##_b->reference(), x##_grad_b->reference()); \
-                  updater_##x##_w.update(0, dimension * hidden_size, x##_w->reference(), x##_grad_w->reference()); \
-                  updater_##x##_h_w.update(0, hidden_size * hidden_size, x##_h_w->reference(), x##_h_grad_w->reference());
+#define UPDATE(x) updater_##x##_b.update(0, hidden_size, x##_b->pointer()->data(), x##_grad_b->pointer()->data()); \
+                  updater_##x##_w.update(0, dimension * hidden_size, x##_w->pointer()->data(), x##_grad_w->pointer()->data()); \
+                  updater_##x##_h_w.update(0, hidden_size * hidden_size, x##_h_w->pointer()->data(), x##_h_grad_w->pointer()->data());
 
 // Bidirectional Recurrent Cell impl by Long Short Term Memory
 template
@@ -139,7 +139,7 @@ class LSTM_Unit : public Layer_Base {
         }
         // apply ouput gate after do tanh
         cache = c_state[cur_seqid]->copy(cache);
-        inner_activeFun.forward(cache->pointer());
+        inner_activeFun.forward(cache->pointer()->data(), cache->size());
         c_state_act[cur_seqid] = cache->copy(c_state_act[cur_seqid]);
 
         h_output[cur_seqid] = cache->copy(h_output[cur_seqid])->dotProduct(oup_gate[cur_seqid]);
@@ -182,7 +182,10 @@ class LSTM_Unit : public Layer_Base {
             { // output gate weight
                 oup_gate_delta = h_output_delta->copy(oup_gate_delta)->dotProduct(c_state_act[seqid]);
 
-                sigmoid.backward(oup_gate_delta->pointer(), oup_gate[seqid]->pointer(), oup_gate_delta->pointer());
+                sigmoid.backward(oup_gate_delta->pointer()->data(),
+                                 oup_gate[seqid]->pointer()->data(),
+                                 oup_gate_delta->pointer()->data(),
+                                 oup_gate_delta->size());
                 accumGrad(oup_grad_w, oup_gate_delta, input[seqid]);
 
                 if (seqid > 0) {
@@ -200,11 +203,17 @@ class LSTM_Unit : public Layer_Base {
            if (seqid < (int)cur_seqid - 1) { // accumulate the last time c_state's delta and h_output's delta
                assert(c_state_delta[seqid]);
                cache = h_output_delta->copy(cache)->dotProduct(oup_gate[seqid]);
-                inner_activeFun.backward(cache->pointer(), c_state_act[seqid]->pointer(), cache->pointer());
+                inner_activeFun.backward(cache->pointer()->data(),
+                                         c_state_act[seqid]->pointer()->data(),
+                                         cache->pointer()->data(),
+                                         cache->size());
                c_state_delta[seqid]->add(cache);
            } else { // for the first time of bp, clear memory
                c_state_delta[seqid] = h_output_delta->copy(c_state_delta[seqid])->dotProduct(oup_gate[seqid]);
-                inner_activeFun.backward(c_state_delta[seqid]->pointer(), c_state_act[seqid]->pointer(), c_state_delta[seqid]->pointer());
+                inner_activeFun.backward(c_state_delta[seqid]->pointer()->data(),
+                                         c_state_act[seqid]->pointer()->data(),
+                                         c_state_delta[seqid]->pointer()->data(),
+                                         c_state_delta[seqid]->size());
            }
 
            { // delta of c_state in t-1, forget gate weight and delta of extra_info
@@ -214,7 +223,10 @@ class LSTM_Unit : public Layer_Base {
                // clear prev-time memory
                c_state_delta[seqid - 1] = c_state_delta[seqid]->copy(c_state_delta[seqid - 1])->dotProduct(fg_gate[seqid]);
                fg_gate_delta = c_state_delta[seqid]->copy(fg_gate_delta)->dotProduct(c_state[seqid - 1]);
-                sigmoid.backward(fg_gate_delta->pointer(), fg_gate[seqid]->pointer(), fg_gate_delta->pointer());
+                sigmoid.backward(fg_gate_delta->pointer()->data(),
+                                 fg_gate[seqid]->pointer()->data(),
+                                 fg_gate_delta->pointer()->data(),
+                                 fg_gate_delta->size());
                accumGrad(fg_grad_w, fg_gate_delta, input[seqid]);
                accumGrad(fg_h_grad_w, fg_gate_delta, h_output[seqid - 1]);
 
@@ -227,7 +239,10 @@ class LSTM_Unit : public Layer_Base {
 
                // input gate weight
                inp_gate_delta = c_state_delta[seqid]->copy(inp_gate_delta)->dotProduct(info[seqid]);
-                sigmoid.backward(inp_gate_delta->pointer(), inp_gate[seqid]->pointer(), inp_gate_delta->pointer());
+                sigmoid.backward(inp_gate_delta->pointer()->data(),
+                                 inp_gate[seqid]->pointer()->data(),
+                                 inp_gate_delta->pointer()->data(),
+                                 inp_gate_delta->size());
                accumGrad(inp_grad_w, inp_gate_delta, input[seqid]);
 
                if (seqid > 0) {
@@ -241,7 +256,10 @@ class LSTM_Unit : public Layer_Base {
 
                // delta of input_act transform
                input_act_delta = c_state_delta[seqid]->copy(input_act_delta)->dotProduct(inp_gate[seqid]);
-                inner_activeFun.backward(input_act_delta->pointer(), info[seqid]->pointer(), input_act_delta->pointer());
+                inner_activeFun.backward(input_act_delta->pointer()->data(),
+                                         info[seqid]->pointer()->data(),
+                                         input_act_delta->pointer()->data(),
+                                         input_act_delta->size());
                // input gate weight
                accumGrad(info_grad_w, input_act_delta, input[seqid]);
                if (seqid > 0) {
@@ -262,9 +280,9 @@ class LSTM_Unit : public Layer_Base {
         wrapper[0] = h_output[cur_seqid - 1];
         return wrapper;
     }
-    vector<Matrix*>* seq_output() { // get rnn encoder output sequence for attention decoder
+    const vector<Matrix*>& seq_output() { // get rnn encoder output sequence for attention decoder
         assert(cur_seqid == batch_size);
-        return &h_output;
+        return h_output;
     }
 
     void applyBatchGradient() {
@@ -311,7 +329,8 @@ class LSTM_Unit : public Layer_Base {
             target->at(cur_seqid)->add(cache);
         }
         target->at(cur_seqid)->add(bias);
-        actFun->forward(target->at(cur_seqid)->pointer());
+        actFun->forward(target->at(cur_seqid)->pointer()->data(),
+                        target->at(cur_seqid)->size());
     }
 
     size_t cur_seqid;
diff --git a/LightCTR/util/activations.h b/LightCTR/util/activations.h
index e31698e..b03b7ad 100644
--- a/LightCTR/util/activations.h
+++ b/LightCTR/util/activations.h
@@ -13,24 +13,23 @@
 #include <cmath>
 #include <vector>
 #include "assert.h"
+#include "../common/avx.h"
 
 using namespace std;
 
 class Activation {
 public:
-    virtual inline void forward(vector<float>* input) = 0;
-    virtual inline void backward(const vector<float>* delta, const vector<float>* forward_output, vector<float>* to) = 0;
+    virtual inline void forward(float* input, size_t len) = 0;
+    virtual inline void backward(const float* delta, const float* forward_output, float* to, size_t len) = 0;
 };
 
 class Identity : public Activation {
 public:
-    inline void forward(vector<float>* input) {
+    inline void forward(float* input, size_t len) {
         return;
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
-        for (size_t i = 0; i < delta->size(); i++) {
-            to->at(i) = delta->at(i);
+    inline void backward(const float* delta, const float* forward_output, float* to, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            to[i] = delta[i];
         }
     }
 };
@@ -42,30 +41,28 @@ class Binary_Sigmoid : public Activation {
         const float res = (input + 1.0f) / 2.0f;
         return fmax(0.0f, fmin(1.0f, res)); // clip to [0, 1]
     }
-    inline void forward(vector<float>* input) {
+    inline void forward(float* input, size_t len) {
         float scaler = 0.0f;
-        for (auto it = input->begin(); it != input->end(); it++) {
-            scaler += abs(*it); // accumulate of L1-norm
+        for (size_t i = 0; i < len; i++) {
+            scaler += fabs(input[i]); // accumulate of L1-norm
         }
-        scaler /= input->size();
-        for (auto it = input->begin(); it != input->end(); it++) {
-            float sign = *it > 0 ? 1 : -1;
-            *it = *it * scaler * sign;
+        scaler /= len;
+        for (size_t i = 0; i < len; i++) {
+            const float sign = input[i] > 0 ? 1 : -1;
+            input[i] *= scaler * sign;
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
         // standard backward propagation except binary weight
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
-        for (size_t i = 0; i < delta->size(); i++) {
-            to->at(i) = delta->at(i);
+        for (size_t i = 0; i < len; i++) {
+            to[i] = delta[i];
         }
     }
 };
 
 class Sigmoid : public Activation {
 public:
-    inline float forward(float input) {
+    inline float forward(float input) const {
         if(input < -30){
             return 1e-12;
         } else if(input > 30) {
@@ -73,25 +70,22 @@ class Sigmoid : public Activation {
         }
         return 1.0f / (1.0f + exp(-input));
     }
-    inline void forward(vector<float>* input) {
-        for (auto it = input->begin(); it != input->end(); it++) {
-            assert(!isnan(*it));
-            if(*it < -30){
-                *it = 1e-12;
-            } else if(*it > 30) {
-                *it = 1.0 - 1e-12;
+    inline void forward(float* input, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            if(input[i] < -30){
+                input[i] = 1e-12;
+            } else if(input[i] > 30) {
+                input[i] = 1.0 - 1e-12;
             } else {
-                *it = 1.0f / (1.0f + exp(- (*it)));
+                input[i] = 1.0f / (1.0f + exp(- input[i]));
             }
-            assert(!isnan(*it));
+            assert(!isnan(input[i]));
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
-        for (size_t i = 0; i < delta->size(); i++) {
-            to->at(i) = delta->at(i) * foutput->at(i) * (1.0f - foutput->at(i));
-            assert(!isnan(to->at(i)));
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            to[i] = delta[i] * foutput[i] * (1.0f - foutput[i]);
+            assert(!isnan(to[i]));
         }
     }
 };
@@ -100,39 +94,33 @@ class Softmax : public Activation {
 public:
     Softmax(float _softTargetRate = 1.0f) : softTargetRate(_softTargetRate) {
     }
-    inline size_t forward_max(vector<float>* input) {
-        return max_element(input->begin(), input->end()) - input->begin();
+    inline size_t forward_max(const float* input, size_t len) const {
+        return std::max_element(input, input + len) - input;
     }
-    inline void forward(vector<float>* input) {
+    inline void forward(float* input, size_t len) {
         float sum = 0.0f;
-        auto maxV = *max_element(input->begin(), input->end());
+        auto maxV = *max_element(input, input + len);
         // for numerical stability overflow
-        for (auto it = input->begin(); it != input->end(); it++) {
-            sum += exp((*it - maxV) / softTargetRate);
-        }
-        for (auto it = input->begin(); it != input->end(); it++) {
-            *it = exp((*it - maxV) / softTargetRate) / sum;
-            if (*it == 0) {
-                *it = 1e-12;
-            } else if (*it == 1) {
-                *it = 1.0 - 1e-12;
+        for (size_t i = 0; i < len; i++) {
+            sum += exp((input[i] - maxV) / softTargetRate);
+        }
+        for (size_t i = 0; i < len; i++) {
+            input[i] = exp((input[i] - maxV) / softTargetRate) / sum;
+            if (input[i] == 0) {
+                input[i] = 1e-12;
+            } else if (input[i] == 1) {
+                input[i] = 1.0 - 1e-12;
             }
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
         // softmax Derivative (whether i == j) * softmax(input[i]) - softmax(input[i]) * softmax(input[i])
         // each derivative of Z_(L) = sum_i( delta_(i) * -forward_output_(i) * forward_output_(L) )
         //                            + delta_(L) * forward_output_(L)
-        float sum = 0.0f;
-        for (size_t i = 0; i < delta->size(); i++) {
-            sum += delta->at(i) * foutput->at(i);
-        }
-        for (size_t i = 0; i < delta->size(); i++) {
-            to->at(i) = (delta->at(i) - sum) * foutput->at(i);
-            to->at(i) /= softTargetRate;
-        }
+        float sum = avx_dotProduct(delta, foutput, len);
+        avx_vecAdd(delta, -sum, to, len);
+        avx_vecScale(to, to, len, foutput);
+        avx_vecScale(to, to, len, 1.0 / softTargetRate);
     }
 private:
     // used in distillation soft target softmax, when larger than 1 makes smooth classification
@@ -141,39 +129,35 @@ class Tanh : public Activation {
 public:
-    inline void forward(vector<float>* input) {
+    inline void forward(float* input, size_t len) {
         float t1, t2;
-        for (auto it = input->begin(); it != input->end(); it++) {
-            t1 = exp(*it), t2 = exp(- (*it));
-            *it = (t1 - t2) / (t1 + t2);
+        for (size_t i = 0; i < len; i++) {
+            t1 = exp(input[i]), t2 = exp(- input[i]);
+            input[i] = (t1 - t2) / (t1 + t2);
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
-        for (size_t i = 0; i < delta->size(); i++) {
-            to->at(i) = delta->at(i) * (1.0f - foutput->at(i) * foutput->at(i));
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            to[i] = delta[i] * (1.0f - foutput[i] * foutput[i]);
         }
     }
 };
 
 class ReLU : public Activation { // Local Response Normalization
 public:
-    inline void forward(vector<float>* input) {
-        for (auto it = input->begin(); it != input->end(); it++) {
-            if (*it < 0.0f) {
-                *it = 0.0f; // negative slope is 0
+    inline void forward(float* input, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            if (input[i] < 0.0f) {
+                input[i] = 0.0f; // negative slope is 0
             }
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
-        for (size_t i = 0; i < delta->size(); i++) {
-            if (foutput->at(i) == 0.0f) {
-                to->at(i) = 0.0f;
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            if (foutput[i] == 0.0f) {
+                to[i] = 0.0f;
             } else {
-                to->at(i) = delta->at(i);
+                to[i] = delta[i];
             }
         }
     }
@@ -181,18 +165,16 @@ class ReLU : public Activation { // Local Response Normalization
 
 class SoftPlus : public Activation {
 public:
-    inline void forward(vector<float>* input) {
-        for (auto it = input->begin(); it != input->end(); it++) {
-            *it = log(1 + exp(*it));
+    inline void forward(float* input, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            input[i] = log(1 + exp(input[i]));
         }
     }
-    inline void backward(const vector<float>* delta, const vector<float>* foutput, vector<float>* to) {
-        assert(delta->size() == foutput->size());
-        assert(to->size() == foutput->size());
+    inline void backward(const float* delta, const float* foutput, float* to, size_t len) {
        float t;
-        for (size_t i = 0; i < delta->size(); i++) {
-            t = exp(foutput->at(i));
-            to->at(i) = delta->at(i) * (t - 1) / t;
+        for (size_t i = 0; i < len; i++) {
+            t = exp(foutput[i]);
+            to[i] = delta[i] * (t - 1) / t;
         }
     }
 };
diff --git a/LightCTR/util/gradientUpdater.h b/LightCTR/util/gradientUpdater.h
index 98c5886..f0e42fe 100644
--- a/LightCTR/util/gradientUpdater.h
+++ b/LightCTR/util/gradientUpdater.h
@@ -136,7 +136,7 @@ class AdagradUpdater_Num : public GradientUpdater {
         fill(__adagrad_accum.begin(), __adagrad_accum.end(), 0);
     }
     template <typename T>
-    void update(size_t offset, size_t len, T& weight, T& grad) {
+    void update(size_t offset, size_t len, T* weight, T* grad) {
         assert(offset + len <= __adagrad_params_cnt);
         avx_vecScale(grad, grad, len, 1.0 / __global_minibatch_size);
         for (size_t i = 0; i < len; i++) {
@@ -210,7 +210,7 @@ class RMSpropUpdater_Num : public GradientUpdater {
         }
     }
     template <typename T>
-    void update(size_t offset, size_t len, T& weight, T& grad) {
+    void update(size_t offset, size_t len, T* weight, T* grad) {
         assert(offset + len <= __rms_params_cnt);
         for (size_t i = 0; i < len; i++) {
             float g = grad[i] / __global_minibatch_size, tmp;
diff --git a/LightCTR/util/loss.h b/LightCTR/util/loss.h
index 00e0977..b7aae59 100644
--- a/LightCTR/util/loss.h
+++ b/LightCTR/util/loss.h
@@ -17,25 +17,24 @@ using namespace std;
 template <typename T, typename L>
 class Loss {
 public:
-    virtual T loss(const vector<T>* pred, const vector<L>* label) const = 0;
-    virtual void gradient(const vector<T>* pred, const vector<L>* label, vector<T>* gradient) = 0;
+    virtual T loss(const T* pred, const L* label, size_t len) const = 0;
+    virtual void gradient(const T* pred, const L* label, T* gradient, size_t len) = 0;
 };
 
 template <typename T, typename L>
 class Square : public Loss<T, L> { // Mean Squared Error
 public:
-    T loss(const vector<T>* pred, const vector<L>* label) const {
+    T loss(const T* pred, const L* label, size_t len) const {
         T sum = 0.0f, tmp;
-        for (size_t i = 0; i < pred->size(); i++) {
-            tmp = pred->at(i) - label->at(i);
+        for (size_t i = 0; i < len; i++) {
+            tmp = pred[i] - label[i];
             sum += tmp / 2 * tmp;
         }
         return sum;
     }
-    void gradient(const vector<T>* pred, const vector<L>* label, vector<T>* gradient) {
-        assert(pred->size() == label->size());
-        for (size_t i = 0; i < pred->size(); i++) {
-            gradient->at(i) = pred->at(i) - label->at(i);
+    void gradient(const T* pred, const L* label, T* gradient, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            gradient[i] = pred[i] - label[i];
         }
     }
 };
@@ -43,24 +42,21 @@ class Square : public Loss<T, L> { // Mean Squared Error
 template <typename T, typename L>
 class Logistic : public Loss<T, L> {
 public:
-    T loss(const vector<T>* pred, const vector<L>* label) const {
-        assert(pred->size() == label->size());
+    T loss(const T* pred, const L* label, size_t len) const {
         T sum = 0.0f, p, l;
-        for (size_t i = 0; i < pred->size(); i++) {
-            p = pred->at(i);
-            l = label->at(i);
+        for (size_t i = 0; i < len; i++) {
+            p = pred[i];
+            l = label[i];
             sum += (l - (p >= 0)) * p - log(1.0f + exp(p - 2.0f * (p >= 0) * p));
             // sum += label->at(i) * log(pred->at(i)) + (1.0f - label->at(i)) * log(1.0f - pred->at(i));
         }
         assert(!isnan(sum));
         return sum;
     }
-    void gradient(const vector<T>* pred, const vector<L>* label, vector<T>* gradient) {
-        assert(pred->size() == label->size());
-        assert(gradient->size() == label->size());
+    void gradient(const T* pred, const L* label, T* gradient, size_t len) {
         // Notice output activator must be sigmoid
-        for (size_t i = 0; i < pred->size(); i++) {
-            gradient->at(i) = pred->at(i) - label->at(i);
+        for (size_t i = 0; i < len; i++) {
+            gradient[i] = pred[i] - label[i];
         }
     }
 };
@@ -68,26 +64,22 @@ class Logistic : public Loss<T, L> {
 template <typename T, typename L>
 class Logistic_Softmax : public Loss<T, L> {
 public:
-    T loss(const vector<T>* pred, const vector<L>* label) const {
-        assert(pred->size() == label->size());
+    T loss(const T* pred, const L* label, size_t len) const {
         T sum = 0.0f;
-        for (size_t i = 0; i < pred->size(); i++) {
-            if (label->at(i) == 1) {
-                sum += log(pred->at(i));
+        for (size_t i = 0; i < len; i++) {
+            if (label[i] == 1) {
+                sum += log(pred[i]);
             }
         }
         assert(!isnan(sum));
         return sum;
     }
-    void gradient(const vector<T>* pred, const vector<L>* label, vector<T>* gradient) {
-        assert(pred->size() == label->size());
-        assert(gradient->size() == label->size());
-
-        for (size_t i = 0; i < pred->size(); i++) {
-            if (label->at(i) == 1) {
-                gradient->at(i) = 1.0f - pred->at(i);
+    void gradient(const T* pred, const L* label, T* gradient, size_t len) {
+        for (size_t i = 0; i < len; i++) {
+            if (label[i] == 1) {
+                gradient[i] = 1.0f - pred[i];
             } else {
-                gradient->at(i) = - pred->at(i);
+                gradient[i] = - pred[i];
             }
         }
    }