Skip to content

Commit

Permalink
[DOCS] Improve Core API
Browse files Browse the repository at this point in the history
  • Loading branch information
andyyehoo committed Aug 21, 2017
1 parent 0e6e934 commit 7097f8f
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 82 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* [模型切分(modelPartitioner)](./docs/design/model_partitioner.md)
* [异步控制(syncController)](./docs/design/sync_controller.md)
* [定制函数(psFunc)](./docs/design/psfFunc.md)
* [核心类的说明](./docs/apis/interface_api.md)
* [核心接口类](./docs/apis/core_api.md)


## Algorithm
Expand Down
5 changes: 4 additions & 1 deletion README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ We welcome everyone interested in machine learning to contribute code, create is

## Design

* [Interface API](./docs/apis/interface_api.md)
* [Model Partitioner](./docs/design/model_partitioner.md)
* [SyncController](./docs/design/sync_controller.md)
* [psFunc](./docs/design/psfFunc.md)
* [Core API](./docs/apis/core_api.md)


## Algorithm

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ import scala.collection.mutable
* @param maxP: max value of y
* @param feaUsed: array of used feature of the input data
*/
class FMLearner(override val ctx: TaskContext, val minP: Double, val maxP: Double, val feaUsed:
Array[Int]) extends MLLearner(ctx) {
class FMLearner(override val ctx: TaskContext, val minP: Double, val maxP: Double, val feaUsed: Array[Int])
extends MLLearner(ctx) {

val LOG: Log = LogFactory.getLog(classOf[FMLearner])
val fmmodel = new FMModel(conf, ctx)
val fmModel = new FMModel(conf, ctx)

val learnType = conf.get(MLConf.ML_FM_LEARN_TYPE, MLConf.DEFAULT_ML_FM_LEARN_TYPE)
val feaNum: Int = conf.getInt(MLConf.ML_FEATURE_NUM, MLConf.DEFAULT_ML_FEATURE_NUM)
Expand All @@ -53,12 +54,11 @@ Array[Int]) extends MLLearner(ctx) {
val reg2: Double = conf.getDouble(MLConf.ML_FM_REG2, MLConf.DEFAULT_ML_FM_REG2)
val lr: Double = conf.getDouble(MLConf.ML_LEARN_RATE, MLConf.DEFAULT_ML_LEAR_RATE)
val vStddev: Double = conf.getDouble(MLConf.ML_FM_V_STDDEV, MLConf.DEFAULT_ML_FM_V_INIT)
// Put used feature indexes to vIndexs
//val vIndexs = feaUsed.zipWithIndex.filter((p:(Int,Int))=>p._1!=0).map((p:(Int,Int))=>p._2).array
val vIndexs = new RowIndex()
feaUsed.zipWithIndex.filter((p:(Int, Int))=>p._1!=0).map((p:(Int, Int))=>vIndexs.addRowId(p._2))
val feaUsedN = vIndexs.getRowsNumber
LOG.info("vIndexs's row's number = " + vIndexs)

val vIndexes = new RowIndex()
feaUsed.zipWithIndex.filter((p:(Int, Int))=>p._1!=0).map((p:(Int, Int))=>vIndexes.addRowId(p._2))
val feaUsedN = vIndexes.getRowsNumber
LOG.info("vIndexs's row's number = " + vIndexes)

/**
* Train a Factorization machines Model
Expand All @@ -68,8 +68,7 @@ Array[Int]) extends MLLearner(ctx) {
* @return : a learned model
*/
override
def train(trainData: DataBlock[LabeledData], vali: DataBlock[LabeledData]):
MLModel = {
def train(trainData: DataBlock[LabeledData], vali: DataBlock[LabeledData]): MLModel = {
val start = System.currentTimeMillis()
LOG.info(s"learnType=$learnType, feaNum=$feaNum, rank=$rank, #trainData=${trainData.size}")
LOG.info(s"reg0=$reg0, reg1=$reg1, reg2=$reg2, lr=$lr, vStev=$vStddev")
Expand All @@ -79,7 +78,7 @@ Array[Int]) extends MLLearner(ctx) {
val initCost = System.currentTimeMillis() - beforeInit
LOG.info(s"Init matrixes cost $initCost ms.")

globalMetrics.addMetrics(fmmodel.FM_OBJ, LossMetric(trainData.size()))
globalMetrics.addMetrics(fmModel.FM_OBJ, LossMetric(trainData.size()))

while (ctx.getIteration < epochNum) {
val startIter = System.currentTimeMillis()
Expand All @@ -90,7 +89,7 @@ Array[Int]) extends MLLearner(ctx) {
val loss = evaluate(trainData, w0.get(0), w, v)
val valiCost = System.currentTimeMillis() - startVali

globalMetrics.metrics(fmmodel.FM_OBJ, loss)
globalMetrics.metrics(fmModel.FM_OBJ, loss)
LOG.info(s"Epoch=${ctx.getIteration}, evaluate loss=${loss/trainData.size()}. " +
s"trainCost=$iterCost, " +
s"valiCost=$valiCost")
Expand All @@ -101,7 +100,7 @@ Array[Int]) extends MLLearner(ctx) {
val end = System.currentTimeMillis()
val cost = end - start
LOG.info(s"FM Learner train cost $cost ms.")
fmmodel
fmModel
}

/**
Expand All @@ -110,22 +109,24 @@ Array[Int]) extends MLLearner(ctx) {
def initModels(): Unit = {
if(ctx.getTaskId.getIndex == 0) {
for (row <- 0 until feaNum) {
fmmodel.v.update(new RandomNormal(fmmodel.v.getMatrixId(), row, 0.0, vStddev)).get()
fmModel.v.update(new RandomNormal(fmModel.v.getMatrixId(), row, 0.0, vStddev)).get()
}
}

fmmodel.v.clock().get()
fmModel.v.clock().get()
}

/**
* One iteration to train Factorization Machines
*
* @param dataBlock
* @return
*/
def oneIteration(dataBlock: DataBlock[LabeledData]): (DenseDoubleVector,
DenseDoubleVector, mutable.HashMap[Int, DenseDoubleVector]) = {
def oneIteration(dataBlock: DataBlock[LabeledData]):
(DenseDoubleVector, DenseDoubleVector, mutable.HashMap[Int, DenseDoubleVector]) = {

val startGet = System.currentTimeMillis()
val (w0, w, v) = fmmodel.pullFromPS(vIndexs)
val (w0, w, v) = fmModel.pullFromPS(vIndexes)
val getCost = System.currentTimeMillis() - startGet
LOG.info(s"Get matrixes cost $getCost ms.")

Expand Down Expand Up @@ -154,7 +155,7 @@ Array[Int]) extends MLLearner(ctx) {
v(update._1).plusBy(update._2, -1.0).timesBy(-1.0)
}

fmmodel.pushToPS(w0.plusBy(_w0, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
fmModel.pushToPS(w0.plusBy(_w0, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
w.plusBy(_w, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
v)

Expand All @@ -163,6 +164,7 @@ Array[Int]) extends MLLearner(ctx) {

/**
* Evaluate the objective value
*
* @param dataBlock
* @param w0
* @param w
Expand All @@ -188,6 +190,7 @@ Array[Int]) extends MLLearner(ctx) {

/**
* Predict an instance
*
* @param x:feature vector of instance
* @param y: label value of instance
* @param w0: w0 mat of FM
Expand Down Expand Up @@ -220,6 +223,7 @@ Array[Int]) extends MLLearner(ctx) {

/**
* \frac{\partial loss}{\partial x} = dm * \frac{\partial y}{\partial x}
*
* @param y: label of the instance
* @param pre: predict value of the instance
* @return : dm value
Expand All @@ -240,6 +244,7 @@ Array[Int]) extends MLLearner(ctx) {

/**
* Update v mat
*
* @param x: a train instance
* @param dm: dm value of the instance
* @param v: v mat
Expand Down
16 changes: 9 additions & 7 deletions docs/apis/MLLearner.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# MLLearner
模型训练接口定义。
## train
- 定义:```def train(train: DataBlock[LabeledData], vali: DataBlock[LabeledData])```

> 模型训练的核心类,理论上,Angel所有模型训练核心逻辑,都应该写在这个类中。通过这个类来实现和调用。它是Train的核心类。
- 功能描述:使用算法对训练数据进行训练,得到模型
## 功能

* 不断读取DataBlock,训练得到MLModel模型

- 参数:train: DataBlock[LabeledData] 训练数据集; vali: DataBlock[LabeledData] 验证数据集
## 核心接口


- 返回值:训练结果
1. **train**
- 定义:```def train(train: DataBlock[LabeledData], vali: DataBlock[LabeledData]):MLModel```
- 功能描述:使用算法对训练数据进行训练,得到模型
- 参数:train: DataBlock[LabeledData] 训练数据集; vali: DataBlock[LabeledData] 验证数据集
- 返回值:MLModel
69 changes: 33 additions & 36 deletions docs/apis/MLRunner.md
Original file line number Diff line number Diff line change
@@ -1,52 +1,49 @@
# MLRunner
算法的启动类。它定义了启动Angel app和运行task的接口,需要应用程序根据需要实现这些接口。当然,它在封装了使用 [AngelClient](AngelClient.md)的使用。

包括:启动Angel ps,加载和存储模型,和启动task等过程的默认实现。一般情况下,应用程序直接调用它们就可以了。
> Angel算法的启动入口类。它定义了启动Angel app标准流程,封装了对 [AngelClient](AngelClient.md)的使用。
## train
- 定义:``` train(conf: Configuration)```
- 功能描述:启动Angel app训练模型
- 参数:conf: Configuration Angel任务相关配置和算法配置信息
- 返回值:无
## 功能

## train(default implementation)
* 启动Angel ps,加载和存储模型,和启动task等过程的默认实现
* 一般情况下,应用程序直接调用它们就可以了。

- **定义**```train(conf: Configuration, model: MLModel, taskClass: Class[_ <: BaseTask[_, _, _]]): Unit```
## 核心方法

- **功能描述**:启动Angel app训练模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程,子类可直接引用
- **参数**
* conf: Configuration Angel任务相关配置和算法配置信息
* model: MLModel 算法模型信息
* taskClass: Class[_ <: TrainTask[_, _, _]] 表示算法运行过程的Task 类
* 返回值:无
1. **train**
- 定义:``` train(conf: Configuration)```
- 功能描述:启动Angel app训练模型
- 参数:conf: Configuration Angel任务相关配置和算法配置信息
- 返回值:无

## incTrain
* **定义**```incTrain(conf: Configuration)```
- **功能描述**:启动Angel app 并使用增量训练的方式更新一个旧模型
- **参数**
- conf: Configuration Angel任务相关配置和算法配置信息
- **返回值**:无
2. **train(default implementation)**

## predict
- **定义**```predict(conf: Configuration)```
- **定义**: ```train(conf: Configuration, model: MLModel, taskClass: Class[_ <: BaseTask[_, _, _]]): Unit```

- **功能描述**:启动Angel app训练模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程,子类可直接引用

- **功能描述**:启动Angel app,计算预测结果
- **参数**
* conf: Configuration Angel任务相关配置和算法配置信息
* model: MLModel 算法模型信息
* taskClass: Class[_ <: TrainTask[_, _, _]] 表示算法运行过程的Task 类
* 返回值:无

3. **incTrain**

- **参数**:conf: Configuration Angel任务相关配置
* **定义**:```incTrain(conf: Configuration)```
- **功能描述**:使用增量训练的方式更新一个已有模型
- **参数:**conf: Configuration Angel任务相关配置和算法配置信息
- **返回值:**无

4. **predict**

- **返回值**:无
- **定义**:```predict(conf: Configuration)```
- **功能描述**:启动Angel app,计算预测结果
- **参数**:conf: Configuration Angel任务相关配置
- **返回值**:无

## predict(default implementation)
- **定义:**```predict(conf: Configuration, model: MLModel, taskClass: Class[_ <: PredictTask[_, _, _]]): Unit```
5. **predict(default implementation)**


- **功能描述:**启动Angel app 并使用增量训练的方式更新一个旧模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程,子类可直接引用


- **参数:**conf: Configuration Angel任务相关配置


- **返回值:**
- **定义:** ```predict(conf: Configuration, model: MLModel, taskClass: Class[_ <: PredictTask[_, _, _]]): Unit```
- **功能描述:**启动Angel app 并使用增量训练的方式更新一个旧模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程,子类可直接引用
- **参数:**conf: Configuration Angel任务相关配置
- **返回值:**无
5 changes: 0 additions & 5 deletions docs/apis/angel_client.md

This file was deleted.

17 changes: 17 additions & 0 deletions docs/apis/core_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# 核心接口类

---

![](../img/angel_class_diagram.png)

如上图所示,Angel的核心接口类,在Train的过程中,按照调用的顺序,分别为:

* [MLRunner](MLRunner.md)
* [AngelClient](AngelClient.md)
* [TrainTask](Task.md)
* [DataBlock](DataBlock.md)
* [MLLearner](MLLearner.md)
* [PSModel](PSModel.md)
* [MLModel](MLModel.md)

了解这些接口类的定义和功能,对于基于Angel实现高性能的机器学习算法,会有不错的帮助。
2 changes: 0 additions & 2 deletions docs/apis/gpu_adpater.md

This file was deleted.

10 changes: 0 additions & 10 deletions docs/apis/interface_api.md

This file was deleted.

Binary file added docs/img/angel_class_diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 7097f8f

Please sign in to comment.