[DOCS] Improve Core API

4T-Shirt · Aug 21, 2017 · 7097f8f · 7097f8f
1 parent 0e6e934
commit 7097f8f
Show file tree

Hide file tree

Showing 10 changed files with 89 additions and 82 deletions.
diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@
 * [模型切分（modelPartitioner）](./docs/design/model_partitioner.md)
 * [异步控制（syncController）](./docs/design/sync_controller.md)
 * [定制函数（psFunc）](./docs/design/psfFunc.md)
-* [核心类的说明](./docs/apis/interface_api.md)
+* [核心接口类](./docs/apis/core_api.md)
 
 
 ## Algorithm

diff --git a/README_en.md b/README_en.md
@@ -33,8 +33,11 @@ We welcome everyone interested in machine learning to contribute code, create is
 
 ## Design
 
-* [Interface API](./docs/apis/interface_api.md)
+* [Model Partitioner](./docs/design/model_partitioner.md)
+* [SyncController](./docs/design/sync_controller.md)
 * [psFunc](./docs/design/psfFunc.md)
+* [Core API](./docs/apis/core_api.md)
+
 
 ## Algorithm
 

diff --git a/angel-ps/mllib/src/main/java/com/tencent/angel/ml/factorizationmachines/FMLearner.scala b/angel-ps/mllib/src/main/java/com/tencent/angel/ml/factorizationmachines/FMLearner.scala
@@ -38,10 +38,11 @@ import scala.collection.mutable
   * @param maxP: max value of y
   * @param feaUsed: array of used feature of the input data
   */
-class FMLearner(override val ctx: TaskContext, val minP: Double, val maxP: Double, val feaUsed:
-Array[Int]) extends MLLearner(ctx) {
+class FMLearner(override val ctx: TaskContext, val minP: Double, val maxP: Double, val feaUsed: Array[Int])
+  extends MLLearner(ctx) {
+
   val LOG: Log = LogFactory.getLog(classOf[FMLearner])
-  val fmmodel = new FMModel(conf, ctx)
+  val fmModel = new FMModel(conf, ctx)
 
   val learnType = conf.get(MLConf.ML_FM_LEARN_TYPE, MLConf.DEFAULT_ML_FM_LEARN_TYPE)
   val feaNum: Int = conf.getInt(MLConf.ML_FEATURE_NUM, MLConf.DEFAULT_ML_FEATURE_NUM)
@@ -53,12 +54,11 @@ Array[Int]) extends MLLearner(ctx) {
   val reg2: Double = conf.getDouble(MLConf.ML_FM_REG2, MLConf.DEFAULT_ML_FM_REG2)
   val lr: Double = conf.getDouble(MLConf.ML_LEARN_RATE, MLConf.DEFAULT_ML_LEAR_RATE)
   val vStddev: Double = conf.getDouble(MLConf.ML_FM_V_STDDEV, MLConf.DEFAULT_ML_FM_V_INIT)
-  // Put used feature indexes to vIndexs
-  //val vIndexs = feaUsed.zipWithIndex.filter((p:(Int,Int))=>p._1!=0).map((p:(Int,Int))=>p._2).array
-  val vIndexs = new RowIndex()
-  feaUsed.zipWithIndex.filter((p:(Int, Int))=>p._1!=0).map((p:(Int, Int))=>vIndexs.addRowId(p._2))
-  val feaUsedN = vIndexs.getRowsNumber
-  LOG.info("vIndexs's row's number = " + vIndexs)
+
+  val vIndexes = new RowIndex()
+  feaUsed.zipWithIndex.filter((p:(Int, Int))=>p._1!=0).map((p:(Int, Int))=>vIndexes.addRowId(p._2))
+  val feaUsedN = vIndexes.getRowsNumber
+  LOG.info("vIndexs's row's number = " + vIndexes)
 
   /**
     * Train a Factorization machines Model
@@ -68,8 +68,7 @@ Array[Int]) extends MLLearner(ctx) {
     * @return : a learned model
     */
   override
-  def train(trainData: DataBlock[LabeledData], vali: DataBlock[LabeledData]):
-  MLModel = {
+  def train(trainData: DataBlock[LabeledData], vali: DataBlock[LabeledData]): MLModel = {
     val start = System.currentTimeMillis()
     LOG.info(s"learnType=$learnType, feaNum=$feaNum, rank=$rank, #trainData=${trainData.size}")
     LOG.info(s"reg0=$reg0, reg1=$reg1, reg2=$reg2, lr=$lr, vStev=$vStddev")
@@ -79,7 +78,7 @@ Array[Int]) extends MLLearner(ctx) {
     val initCost = System.currentTimeMillis() - beforeInit
     LOG.info(s"Init matrixes cost $initCost ms.")
 
-    globalMetrics.addMetrics(fmmodel.FM_OBJ, LossMetric(trainData.size()))
+    globalMetrics.addMetrics(fmModel.FM_OBJ, LossMetric(trainData.size()))
 
     while (ctx.getIteration < epochNum) {
       val startIter = System.currentTimeMillis()
@@ -90,7 +89,7 @@ Array[Int]) extends MLLearner(ctx) {
       val loss = evaluate(trainData, w0.get(0), w, v)
       val valiCost = System.currentTimeMillis() - startVali
 
-      globalMetrics.metrics(fmmodel.FM_OBJ, loss)
+      globalMetrics.metrics(fmModel.FM_OBJ, loss)
       LOG.info(s"Epoch=${ctx.getIteration}, evaluate loss=${loss/trainData.size()}. " +
         s"trainCost=$iterCost, " +
         s"valiCost=$valiCost")
@@ -101,7 +100,7 @@ Array[Int]) extends MLLearner(ctx) {
     val end = System.currentTimeMillis()
     val cost = end - start
     LOG.info(s"FM Learner train cost $cost ms.")
-    fmmodel
+    fmModel
   }
 
   /**
@@ -110,22 +109,24 @@ Array[Int]) extends MLLearner(ctx) {
   def initModels(): Unit = {
     if(ctx.getTaskId.getIndex == 0) {
       for (row <- 0 until feaNum) {
-        fmmodel.v.update(new RandomNormal(fmmodel.v.getMatrixId(), row, 0.0, vStddev)).get()
+        fmModel.v.update(new RandomNormal(fmModel.v.getMatrixId(), row, 0.0, vStddev)).get()
       }
     }
 
-    fmmodel.v.clock().get()
+    fmModel.v.clock().get()
   }
 
   /**
     * One iteration to train Factorization Machines
+ *
     * @param dataBlock
     * @return
     */
-  def oneIteration(dataBlock: DataBlock[LabeledData]): (DenseDoubleVector,
-    DenseDoubleVector, mutable.HashMap[Int, DenseDoubleVector]) = {
+  def oneIteration(dataBlock: DataBlock[LabeledData]):
+    (DenseDoubleVector, DenseDoubleVector, mutable.HashMap[Int, DenseDoubleVector]) = {
+
     val startGet = System.currentTimeMillis()
-    val (w0, w, v) = fmmodel.pullFromPS(vIndexs)
+    val (w0, w, v) = fmModel.pullFromPS(vIndexes)
     val getCost = System.currentTimeMillis() - startGet
     LOG.info(s"Get matrixes cost $getCost ms.")
 
@@ -154,7 +155,7 @@ Array[Int]) extends MLLearner(ctx) {
       v(update._1).plusBy(update._2, -1.0).timesBy(-1.0)
     }
 
-    fmmodel.pushToPS(w0.plusBy(_w0, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
+    fmModel.pushToPS(w0.plusBy(_w0, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
       w.plusBy(_w, -1.0).timesBy(-1.0).asInstanceOf[DenseDoubleVector],
       v)
 
@@ -163,6 +164,7 @@ Array[Int]) extends MLLearner(ctx) {
 
   /**
     * Evaluate the objective value
+ *
     * @param dataBlock
     * @param w0
     * @param w
@@ -188,6 +190,7 @@ Array[Int]) extends MLLearner(ctx) {
 
   /**
     * Predict an instance
+ *
     * @param x：feature vector of instance
     * @param y: label value of instance
     * @param w0: w0 mat of FM
@@ -220,6 +223,7 @@ Array[Int]) extends MLLearner(ctx) {
 
   /**
     * \frac{\partial loss}{\partial x} = dm * \frac{\partial y}{\partial x}
+ *
     * @param y: label of the instance
     * @param pre: predict value of the instance
     * @return : dm value
@@ -240,6 +244,7 @@ Array[Int]) extends MLLearner(ctx) {
 
   /**
     * Update v mat
+ *
     * @param x: a train instance
     * @param dm: dm value of the instance
     * @param v: v mat

diff --git a/docs/apis/MLLearner.md b/docs/apis/MLLearner.md
@@ -1,13 +1,15 @@
 # MLLearner
-模型训练接口定义。
-## train
-- 定义：```def train(train: DataBlock[LabeledData], vali: DataBlock[LabeledData])```
 
+> 模型训练的核心类，理论上，Angel所有模型训练核心逻辑，都应该写在这个类中。通过这个类来实现和调用。它是Train的核心类。
 
-- 功能描述：使用算法对训练数据进行训练，得到模型
+## 功能
 
+* 不断读取DataBlock，训练得到MLModel模型
 
-- 参数：train: DataBlock[LabeledData] 训练数据集； vali: DataBlock[LabeledData] 验证数据集
+## 核心接口
 
-
-- 返回值：训练结果
+1. **train**
+	- 定义：```def train(train: DataBlock[LabeledData], vali: DataBlock[LabeledData])：MLModel```
+	- 功能描述：使用算法对训练数据进行训练，得到模型
+	- 参数：train: DataBlock[LabeledData] 训练数据集； vali: DataBlock[LabeledData] 验证数据集
+	- 返回值：MLModel
diff --git a/docs/apis/MLRunner.md b/docs/apis/MLRunner.md
@@ -1,52 +1,49 @@
 # MLRunner
-算法的启动类。它定义了启动Angel app和运行task的接口，需要应用程序根据需要实现这些接口。当然，它在封装了使用 [AngelClient](AngelClient.md)的使用。 
 
-包括：启动Angel ps，加载和存储模型，和启动task等过程的默认实现。一般情况下，应用程序直接调用它们就可以了。
+> Angel算法的启动入口类。它定义了启动Angel app标准流程，封装了对 [AngelClient](AngelClient.md)的使用。 
 
-## train
-- 定义：``` train(conf: Configuration)```
-- 功能描述：启动Angel app训练模型
-- 参数：conf: Configuration Angel任务相关配置和算法配置信息
-- 返回值：无
+## 功能
 
-## train（default implementation）
+* 启动Angel ps，加载和存储模型，和启动task等过程的默认实现
+* 一般情况下，应用程序直接调用它们就可以了。
 
-- **定义**：```train(conf: Configuration, model: MLModel, taskClass: Class[_ <: BaseTask[_, _, _]]): Unit```
+## 核心方法
 
-- **功能描述**：启动Angel app训练模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程，子类可直接引用
-- **参数**
-	* conf: Configuration Angel任务相关配置和算法配置信息
-	* model: MLModel 算法模型信息
-	* taskClass: Class[_ <: TrainTask[_, _, _]] 表示算法运行过程的Task 类
-	* 返回值：无
+1. **train**
+	- 定义：``` train(conf: Configuration)```
+	- 功能描述：启动Angel app训练模型
+	- 参数：conf: Configuration Angel任务相关配置和算法配置信息
+	- 返回值：无
 
-## incTrain
-* **定义**：```incTrain(conf: Configuration)```
-- **功能描述**：启动Angel app 并使用增量训练的方式更新一个旧模型
-- **参数**
-	- conf: Configuration Angel任务相关配置和算法配置信息
-- **返回值**：无
+2. **train（default implementation）**
 
-## predict
-- **定义**：```predict(conf: Configuration)```
+	- **定义**： ```train(conf: Configuration, model: MLModel, taskClass: Class[_ <: BaseTask[_, _, _]]): Unit```
 
+	- **功能描述**：启动Angel app训练模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程，子类可直接引用
 
-- **功能描述**：启动Angel app，计算预测结果
+	- **参数**
+		* conf: Configuration Angel任务相关配置和算法配置信息
+		* model: MLModel 算法模型信息
+		* taskClass: Class[_ <: TrainTask[_, _, _]] 表示算法运行过程的Task 类
+		* 返回值：无
 
+3. **incTrain**
 
-- **参数**：conf: Configuration Angel任务相关配置
+	* **定义**：```incTrain(conf: Configuration)```
+	- **功能描述**：使用增量训练的方式更新一个已有模型
+	- **参数：**conf: Configuration Angel任务相关配置和算法配置信息
+	- **返回值：**无
 
+4. **predict**
 
-- **返回值**：无
+	- **定义**：```predict(conf: Configuration)```
+	- **功能描述**：启动Angel app，计算预测结果
+	- **参数**：conf: Configuration Angel任务相关配置
+	- **返回值**：无
 
-## predict（default implementation）
-- **定义：**```predict(conf: Configuration, model: MLModel, taskClass: Class[_ <: PredictTask[_, _, _]]): Unit```
+5. **predict（default implementation）**
 
-
-- **功能描述：**启动Angel app 并使用增量训练的方式更新一个旧模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程，子类可直接引用
-
-
-- **参数：**conf: Configuration Angel任务相关配置
-
-
-- **返回值：**无
+	- **定义：** ```predict(conf: Configuration, model: MLModel, taskClass: Class[_ <: PredictTask[_, _, _]]): Unit```
+	- **功能描述：**启动Angel app 并使用增量训练的方式更新一个旧模型。该方法封装了具体的Angel ps/worker启动以及模型加载/存储过程，子类可直接引用
+	- **参数：**conf: Configuration Angel任务相关配置
+	- **返回值：**无
diff --git a/docs/apis/angel_client.md b/docs/apis/angel_client.md
diff --git a/docs/apis/core_api.md b/docs/apis/core_api.md
@@ -0,0 +1,17 @@
+# 核心接口类
+
+---
+
+![](../img/angel_class_diagram.png)
+
+如上图所示，Angel的核心接口类，在Train的过程中，按照调用的顺序，分别为：
+
+* [MLRunner](MLRunner.md)
+* [AngelClient](AngelClient.md)
+* [TrainTask](Task.md)
+* [DataBlock](DataBlock.md)
+* [MLLearner](MLLearner.md)
+* [PSModel](PSModel.md)
+* [MLModel](MLModel.md)
+
+了解这些接口类的定义和功能，对于基于Angel实现高性能的机器学习算法，会有不错的帮助。
diff --git a/docs/apis/gpu_adpater.md b/docs/apis/gpu_adpater.md
diff --git a/docs/apis/interface_api.md b/docs/apis/interface_api.md
diff --git a/docs/img/angel_class_diagram.png b/docs/img/angel_class_diagram.png