Skip to content

Commit

Permalink
Merge pull request Angel-ML#274 from f7753/branch-1.4.0
Browse files Browse the repository at this point in the history
Fix mis-config in angel-ps/pom, remove unused '}'
  • Loading branch information
andyyehoo authored Dec 27, 2017
2 parents 1c51ac4 + 58f2f0f commit 0403ffd
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 89 deletions.
2 changes: 1 addition & 1 deletion angel-ps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>${compile.scope}}</scope>
<scope>${compile.scope}</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
Expand Down
4 changes: 2 additions & 2 deletions angel-ps/python/examples/fm_local_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ def set_conf(self):
self.conf[AngelConf.ANGEL_INPUTFORMAT_CLASS] = 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat'
self.conf[AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST] = True

#set angel resource parameters #worker, #task, #PS
# Set angel resource parameters #worker, #task, #PS
self.conf[AngelConf.ANGEL_WORKERGROUP_NUMBER] = 1
self.conf[AngelConf.ANGEL_WORKER_TASK_NUMBER] = 1
self.conf[AngelConf.ANGEL_PS_NUMBER] = 1

#set FM algorithm parameters #feature #epoch
# Set FM algorithm parameters #feature #epoch
self.conf[MLConf.ML_FEATURE_NUM] = str(feature_num)
self.conf[MLConf.ML_EPOCH_NUM] = str(epoch_num)
self.conf[MLConf.ML_FM_RANK] = str(rank)
Expand Down
29 changes: 17 additions & 12 deletions angel-ps/python/examples/gbdt_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
from pyangel.ml.conf import MLConf
from pyangel.ml.gbdt.runner import GBDTRunner


class GBDTExample(object):

def __init__(self):
self.conf= Configuration()
self.MLConf = MLConf()
self.conf = Configuration()

def set_conf(self):
# Input Path, please modify ${YOUR_ANGEL_HOME} as your local angel installation path,
Expand Down Expand Up @@ -52,38 +52,43 @@ def set_conf(self):
# Learning rate
learn_rate = 0.01

# Set GBDT category feature
cate_feat = "0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2," \
"21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2," \
"41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2," \
"61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2," \
"81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2," \
"101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2," \
"118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2"

# set input, output path
self.conf[AngelConf.ANGEL_TRAIN_DATA_PATH] = input_path
self.conf[AngelConf.ANGEL_SAVE_MODEL_PATH] = output_path

# Set GBDT algorithm parameters
self.conf[MLConf.ML_DATA_FORMAT] = dataFmt
self.conf[MLConf.ML_DATA_FORMAT] = data_fmt
self.conf[MLConf.ML_FEATURE_NUM] = feature_num
self.conf[MLConf.ML_FEATURE_NNZ] = feature_nzz
self.conf[MLConf.ML_GBDT_TREE_NUM] = tree_num
self.conf[MLConf.ML_GBDT_TREE_DEPTH] = tree_depth
self.conf[MLConf.ML_GBDT_SPLIT_NUM] = split_num
self.conf[MLConf.ML_GBDT_SAMPLE_RATIO] = sample_ratio
self.conf[MLConf.ML_LEARN_RATE] = learn_rate
self.conf[MLConf.ML_GBDT_CATE_FEAT] = cate_feat

def train(self):
self.set_conf()

runner = GBDTRunner()
runner.train(self.conf)



def predict(self):
self.set_conf()
# Load Model from HDFS.
TMP_PATH = tempfile.gettempdir()
self.conf["gbdt.split.feature"] = TMP_PATH + "/out/xxx"
self.conf["gbdt.split.value"] = TMP_PATH + "/out/xxx"

tmp_path = tempfile.gettempdir()
self.conf["gbdt.split.feature"] = tmp_path + "/out/xxx"
self.conf["gbdt.split.value"] = tmp_path + "/out/xxx"
runner = GBDTRunner()

runner.predict(conf)
runner.predict(self.conf)


example = GBDTExample()
Expand Down
104 changes: 34 additions & 70 deletions angel-ps/python/examples/gbdt_local_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,90 +22,56 @@
from pyangel.ml.conf import MLConf
from pyangel.ml.gbdt.runner import GBDTRunner


class GBDTExample(object):

def __init__(self):
self.conf= Configuration()
self.MLConf = MLConf()
self.conf = Configuration()

def set_conf(self):
"""
Input Path, please modify ${YOUR_ANGEL_HOME} as your local angel installation path,
e.g. if your path is /home/angel/angel_1.3.0, your input_path should be:
"file:///home/angel/angel_1.3.0/data/exampledata/GBDTLocalExampleData/agaricus.txt.train",
and your out_path could be: "file:///home/angel/angel_1.3.0/data/output"
If you prefer, you can uncomment Line35, Line36, Line61 and Line62, so that
you do not need to pass these configs every time you submit the pyangel job.
:return:
"""
# Feature number of train data
feature_num = 127
# Number of nonzero features
feature_nzz = 25
# Tree number
tree_num = 2
# Tree depth
tree_depth = 2
# Split number
split_num = 10
# Feature sample ratio
sample_ratio = 1.0

# Data format
data_fmt = "libsvm"

# Learning rate
learn_rate = 0.01

# Use local deploy mode and dummy data spliter
self.conf[AngelConf.ANGEL_DEPLOY_MODE] = "LOCAL"

self.conf['mapred.mapper.new-api'] = True
self.conf[AngelConf.ANGEL_INPUTFORMAT_CLASS] = 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat'
self.conf[AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST] = True

# Set angel resource parameters #worker, #task, #PS
self.conf[AngelConf.ANGEL_WORKERGROUP_NUMBER] = 1
self.conf[AngelConf.ANGEL_WORKER_TASK_NUMBER] = 1
self.conf[AngelConf.ANGEL_PS_NUMBER] = 1

# Set GBDT algorithm parameters
self.conf[MLConf.ML_DATA_FORMAT] = data_fmt
self.conf[MLConf.ML_FEATURE_NUM] = str(feature_num)
self.conf[MLConf.ML_FEATURE_NNZ] = str(feature_nzz)
self.conf[MLConf.ML_GBDT_TREE_NUM] = str(tree_num)
self.conf[MLConf.ML_GBDT_TREE_DEPTH] = str(tree_depth)
self.conf[MLConf.ML_GBDT_SPLIT_NUM] = str(split_num)
self.conf[MLConf.ML_GBDT_SAMPLE_RATIO] = str(sample_ratio)
self.conf[MLConf.ML_LEARN_RATE] = str(learn_rate)

cate_feat = "0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2," \
"21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2," \
"41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2," \
"61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2," \
"81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2," \
"101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2," \
"118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2"
params = {
AngelConf.ANGEL_DEPLOY_MODE:'LOCAL',
'mapred.mapper.new-api':True,
AngelConf.ANGEL_INPUTFORMAT_CLASS:'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat',
AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST:True,
AngelConf.ANGEL_WORKERGROUP_NUMBER:1,
AngelConf.ANGEL_WORKER_TASK_NUMBER:1,
AngelConf.ANGEL_PS_NUMBER:1,
MLConf.ML_DATA_FORMAT:'libsvm',
MLConf.ML_FEATURE_NUM:127,
MLConf.ML_FEATURE_NNZ:25,
MLConf.ML_GBDT_TREE_NUM:2,
MLConf.ML_GBDT_TREE_DEPTH:2,
MLConf.ML_GBDT_SPLIT_NUM:10,
MLConf.ML_GBDT_SAMPLE_RATIO:1.0,
MLConf.ML_LEARN_RATE:0.01
AngelConf.ANGEL_DEPLOY_MODE: 'LOCAL',
'mapred.mapper.new-api': True,
AngelConf.ANGEL_INPUTFORMAT_CLASS: 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat',
AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST: True,
AngelConf.ANGEL_WORKERGROUP_NUMBER: 1,
AngelConf.ANGEL_WORKER_TASK_NUMBER: 1,
AngelConf.ANGEL_PS_NUMBER: 1,
MLConf.ML_DATA_FORMAT: 'libsvm',
MLConf.ML_FEATURE_NUM: 127,
MLConf.ML_FEATURE_NNZ: 25,
MLConf.ML_GBDT_TREE_NUM: 2,
MLConf.ML_GBDT_TREE_DEPTH: 2,
MLConf.ML_GBDT_SPLIT_NUM: 10,
MLConf.ML_GBDT_SAMPLE_RATIO: 1.0,
MLConf.ML_LEARN_RATE: 0.01,
MLConf.ML_GBDT_CATE_FEAT: cate_feat
}

self.conf.load(params)
self.conf.update(params)

def train(self):
self.set_conf()

LOCAL_FS = LocalFileSystem.DEFAULT_FS
TMP_PATH = tempfile.gettempdir()
save_path = LOCAL_FS + TMP_PATH + "/model"
log_path = LOCAL_FS + TMP_PATH + "/GBDTlog"
local_fs = LocalFileSystem.DEFAULT_FS
tmp_path = tempfile.gettempdir()
save_path = local_fs + tmp_path + "/model"
log_path = local_fs + tmp_path + "/GBDTlog"
input_path = "data/exampledata/GBDTLocalExampleData/agaricus.txt.train"
output_path = "data/output"

Expand All @@ -121,18 +87,16 @@ def train(self):
runner = GBDTRunner()
runner.train(self.conf)



def predict(self):
self.set_conf()
# Load Model from HDFS.
TMP_PATH = tempfile.gettempdir()
self.conf["gbdt.split.feature"] = TMP_PATH + "/out/xxx"
self.conf["gbdt.split.value"] = TMP_PATH + "/out/xxx"
tmp_path = tempfile.gettempdir()
self.conf["gbdt.split.feature"] = tmp_path + "/out/xxx"
self.conf["gbdt.split.value"] = tmp_path + "/out/xxx"

runner = GBDTRunner()

runner.predict(conf)
runner.predict(self.conf)


example = GBDTExample()
Expand Down
40 changes: 36 additions & 4 deletions docs/tutorials/pyangel_quick_start.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ PyAngel支持**交互式**和**脚本式**两种提交任务的模式,而每

- **交互式**

* **local模式**
* **Local模式**

```bash
bin/pyangel local
Expand All @@ -31,7 +31,7 @@ PyAngel支持**交互式**和**脚本式**两种提交任务的模式,而每

- **脚本式**

- **local模式**
- **Local模式**

```bash
bin/angel-local-submit --angel.pyangel.pyfile ${ANGEL_HOME}/python/examples/gbdt_local_example.py
Expand Down Expand Up @@ -99,8 +99,40 @@ PyAngel支持**交互式**和**脚本式**两种提交任务的模式,而每
runner = GBDTRunner()
runner.train(conf)
```

* [完整代码](../../angel-ps/examples/src/main/python/gbdt_example.py)
或者也可以通过创建一个字典的方式将参数传入:
```python
cate_feat = "0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2," \
"21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2," \
"41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2," \
"61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2," \
"81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2," \
"101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2," \
"118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2"
params = {
AngelConf.ANGEL_DEPLOY_MODE: 'LOCAL',
'mapred.mapper.new-api': True,
AngelConf.ANGEL_INPUTFORMAT_CLASS: 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat',
AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST: True,
AngelConf.ANGEL_WORKERGROUP_NUMBER: 1,
AngelConf.ANGEL_WORKER_TASK_NUMBER: 1,
AngelConf.ANGEL_PS_NUMBER: 1,
MLConf.ML_DATA_FORMAT: 'libsvm',
MLConf.ML_FEATURE_NUM: 127,
MLConf.ML_FEATURE_NNZ: 25,
MLConf.ML_GBDT_TREE_NUM: 2,
MLConf.ML_GBDT_TREE_DEPTH: 2,
MLConf.ML_GBDT_SPLIT_NUM: 10,
MLConf.ML_GBDT_SAMPLE_RATIO: 1.0,
MLConf.ML_LEARN_RATE: 0.01,
MLConf.ML_GBDT_CATE_FEAT: cate_feat
}
conf.update(params)
runner = GBDTRunner()
runner.train(conf)
```

* [完整代码](../../angel-ps/examples/src/main/python/gbdt_local_example.py)

### 新版本

Expand Down
32 changes: 32 additions & 0 deletions docs/tutorials/pyangel_quick_start_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,38 @@ Run `bin/pyangel local` to start PyAngel local interactive command-line, then in
runner = GBDTRunner()
runner.train(conf)
```
Alternatively, you can use a dict to set all the params Angel needs:
```python
cate_feat = "0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2," \
"21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2," \
"41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2," \
"61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2," \
"81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2," \
"101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2," \
"118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2"
params = {
AngelConf.ANGEL_DEPLOY_MODE: 'LOCAL',
'mapred.mapper.new-api': True,
AngelConf.ANGEL_INPUTFORMAT_CLASS: 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat',
AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST: True,
AngelConf.ANGEL_WORKERGROUP_NUMBER: 1,
AngelConf.ANGEL_WORKER_TASK_NUMBER: 1,
AngelConf.ANGEL_PS_NUMBER: 1,
MLConf.ML_DATA_FORMAT: 'libsvm',
MLConf.ML_FEATURE_NUM: 127,
MLConf.ML_FEATURE_NNZ: 25,
MLConf.ML_GBDT_TREE_NUM: 2,
MLConf.ML_GBDT_TREE_DEPTH: 2,
MLConf.ML_GBDT_SPLIT_NUM: 10,
MLConf.ML_GBDT_SAMPLE_RATIO: 1.0,
MLConf.ML_LEARN_RATE: 0.01,
MLConf.ML_GBDT_CATE_FEAT: cate_feat
}
conf.update(params)
runner = GBDTRunner()
runner.train(conf)
```

* [Complete Code](../../angel-ps/examples/src/main/python/gbdt_example.py)

Expand Down

0 comments on commit 0403ffd

Please sign in to comment.