Skip to content

Commit

Permalink
Merge pull request FederatedAI#2049 from FederatedAI/develop-1.5
Browse files Browse the repository at this point in the history
Develop 1.5
  • Loading branch information
mgqa34 authored Oct 11, 2020
2 parents ece1d48 + 16c8f6f commit 63d24d8
Show file tree
Hide file tree
Showing 65 changed files with 2,631 additions and 645 deletions.
Empty file.
118 changes: 118 additions & 0 deletions examples/benchmark_quality/hetero_fast_sbt/fate-fast-sbt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component.dataio import DataIO
from pipeline.component.hetero_fast_secureboost import HeteroFastSecureBoost
from pipeline.component.intersection import Intersection
from pipeline.component.reader import Reader
from pipeline.interface.data import Data
from pipeline.component.evaluation import Evaluation
from pipeline.interface.model import Model
from pipeline.utils.tools import load_job_config
from pipeline.utils.tools import JobConfig


def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    """Build and fit the hetero fast secure-boost benchmark-quality pipeline.

    :param config: path to a job-config yaml, or an already-loaded config object
    :param param: path to a parameter yaml, or an already-loaded param mapping
    :param namespace: suffix appended to the "experiment" data namespace
    :return: tuple of an empty dict and the evaluation component's summary
    """
    # Accept either file paths or pre-loaded objects for both inputs.
    if isinstance(config, str):
        config = load_job_config(config)
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest = config.parties.guest[0]
    host = config.parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # Uploaded tables referenced by the two readers (train + validate).
    ns = f"experiment{namespace}"
    train_guest = {"name": param['data_guest_train'], "namespace": ns}
    train_host = {"name": param['data_host_train'], "namespace": ns}
    val_guest = {"name": param['data_guest_val'], "namespace": ns}
    val_host = {"name": param['data_host_val'], "namespace": ns}

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host)

    # Readers: reader_0 feeds training data, reader_1 feeds validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).algorithm_param(table=train_guest)
    reader_0.get_party_instance(role="host", party_id=host).algorithm_param(table=train_host)
    reader_1.get_party_instance(role="guest", party_id=guest).algorithm_param(table=val_guest)
    reader_1.get_party_instance(role="host", party_id=host).algorithm_param(table=val_host)

    # DataIO: guest holds labels (dense output); host side is unlabeled.
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(role="guest", party_id=guest).algorithm_param(
            with_label=True, output_format="dense")
        dataio.get_party_instance(role="host", party_id=host).algorithm_param(with_label=False)

    # Private-set-intersection components aligning guest/host sample IDs.
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # Fast SBT trainer, fully parameterized from the benchmark param file.
    hetero_fast_sbt_0 = HeteroFastSecureBoost(
        name="hetero_fast_sbt_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        learning_rate=param['learning_rate'],
        guest_depth=param['guest_depth'],
        host_depth=param['host_depth'],
        tree_num_per_party=param['tree_num_per_party'],
        work_mode=param['work_mode'],
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    # Wire the DAG: readers -> dataio -> intersection -> fast-SBT -> evaluation.
    # dataio_1 reuses dataio_0's fitted model so both splits share one schema.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    return {}, pipeline.get_component("evaluation_0").get_summary()


if __name__ == "__main__":
    # CLI entry point for running this benchmark job standalone.
    parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB")
    parser.add_argument("-config", type=str,
                        help="config file")
    parser.add_argument("-param", type=str,
                        help="config file for params")
    args = parser.parse_args()
    # Forward only the arguments that were actually supplied so that
    # main()'s defaults are preserved.  Previously args.param was passed
    # through even when omitted, overriding the default with None and
    # crashing JobConfig.load_from_file(None) inside main().
    kwargs = {}
    if args.config is not None:
        kwargs["config"] = args.config
    if args.param is not None:
        kwargs["param"] = args.param
    main(**kwargs)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: binary classification on the breast_hetero dataset
# using fast-SBT in "layered" work mode (guest_depth + host_depth split).
data_guest_train: "breast_hetero_guest"
data_guest_val: "breast_hetero_guest"
data_host_train: "breast_hetero_host"
data_host_val: "breast_hetero_host"
eval_type: "binary"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "layered"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: binary classification on the epsilon_5k_hetero dataset
# using fast-SBT in "layered" work mode (guest_depth + host_depth split).
data_guest_train: "epsilon_5k_hetero_guest"
data_guest_val: "epsilon_5k_hetero_guest"
data_host_train: "epsilon_5k_hetero_host"
data_host_val: "epsilon_5k_hetero_host"
eval_type: "binary"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "layered"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: multi-class classification on the vehicle_scale_hetero
# dataset using fast-SBT in "layered" work mode.
data_guest_train: "vehicle_scale_hetero_guest"
data_guest_val: "vehicle_scale_hetero_guest"
data_host_train: "vehicle_scale_hetero_host"
data_host_val: "vehicle_scale_hetero_host"
eval_type: "multi"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "layered"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: regression (least-squared-error loss) on the
# student_hetero dataset using fast-SBT in "layered" work mode.
data_guest_train: "student_hetero_guest"
data_guest_val: "student_hetero_guest"
data_host_train: "student_hetero_host"
data_host_val: "student_hetero_host"
eval_type: "regression"
task_type: "regression"
loss_func: "lse"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "layered"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: binary classification on the breast_hetero dataset
# using fast-SBT in "mix" work mode (tree_num_per_party trees per party).
data_guest_train: "breast_hetero_guest"
data_guest_val: "breast_hetero_guest"
data_host_train: "breast_hetero_host"
data_host_val: "breast_hetero_host"
eval_type: "binary"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "mix"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: binary classification on the epsilon_5k_hetero dataset
# using fast-SBT in "mix" work mode (tree_num_per_party trees per party).
data_guest_train: "epsilon_5k_hetero_guest"
data_guest_val: "epsilon_5k_hetero_guest"
data_host_train: "epsilon_5k_hetero_host"
data_host_val: "epsilon_5k_hetero_host"
eval_type: "binary"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "mix"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: multi-class classification on the vehicle_scale_hetero
# dataset using fast-SBT in "mix" work mode.
data_guest_train: "vehicle_scale_hetero_guest"
data_guest_val: "vehicle_scale_hetero_guest"
data_host_train: "vehicle_scale_hetero_host"
data_host_val: "vehicle_scale_hetero_host"
eval_type: "multi"
task_type: "classification"
loss_func: "cross_entropy"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "mix"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark parameters: regression (least-squared-error loss) on the
# student_hetero dataset using fast-SBT in "mix" work mode.
data_guest_train: "student_hetero_guest"
data_guest_val: "student_hetero_guest"
data_host_train: "student_hetero_host"
data_host_val: "student_hetero_host"
eval_type: "regression"
task_type: "regression"
loss_func: "lse"
tree_depth: 3
tree_num: 10
learning_rate: 0.1
work_mode: "mix"
tree_num_per_party: 1
guest_depth: 1
host_depth: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
{
"data": [
{
"file": "data/vehicle_scale_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "vehicle_scale_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "data/vehicle_scale_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "vehicle_scale_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
},
{
"file": "data/breast_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "data/breast_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
},
{
"file": "data/student_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "student_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "data/student_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "student_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"mix-binary": {
"local": {
"script": "./xgboost-sbt-binary.py",
"conf": "./xgb_config_binary.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_mix_binary.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"mix-binary-epsilon-5k": {
"local": {
"script": "./xgboost-sbt-binary.py",
"conf": "./xgb_config_binary.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_mix_epsilon_5k.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"mix-multi": {
"local": {
"script": "./xgboost-sbt-multi.py",
"conf": "./xgb_config_multi.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_mix_multi.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"mix-regression": {
"local": {
"script": "./xgboost-sbt-regression.py",
"conf": "./xgb_config_reg.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_mix_regression.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"layered-binary": {
"local": {
"script": "./xgboost-sbt-binary.py",
"conf": "./xgb_config_binary.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_layered_binary.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"layered-multi": {
"local": {
"script": "./xgboost-sbt-multi.py",
"conf": "./xgb_config_multi.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_layered_multi.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"layered-regression": {
"local": {
"script": "./xgboost-sbt-regression.py",
"conf": "./xgb_config_reg.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_layered_regression.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
},
"layered-binary-epsilon-5k": {
"local": {
"script": "./xgboost-sbt-binary.py",
"conf": "./xgb_config_binary.yaml"
},
"pipeline": {
"script": "./fate-fast-sbt.py",
"conf": "./fate_fast_sbt_layered_epsilon_5k.yaml"
},
"compare_setting": {
"relative_tol": 1e-3
}
}
}
Loading

0 comments on commit 63d24d8

Please sign in to comment.