Merge pull request FederatedAI#1346 from FederatedAI/feature-1.4-add-…

…partition-note Feature 1.4 add partition note
pyisong · May 11, 2020 · dbe8d9c · dbe8d9c
2 parents a173fe1 + 3528ee0
commit dbe8d9c
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 10 deletions.
diff --git a/examples/federatedml-1.x-examples/README.rst b/examples/federatedml-1.x-examples/README.rst
@@ -1,3 +1,9 @@
+FATE Usage
+==========
+
+If you want to try FATE quickly, we provide a quick-start tool that launches a hetero-lr task for you. After that, you are welcome to use the provided configurations to try the other algorithms listed. Before you upload data and start a training task, it is highly recommended that you read the configuration guide below.
+
+
 Quick Start
 ===========
 
@@ -123,8 +129,8 @@ Then all you need to do is running the following command:
 Please note this works only if you have finished the training task.
 
 
-Start Training Task
--------------------
+Start Training Task Manually
+============================
 
 Three config files need to be prepared to build an algorithm model in FATE.
 
@@ -134,7 +140,7 @@ There are three config files need to be prepared to build a algorithm model in F
 
 
 Step1: Define upload data config file
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-------------------------------------
 
 To make FATE able to use your data, you need to upload it. Thus, an upload-data conf is needed. A sample file named "upload_data.json" has been provided in the current folder.
 
@@ -146,6 +152,23 @@ To make FATE be able to use your data, you need to upload them. Thus, a upload-d
     4. table_name & namespace: Indicators for stored data table.
     5. work_mode: Indicates whether the standalone version or the cluster version is used. 0 stands for the standalone version and 1 stands for the cluster version.
 
+.. Note::
+    We suggest you fully consider the resources of your modeling machines before setting the partition number. The recommended partition number depends on the number of cores per machine, the number of machines, and the concurrency you want. Suppose you have
+        core_num per machine = n,
+        num of node(machine) = m,
+        your designed processors per node = p
+
+    Then
+        eggs = int(n * 0.8 / p)
+        partitions = eggs * m
+    that is,
+        partitions = int(n * 0.8 / p) * m
+
+    For example, if each machine has 20 cores, there are 2 nodes, and you want to start 6 processors per node,
+    then partitions = int(20 * 0.8 / 6) * 2 = 4.
+
+    How to set the number of processors per node is shown in Step 3 below, which describes the submit runtime conf settings.
+
 
 Step2: Define your modeling task structure
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -196,6 +219,36 @@ This config file is used to config parameters for all components among every par
 3. role_parameters: Parameters that differ between roles are defined here, separately for each role. Please note that each parameter is a list, each element of which corresponds to a party in this role.
 4. algorithm_parameters: Parameters that are the same among all parties are defined here.
 
+An example of such a config file is shown below:
+
+    .. code-block::
+
+        {
+            "initiator": {
+                "role": "guest",
+                "party_id": 10000
+            },
+            "job_parameters": {
+                "work_mode": 1,
+                "processor_per_node": 6
+            },
+            "role": {
+                "guest": [
+                    10000
+                ],
+                "host": [
+                    10000
+                ],
+                "arbiter": [
+                    10000
+                ]
+            },
+            "role_parameters": {"Your role parameters"},
+            "algorithm_parameters": {"Your algorithm parameters"}
+        }
+
+    You can set processor_per_node in job_parameters.
+
 Step4: Start Modeling Task
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -213,7 +266,7 @@ Step4: Start Modeling Task
         {
           "file": "examples/data/breast_b.csv",
           "head": 1,
-          "partition": 48,
+          "partition": 8,
           "work_mode": 0,
           "table_name": "hetero_breast_b",
           "namespace": "hetero_guest_breast"

diff --git a/examples/federatedml-1.x-examples/experiment/generate_mock_data.py b/examples/federatedml-1.x-examples/experiment/generate_mock_data.py
@@ -64,16 +64,16 @@ def generate_tag_data(ids):
         raise ValueError("len ids should equal to sample number")
 
     counter = 0
-    v_str = "0123456789abcd"
+    # v_str = "0123456789abcd"
     for sample_i in range(SAMPLE_NUM):
         one_data = [ids[sample_i]]
         for feature_i in range(FEATURE_NUM):
             tag = str(random.randint(TAG_INTERVAL[0], TAG_INTERVAL[1]))
-            value = ''
-            for i in range(VALUE_LENGTH):
-                value += v_str[int(random.random() * 14)]
-            tag_value = ":".join([tag, value])
-            one_data.append(tag_value)
+            # value = ''
+            # for i in range(VALUE_LENGTH):
+            #     value += v_str[int(random.random() * 14)]
+            # tag_value = ":".join([tag, value])
+            one_data.append(tag)
 
         counter += 1
         if counter % 10000 == 0:

diff --git a/examples/federatedml-1.x-examples/experiment/run_task.py b/examples/federatedml-1.x-examples/experiment/run_task.py
@@ -18,6 +18,7 @@
 
 fate_flow_path = home_dir + "/../../../fate_flow/fate_flow_client.py"
 
+# Should be one of "tag_integer_value", "tag", "tag_float_value", "tag_1" or "label"
 HOST_DATA_TYPE = 'tag_integer_value'
 
 intersect_output_name = ''