make develop code to update
mgqa34 committed Feb 25, 2019
1 parent 675b52e commit 36f544d
Showing 24 changed files with 294 additions and 91 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -18,10 +18,10 @@ FATE $ docker exec -t -i ${CONTAINER_ID} bash
There are a few algorithms under `examples/` folder, try them out!

##### Manual version

> FATE (venv) $ pip install -r requirementes.txt
> FATE (venv) $ export PYTHONPATH=`pwd -P`
```
FATE (venv) $ pip install -r requirements.txt
FATE (venv) $ export PYTHONPATH=`pwd -P`
```

#### Cluster
FATE also provides a distributed runtime architecture for Big Data scenarios. Migrating from standalone to cluster requires only configuration changes; no algorithm change is needed.
40 changes: 35 additions & 5 deletions RELEASE.md
@@ -3,9 +3,39 @@
Initial release of FATE.

## Major Features
* Intesection
* LogisticRegression
* SecureBoost
* Secure Federated Transfer Learning
* Standalone & Distributed Computing
> WorkFlow
* Support Intersection workflow
* Support Train workflow
* Support Predict workflow
* Support Validation workflow
* Support Model Load and Save workflow

> FederatedML
* Support Distributed Secure Intersection and Raw Intersection for Sample Alignment
* Support Distributed Homogeneous LR and Heterogeneous LR
* Support Distributed SecureBoost
* Support Distributed Secure Federated Transfer Learning
* Support Binary and Multi-Class Evaluation
* Support Model Cross-Validation
* Support Mini-Batch
* Support L1, L2 Regularizers
* Support Multi-Party Homogeneous FederatedAggregator
* Support Multi-Party Heterogeneous FederatedAggregator
* Support Partially Homomorphic Encryption MPC Protocol
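The partially homomorphic encryption named in the feature above is additive: ciphertexts can be combined so that the underlying plaintexts add. A textbook Paillier sketch with tiny, insecure parameters — an illustration of the primitive, not FATE's actual implementation:

```python
# Textbook Paillier: additively homomorphic encryption. The primes here are
# toy values chosen for illustration only; real deployments use moduli of
# 2048 bits or more.
import math
import random

p, q = 1000003, 1000033          # toy primes (INSECURE, illustration only)
n = p * q
n2 = n * n
g = n + 1
lam = math.lcm(p - 1, q - 1)

def L(x):
    return (x - 1) // n

# Precomputed decryption constant: inverse of L(g^lam mod n^2) mod n.
mu = pow(L(pow(g, lam, n2)), -1, n)

def encrypt(m):
    r = random.randrange(1, n)
    while math.gcd(r, n) != 1:
        r = random.randrange(1, n)
    return (pow(g, m, n2) * pow(r, n, n2)) % n2

def decrypt(c):
    return (L(pow(c, lam, n2)) * mu) % n

# Additive homomorphism: multiplying ciphertexts adds the plaintexts.
c = (encrypt(20) * encrypt(22)) % n2
assert decrypt(c) == 42
```

Because only additions on ciphertexts are possible (not arbitrary multiplications), schemes like this are "partially" rather than fully homomorphic, which is why the protocol is named that way.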


> Architecture
* Initial release of Computing APIs
* Initial release of Storage APIs
* Initial release of Federation APIs
* Initial release of cross-site network communication (i.e. 'Federation')
* Initial release of Standalone runtime, including computing engine and k-v storage
* Initial release of Distributed runtime, including distributed computing engine, distributed k-v storage, metadata management and intra-site/cross-site network communication
* Support cross-site heterogeneous infrastructure
* Initial support of modeling and inference


> Deploy
* Support standalone (docker & manual) deployment
* Support cluster deployment

24 changes: 12 additions & 12 deletions arch/api/standalone/eggroll.py
@@ -76,18 +76,18 @@ def _evict(_, env):


@cached(cache=cache_utils.EvictTTLCache(maxsize=64, ttl=3600, evict=_evict))
def _open_env(path):
def _open_env(path, write=False):
os.makedirs(path, exist_ok=True)
return lmdb.open(path, create=True, max_dbs=1, max_readers=1024, sync=False, map_size=10_737_418_240)
return lmdb.open(path, create=True, max_dbs=1, max_readers=1024, lock=write, sync=False, map_size=10_737_418_240)


def _get_db_path(*args):
return os.sep.join([Standalone.get_instance().data_dir, *args])


def _get_env(*args):
def _get_env(*args, write=False):
_path = _get_db_path(*args)
return _open_env(_path)
return _open_env(_path, write=write)


def _hash_key_to_partition(key, partitions):
@@ -121,8 +121,8 @@ def __init__(self, _type, namespace, name, partition):
def __str__(self):
return _get_db_path(self._type, self._namespace, self._name, str(self._partition))

def as_env(self):
return _get_env(self._type, self._namespace, self._name, str(self._partition))
def as_env(self, write=False):
return _get_env(self._type, self._namespace, self._name, str(self._partition), write=write)


class _UnaryProcess:
@@ -159,7 +159,7 @@ def do_map(p: _UnaryProcess):
txn_map = {}
partitions = Standalone.get_instance().meta_table.get(_table_key)
for p in range(partitions):
env = _get_env(rtn._type, rtn._namespace, rtn._name, str(p))
env = _get_env(rtn._type, rtn._namespace, rtn._name, str(p), write=True)
txn = env.begin(write=True)
txn_map[p] = txn
with source_env.begin() as source_txn:
@@ -182,7 +182,7 @@ def do_map_partitions(p: _UnaryProcess):
op = p._operand
rtn = _Operand(StoreType.IN_MEMORY.value, p._info._task_id, p._info._function_id, op._partition)
source_env = op.as_env()
dst_env = rtn.as_env()
dst_env = rtn.as_env(write=True)
serialize = c_pickle.dumps
with source_env.begin() as source_txn:
with dst_env.begin(write=True) as dst_txn:
@@ -200,7 +200,7 @@ def do_map_values(p: _UnaryProcess):
op = p._operand
rtn = _Operand(StoreType.IN_MEMORY.value, p._info._task_id, p._info._function_id, op._partition)
source_env = op.as_env()
dst_env = rtn.as_env()
dst_env = rtn.as_env(write=True)
serialize = c_pickle.dumps
deserialize = c_pickle.loads
with source_env.begin() as source_txn:
@@ -221,7 +221,7 @@ def do_join(p: _BinaryProcess):
rtn = _Operand(StoreType.IN_MEMORY.value, p._info._task_id, p._info._function_id, left_op._partition)
right_env = right_op.as_env()
left_env = left_op.as_env()
dst_env = rtn.as_env()
dst_env = rtn.as_env(write=True)
serialize = c_pickle.dumps
deserialize = c_pickle.loads
with left_env.begin() as left_txn:
@@ -260,7 +260,7 @@ def do_glom(p: _UnaryProcess):
op = p._operand
rtn = _Operand(StoreType.IN_MEMORY.value, p._info._task_id, p._info._function_id, op._partition)
source_env = op.as_env()
dst_env = rtn.as_env()
dst_env = rtn.as_env(write=True)
serialize = c_pickle.dumps
deserialize = c_pickle.loads
with source_env.begin() as source_txn:
@@ -280,7 +280,7 @@ def do_sample(p: _UnaryProcess):
op = p._operand
rtn = _Operand(StoreType.IN_MEMORY.value, p._info._task_id, p._info._function_id, op._partition)
source_env = op.as_env()
dst_env = rtn.as_env()
dst_env = rtn.as_env(write=True)
deserialize = c_pickle.loads
fraction, seed = deserialize(p._info._function_bytes)
with source_env.begin() as source_txn:
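The eggroll changes above thread a `write` flag down to `lmdb.open(..., lock=write)` so that only writers take the file lock. The surrounding pattern — one key-value environment per partition, with serialized keys hashed to a partition — can be sketched with plain dicts standing in for LMDB environments (the names below are illustrative, not FATE's actual API):

```python
# Sketch of the partitioned key-value layout used by eggroll's standalone
# runtime. Plain dicts stand in for the per-partition LMDB environments; the
# real code opens one env per partition and passes lock=True only for writers.
import hashlib

def hash_key_to_partition(key: bytes, partitions: int) -> int:
    # Stable assignment of a serialized key to one of N partitions.
    # (The real implementation may use a different hash; this is illustrative.)
    return int.from_bytes(hashlib.sha1(key).digest()[:8], "big") % partitions

class PartitionedStore:
    def __init__(self, partitions: int):
        self.partitions = partitions
        # One "environment" per partition, like one LMDB directory each.
        self._envs = [dict() for _ in range(partitions)]

    def put(self, key: bytes, value: bytes) -> None:
        self._envs[hash_key_to_partition(key, self.partitions)][key] = value

    def get(self, key: bytes):
        return self._envs[hash_key_to_partition(key, self.partitions)].get(key)

store = PartitionedStore(4)
store.put(b"id_1", b"alice")
store.put(b"id_2", b"bob")
assert store.get(b"id_1") == b"alice"
```

Because every operator (`do_map`, `do_join`, and the rest) writes its output into a fresh destination environment, only the destination needs `write=True`; source environments can stay lock-free for concurrent readers.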
4 changes: 2 additions & 2 deletions arch/core/pom.xml
@@ -33,7 +33,7 @@

<properties>

<jackson.version>2.9.7</jackson.version>
<jackson.version>2.9.8</jackson.version>

<mysql.connector.version>8.0.13</mysql.connector.version>
<mybatis-generator-plugin.version>1.3.7</mybatis-generator-plugin.version>
@@ -121,4 +121,4 @@
</plugins>
</build>

</project>
</project>
@@ -15,17 +15,17 @@
"port": 8889
}
]
},
"9999": {
"default": [
{
"ip": "127.0.0.1",
"port": 8890
}
]
}
},
"9999": {
"default": [
{
"ip": "127.0.0.1",
"port": 8890
}
]
},
"permission": {
"default_allow": true
}
}
}
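The `route_table.json` fragment above maps party ids (e.g. `9999`) to named endpoint lists, with a sibling `permission` block. A minimal sketch of how such a table could be resolved to an endpoint — the `resolve` helper is hypothetical, not FATE's actual proxy code:

```python
# Illustrative lookup against a table shaped like the route_table.json diff
# above: party id -> service name -> endpoint list, plus a "permission" block.
route_table = {
    "9999": {"default": [{"ip": "127.0.0.1", "port": 8890}]},
    "permission": {"default_allow": True},
}

def resolve(table, party_id, service="default"):
    # Fall back to the "default" endpoint list when the named service is absent.
    routes = table.get(party_id, {})
    endpoints = routes.get(service) or routes.get("default") or []
    if not endpoints:
        raise KeyError(f"no route for party {party_id}")
    ep = endpoints[0]
    return ep["ip"], ep["port"]

print(resolve(route_table, "9999"))  # -> ('127.0.0.1', 8890)
```

The commit's fix is structural: the `9999` entry had been nested inside the previous party's block, so a lookup like the one above would have failed to find it at the top level.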
92 changes: 79 additions & 13 deletions cluster-deploy/README.md
@@ -1,5 +1,4 @@
# Build
-

## 1. Checkout from git
```
@@ -21,13 +20,29 @@ There is a script in this project that can ease this. It packs up target jars and

You need to set `base_dir` and `output_dir` to point at this project's arch directory and your desired output directory.

`base_dir` is the path of `FATE/arch` in your local environment, and `output_dir` is the directory where you want to put the packaging output tar files.

Then you can run the following command:

```
bash packaging.sh
```
If everything is ok, tar files can be found in `output_dir`.

Output tar file names look like `fate-${module}-${version}.tar.gz`. Each contains `fate-${module}-${version}.jar` and a `lib/` dir holding the dependent libraries for that jar.

[`cluster-deploy/example-dir-tree`](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy/example-dir-tree) contains an example dir tree. You can extract each tar file into the corresponding dir.

Let's take the `federation` module as an example. After packaging version 0.1, you get a `fate-federation-0.1.tar.gz`. Then you can perform the following steps:

1. `cd ${path-to-example-dir-tree}/federation`.
2. `tar xzf fate-federation-0.1.tar.gz` to extract the tar file.
3. `cp -r FATE/arch/driver/federation/src/main/resources/ conf/`, so that example configuration files are copied and a `conf` dir is created for them.
4. Modify configuration files. See section 2.3 for more details.
5. `ln -s fate-federation-0.1.jar fate-federation.jar` to create a symlink, so that other tools do not depend on a specific version.

You can perform the same steps for the other Java modules.
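The five steps above can be run end to end; the sketch below simulates them in a throwaway temp directory with a dummy tar so it runs anywhere (all paths and the `0.1` version are placeholders for your real build output):

```shell
set -e
workdir=$(mktemp -d)
mkdir -p "$workdir/federation"

# Simulate the packaging output: a versioned jar wrapped in a tar file.
touch "$workdir/fate-federation-0.1.jar"
tar -C "$workdir" -czf "$workdir/federation/fate-federation-0.1.tar.gz" \
    fate-federation-0.1.jar

cd "$workdir/federation"
tar xzf fate-federation-0.1.tar.gz        # step 2: extract the tar file
mkdir -p conf                              # step 3 in a real deployment:
# cp -r FATE/arch/driver/federation/src/main/resources/ conf/
ln -s fate-federation-0.1.jar fate-federation.jar   # step 5: unversioned symlink
ls -l fate-federation.jar                  # the symlink hides the version
```

The symlink is what lets the management scripts reference `fate-federation.jar` without being rewritten on every release.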

### 2.3. Configuration Files
Although configuration paths are flexible, we recommend that users organize them well.
Example configuration files can be found under
@@ -39,65 +54,116 @@ Users can find a detailed configuration document in
### 2.4. How to run
Each main function is named after its module. Currently we have the following main functions:

Number | Module Name | Main Function
-------|-----------------|---------------
1 | federation | com.webank.ai.fate.driver.Federation
2 | meta-service | com.webank.ai.fate.eggroll.MetaService
3 | proxy | com.webank.ai.fate.networking.Proxy
4 | roll | com.webank.ai.fate.eggroll.Roll
5 | storage-service | com.webank.ai.fate.eggroll.StorageService
Number | Module Name | Main Function | Configuration Example
-------|-----------------|--------------------------------------------|-----------------------------
1 | federation | com.webank.ai.fate.driver.Federation | FATE/arch/driver/federation/src/main/resources/
2 | meta-service | com.webank.ai.fate.eggroll.MetaService | FATE/arch/eggroll/meta-service/src/main/resources/
3 | proxy | com.webank.ai.fate.networking.Proxy | FATE/arch/networking/proxy/src/main/resources/
4 | roll | com.webank.ai.fate.eggroll.Roll | FATE/arch/eggroll/roll/src/main/resources/
5 | storage-service | com.webank.ai.fate.eggroll.StorageService | FATE/arch/eggroll/storage-service/src/main/resources/

Please note that users should add the directory of configuration files to Java's classpath so that these configurations can be loaded.

We provide example management scripts to run these services. Users can find them under [`cluster-deploy/example-dir-tree`](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy/example-dir-tree), along with example directory tree described in section 5.
We have also provided example management scripts to run these services. Users can find them under [`cluster-deploy/example-dir-tree`](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy/example-dir-tree), along with example directory tree described in section 5.
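A hedged example of what a launch command might look like, given the classpath note above: the deploy path and JVM invocation are assumptions, and only the main class is taken from the table in section 2.4.

```shell
# Assemble a classpath covering conf/, lib/, and the unversioned symlink,
# matching the example dir tree in section 5. MODULE_HOME is a placeholder.
MODULE_HOME=/data/projects/fate/federation   # assumption: your deploy dir
CLASSPATH="$MODULE_HOME/conf:$MODULE_HOME/lib/*:$MODULE_HOME/fate-federation.jar"

# Print the command rather than running it, since no FATE install is assumed.
echo java -cp "$CLASSPATH" com.webank.ai.fate.driver.Federation
# Remove the leading 'echo' to actually start the service.
```

Putting `conf` first on the classpath is what makes the module pick up the configuration files copied in section 2.2.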

## 3. Python Components

### 3.1. Packaging

```
mkdir -p ${path-to-example-dir-tree}/python
git archive -o ${path-to-example-dir-tree}/python/python.tar $(git rev-parse HEAD) arch/api federatedml workflow examples
cd ${path-to-example-dir-tree}/python
tar -xf python.tar
```

### 3.2. Configuration Files
Configuration file path:
`python/arch/conf/server_conf.json`

Users can find a detailed configuration document in
[`cluster-deploy/doc`](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy/doc)

### 3.3 How to run

```
# enter virtual env first
(venv) $ pip install -r requirements.txt
(venv) $ export PYTHONPATH=${path-to-example-dir-tree}/python
(venv) $ python ${path-to-example-dir-tree}/python/processor/processor.py > ${path-to-example-dir-tree}/python/processor.out 2>&1 &
```


## 4. How to Run in Cluster Mode
Please refer to the configuration guide [here](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy/doc/configuration.md)


## 5. Example Directory Tree
-

```
deploy-dir
|
|--- federation
| |- conf/
| | |- applicationContext-federation.xml
| | |- federation.properties
| | |- log4j2.properties
| |
| |- lib/
| |- fate-federation-0.1.jar
| |- fate-federation.jar -> fate-federation-0.1.jar
|
|--- meta-service
| |- conf/
| | |- applicationContext-meta-service.xml
| | |- jdbc.properties
| | |- log4j2.properties
| | |- meta-service.properties
| |
| |- lib/
| |- fate-meta-service-0.1.jar
| |- fate-meta-service.jar -> fate-meta-service-0.1.jar
|
|--- proxy
| |- conf/
| | |- applicationContext-proxy.xml
| | |- log4j2.properties
| | |- proxy.properties
| | |- route_table.json
| |
| |- lib/
| |- fate-proxy-0.1.jar
| |- fate-proxy.jar -> fate-proxy-0.1.jar
|
|--- python
| |- arch --- |- api/
| |- conf/
| |- processor/
| |- arch
| | |- api/
| | |- conf/
| | |- processor/
| |
| |- federatedml/
| |- examples/
| |- workflow/
|
|--- roll
| |- conf/
| | |- applicationContext-roll.xml
| | |- log4j2.properties
| | |- roll.properties
| |
| |- lib/
| |- fate-roll-0.1.jar
| |- fate-roll.jar -> fate-roll-0.1.jar
|
|--- storage-service
| |- conf/
| | |- log4j2.properties
| |
| |- lib/
| |- fate-storage-service-0.1.jar
| |- fate-storage-service.jar -> fate-storage-service-0.1.jar
```

## 6. Future works
Deploy and build will be automated in future releases.
Binary file not shown.
6 changes: 3 additions & 3 deletions examples/hetero_ftl/HOWTORUN.md
@@ -10,7 +10,7 @@ You can turn on the encryption version by setting the <b style="color:red">is_en
#### Standalone vs Cluster

You can run FTL algorithm on two different work modes: *standalone* mode and *cluster* mode. On standalone mode, host, guest and arbiter are running in one machine while on cluster mode they are running in multiple machines. Running algorithm on cluster mode requires some configuration. Please refer to this [article]() for more details.
You can run FTL algorithm on two different work modes: *standalone* mode and *cluster* mode. On standalone mode, host, guest and arbiter are running in one machine while on cluster mode they are running in multiple machines. Running algorithm on cluster mode requires some configuration. Please refer to [`cluster-deploy`](https://github.com/WeBankFinTech/FATE/tree/master/cluster-deploy) for more details.

You can turn on the cluster mode by setting the <b style="color:red">work_mode</b> parameter to 1. Otherwise set it to 0 (default). You can find this parameter in **guest_runtime_conf.json**, **host_runtime_conf.json** and **arbiter_runtime_conf.json** located in **examples/hetero_ftl/conf** folder.

@@ -86,7 +86,7 @@ For plain version, you only need to check logs for host and guest since arbiter
For encryption version, in addition to above two logs, you may also want to check log for arbiter:

* **hetero_ftl_arbiter.log**, records log information for arbiter side of running the FTL algorithm.
* In encryption version of FTL algorithm, only arbiter knows the loss for each iteration. Therefore, you can check the change loss in this log file.
* In encryption version of FTL algorithm, only arbiter knows the loss for each iteration. Therefore, you can check the change of loss in this log file.

If you run the FTL algorithm by using **sh run_ftl_plain_standalone.sh {job_id}** or **sh run_ftl_enc_standalone.sh {job_id}**, two or three logs would be generated under **examples/hetero_ftl/** folder:

@@ -109,7 +109,7 @@ For running FTL algorithm, we only need to know four sections of parameters.
* predict: predict labels for samples from the host
* *work_mode*: if 0, we would run FTL algorithm in standalone mode. if 1, we would run FTL algorithm in cluster mode.

> Host, guest and/or arbiter must have the value for *work_mode* in a particular job.
> Host, guest and/or arbiter must have the same value for *work_mode* in a particular job.
> Host and guest must have the same value for *method* parameter in a particular job. Arbiter should always have the value of "train" for this parameter.