Merge pull request project-codeflare#29 from project-codeflare/fix_yref

raghukiran1224 · web-flow · commit 53f55426cde3 · 2021-07-23T09:45:21.000-04:00
Fix yref assignment for pipeline PREDICT and SCORE
diff --git a/codeflare/pipelines/Runtime.py b/codeflare/pipelines/Runtime.py
@@ -62,7 +62,7 @@ class ExecutionType(Enum):
 
 
 @ray.remote
-def execute_or_node_remote(node: dm.EstimatorNode, mode: ExecutionType, xy_ref: dm.XYRef):
+def execute_or_node_remote(node: dm.EstimatorNode, mode: ExecutionType, xy_ref: dm.XYRef, is_outputNode: bool):
     """
     Helper remote function that executes an OR node. As such, this is a remote task that runs the estimator
     in the provided mode with the data pointed to by XYRef. The key aspect to note here is the choice of input
@@ -107,9 +107,16 @@ def execute_or_node_remote(node: dm.EstimatorNode, mode: ExecutionType, xy_ref:
     elif mode == ExecutionType.SCORE:
         if base.is_classifier(estimator) or base.is_regressor(estimator):
             estimator = node.get_estimator()
-            res_Xref = ray.put(estimator.score(X, y))
-            result = dm.XYRef(res_Xref, xy_ref.get_yref(), prev_node_ptr, prev_node_ptr, [xy_ref])
-            return result
+            if is_outputNode:
+                score_ref = ray.put(estimator.score(X, y))
+                result = dm.XYRef(score_ref, score_ref, prev_node_ptr, prev_node_ptr, [xy_ref])
+                return result
+            else:
+                res_xy = estimator.score(xy_list)
+                res_xref = ray.put(res_xy.get_x())
+                res_yref = ray.put(res_xy.get_y())
+                result = dm.XYRef(res_xref, res_yref, prev_node_ptr, prev_node_ptr, Xyref_list)
+                return result
         else:
             res_Xref = ray.put(estimator.transform(X))
             result = dm.XYRef(res_Xref, xy_ref.get_yref(), prev_node_ptr, prev_node_ptr, [xy_ref])
@@ -118,16 +125,24 @@ def execute_or_node_remote(node: dm.EstimatorNode, mode: ExecutionType, xy_ref:
     elif mode == ExecutionType.PREDICT:
         # Test mode does not clone as it is a simple predict or transform
         if base.is_classifier(estimator) or base.is_regressor(estimator):
-            res_Xref = ray.put(estimator.predict(X))
-            result = dm.XYRef(res_Xref, xy_ref.get_yref(), prev_node_ptr, prev_node_ptr, [xy_ref])
-            return result
+            if is_outputNode:
+                predict_ref = ray.put(estimator.predict(X))
+                result = dm.XYRef(predict_ref, predict_ref, prev_node_ptr, prev_node_ptr, [xy_ref])
+                return result
+            else:
+                res_xy = estimator.predict(xy_list)
+                res_xref = ray.put(res_xy.get_x())
+                res_yref = ray.put(res_xy.get_y())
+
+                result = dm.XYRef(res_xref, res_yref, prev_node_ptr, prev_node_ptr, Xyref_list)
+                return result
         else:
             res_Xref = ray.put(estimator.transform(X))
             result = dm.XYRef(res_Xref, xy_ref.get_yref(), prev_node_ptr, prev_node_ptr, [xy_ref])
             return result
 
 
-def execute_or_node(node, pre_edges, edge_args, post_edges, mode: ExecutionType):
+def execute_or_node(node, pre_edges, edge_args, post_edges, mode: ExecutionType, is_outputNode):
     """
     Inner method that executes the estimator node parallelizing at the level of input objects. This defines the
     strategy of execution of the node, in this case, parallel for each object that is input. The function takes
@@ -147,7 +162,7 @@ def execute_or_node(node, pre_edges, edge_args, post_edges, mode: ExecutionType)
         exec_xyrefs = []
         for xy_ref_ptr in Xyref_ptrs:
             xy_ref = ray.get(xy_ref_ptr)
-            inner_result = execute_or_node_remote.remote(node, mode, xy_ref)
+            inner_result = execute_or_node_remote.remote(node, mode, xy_ref, is_outputNode)
             exec_xyrefs.append(inner_result)
 
         for post_edge in post_edges:
@@ -337,7 +352,7 @@ def execute_pipeline(pipeline: dm.Pipeline, mode: ExecutionType, pipeline_input:
             pre_edges = pipeline.get_pre_edges(node)
             post_edges = pipeline.get_post_edges(node)
             if node.get_node_input_type() == dm.NodeInputType.OR:
-                execute_or_node(node, pre_edges, edge_args, post_edges, mode)
+                execute_or_node(node, pre_edges, edge_args, post_edges, mode, pipeline.is_output(node))
             elif node.get_node_input_type() == dm.NodeInputType.AND:
                 execute_and_node(node, pre_edges, edge_args, post_edges, mode)
 
@@ -662,4 +677,4 @@ def save(pipeline_output: dm.PipelineOutput, xy_ref: dm.XYRef, filehandle):
     :return: None
     """
     pipeline = select_pipeline(pipeline_output, xy_ref)
-    pipeline.save(filehandle)
+    pipeline.save(filehandle)
diff --git a/codeflare/pipelines/tests/test_pipeline_predict.py b/codeflare/pipelines/tests/test_pipeline_predict.py
@@ -77,8 +77,8 @@ def test_pipeline_predict():
 
 	predict_clf_output = predict_output.get_xyrefs(node_clf)
 
-	#y_pred = ray.get(predict_clf_output[0].get_yref())
-	y_pred = ray.get(predict_clf_output[0].get_Xref())
+	y_pred = ray.get(predict_clf_output[0].get_yref())
+	#y_pred = ray.get(predict_clf_output[0].get_Xref())
 
 
 	report_codeflare = classification_report(y_test, y_pred)
diff --git a/notebooks/plot_nca_classification.ipynb b/notebooks/plot_nca_classification.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -150,14 +150,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2021-06-08 16:33:25,975\tINFO services.py:1267 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8266\u001b[39m\u001b[22m\n"
+      "2021-07-22 17:14:51,530\tINFO services.py:1267 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8266\u001b[39m\u001b[22m\n"
      ]
     },
     {
@@ -243,7 +243,7 @@
     "\n",
     "knn_pipeline = rt.select_pipeline(pipeline_fitted, pipeline_fitted.get_xyrefs(node_knn)[0])\n",
     "knn_score = ray.get(rt.execute_pipeline(knn_pipeline, ExecutionType.SCORE, test_input)\n",
-    "                    .get_xyrefs(node_knn)[0].get_Xref())\n",
+    "                    .get_xyrefs(node_knn)[0].get_yref())\n",
     "\n",
     "# Plot the decision boundary. For that, we will assign a color to each\n",
     "# point in the mesh [x_min, x_max]x[y_min, y_max].\n",
@@ -254,7 +254,7 @@
     "predict_input.add_xy_arg(node_scalar, dm.Xy(meshinput, meshlabel))\n",
     "\n",
     "Z = ray.get(rt.execute_pipeline(knn_pipeline, ExecutionType.PREDICT, predict_input)\n",
-    "                         .get_xyrefs(node_knn)[0].get_Xref())\n",
+    "                         .get_xyrefs(node_knn)[0].get_yref())\n",
     "\n",
     "# Put the result into a color plot\n",
     "Z = Z.reshape(xx.shape)\n",
@@ -273,10 +273,10 @@
     "name = names[1]\n",
     "nca_pipeline = rt.select_pipeline(pipeline_fitted, pipeline_fitted.get_xyrefs(node_knn_post_nca)[0])\n",
     "nca_score = ray.get(rt.execute_pipeline(nca_pipeline, ExecutionType.SCORE, test_input)\n",
-    "                    .get_xyrefs(node_knn_post_nca)[0].get_Xref())\n",
+    "                    .get_xyrefs(node_knn_post_nca)[0].get_yref())\n",
     "\n",
     "Z = ray.get(rt.execute_pipeline(nca_pipeline, ExecutionType.PREDICT, predict_input)\n",
-    "                         .get_xyrefs(node_knn_post_nca)[0].get_Xref())\n",
+    "                         .get_xyrefs(node_knn_post_nca)[0].get_yref())\n",
     "\n",
     "# Put the result into a color plot\n",
     "Z = Z.reshape(xx.shape)\n",
diff --git a/notebooks/plot_rbm_logistic_classification.ipynb b/notebooks/plot_rbm_logistic_classification.ipynb
@@ -27,16 +27,16 @@
      "output_type": "stream",
      "text": [
       "Automatically created module for IPython interactive environment\n",
-      "[BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.11s\n",
+      "[BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.13s\n",
       "[BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.18s\n",
       "[BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.18s\n",
-      "[BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.16s\n",
-      "[BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.16s\n",
-      "[BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.15s\n",
-      "[BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.15s\n",
-      "[BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.13s\n",
-      "[BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.14s\n",
-      "[BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.14s\n",
+      "[BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.17s\n",
+      "[BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.17s\n",
+      "[BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.18s\n",
+      "[BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.17s\n",
+      "[BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.17s\n",
+      "[BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.17s\n",
+      "[BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.16s\n",
       "Logistic regression using RBM features:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
@@ -207,60 +207,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2021-06-09 10:48:44,778\tINFO services.py:1267 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8266\u001b[39m\u001b[22m\n"
+      "2021-07-22 17:16:19,742\tINFO services.py:1267 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8267\u001b[39m\u001b[22m\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.11s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.11s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.11s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.11s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.14s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.13s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.13s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.13s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.13s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.15s\n",
-      "\u001b[2m\u001b[36m(pid=4523)\u001b[0m [BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.15s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 1, pseudo-likelihood = -25.57, time = 0.16s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 2, pseudo-likelihood = -23.68, time = 0.22s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 3, pseudo-likelihood = -22.74, time = 0.22s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 4, pseudo-likelihood = -21.83, time = 0.22s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 5, pseudo-likelihood = -21.62, time = 0.22s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 6, pseudo-likelihood = -21.11, time = 0.21s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 7, pseudo-likelihood = -20.88, time = 0.21s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 8, pseudo-likelihood = -20.58, time = 0.21s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 9, pseudo-likelihood = -20.32, time = 0.21s\n",
+      "\u001b[2m\u001b[36m(pid=12180)\u001b[0m [BernoulliRBM] Iteration 10, pseudo-likelihood = -20.13, time = 0.47s\n",
       "Logistic regression using RBM features:\n",
       "              precision    recall  f1-score   support\n",
       "\n",
@@ -411,14 +381,14 @@
     "\n",
     "logistic_pipeline = rt.select_pipeline(pipeline_fitted, pipeline_fitted.get_xyrefs(node_logistic)[0])\n",
     "Y_pred = ray.get(rt.execute_pipeline(logistic_pipeline, ExecutionType.PREDICT, predict_input)\n",
-    "                         .get_xyrefs(node_logistic)[0].get_Xref())\n",
+    "                         .get_xyrefs(node_logistic)[0].get_yref())\n",
     "\n",
     "print(\"Logistic regression using RBM features:\\n%s\\n\" % (\n",
     "    metrics.classification_report(Y_test, Y_pred)))\n",
     "\n",
     "raw_pixel_pipeline = rt.select_pipeline(pipeline_fitted, pipeline_fitted.get_xyrefs(node_raw_pixel)[0])\n",
     "Y_pred = ray.get(rt.execute_pipeline(raw_pixel_pipeline, ExecutionType.PREDICT, predict_input)\n",
-    "                         .get_xyrefs(node_raw_pixel)[0].get_Xref())\n",
+    "                         .get_xyrefs(node_raw_pixel)[0].get_yref())\n",
     "\n",
     "print(\"Logistic regression using raw pixel features:\\n%s\\n\" % (\n",
     "    metrics.classification_report(Y_test, Y_pred)))\n",
@@ -438,6 +408,13 @@
     "\n",
     "ray.shutdown()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/notebooks/plot_scalable_poly_kernels.ipynb b/notebooks/plot_scalable_poly_kernels.ipynb