[promptflow] Hide additional info & debug info when listing runs (microsoft#1384)

# Description This PR hides `additionalInfo` and `debugInfo` when listing runs (pf/pfazure), so that the run list is cleaner; otherwise, a single failed run may print a long and unhelpful stack trace. This PR also makes a small update to email sanitization: it now only sanitizes `<username>@microsoft.com`, to avoid mis-sanitizing something like `@tool`. # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes.
supersoob · Dec 5, 2023 · a49f779 · a49f779
1 parent 6cc2bcb
commit a49f779
Show file tree

Hide file tree

Showing 8 changed files with 273 additions and 7 deletions.
diff --git a/src/promptflow/CHANGELOG.md b/src/promptflow/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 - [SDK/CLI] Removing telemetry warning when running commands.
 - Empty node stdout & stderr to avoid large visualize HTML.
+- Hide unnecessary fields in run list for better readability.
 
 ## 1.1.1 (2023.12.1)
 

diff --git a/src/promptflow/promptflow/_cli/_pf/_run.py b/src/promptflow/promptflow/_cli/_pf/_run.py
@@ -458,7 +458,8 @@ def list_runs(
         max_results=max_results,
         list_view_type=get_list_view_type(archived_only=archived_only, include_archived=include_archived),
     )
-    json_list = [run._to_dict() for run in runs]
+    # hide additional info and debug info in run list for better user experience
+    json_list = [run._to_dict(exclude_additional_info=True, exclude_debug_info=True) for run in runs]
     _output_result_list_with_format(result_list=json_list, output_format=output)
     return runs
 

diff --git a/src/promptflow/promptflow/_cli/_pf_azure/_run.py b/src/promptflow/promptflow/_cli/_pf_azure/_run.py
@@ -430,7 +430,8 @@ def list_runs(
 
     pf = _get_azure_pf_client(subscription_id, resource_group, workspace_name)
     runs = pf.runs.list(max_results=max_results, list_view_type=list_view_type)
-    run_list = [run._to_dict() for run in runs]
+    # hide additional info and debug info in run list for better user experience
+    run_list = [run._to_dict(exclude_additional_info=True, exclude_debug_info=True) for run in runs]
     _output_result_list_with_format(result_list=run_list, output_format=output)
     return runs
 

diff --git a/src/promptflow/promptflow/_sdk/entities/_run.py b/src/promptflow/promptflow/_sdk/entities/_run.py
@@ -319,7 +319,7 @@ def _dump(self) -> None:
         """Dump current run entity to local DB."""
         self._to_orm_object().dump()
 
-    def _to_dict(self):
+    def _to_dict(self, *, exclude_additional_info: bool = False, exclude_debug_info: bool = False):
         from promptflow._sdk.operations._local_storage_operations import LocalStorageOperations
 
         properties = self.properties
@@ -348,6 +348,10 @@ def _to_dict(self):
             # add exception part if any
             exception_dict = local_storage.load_exception()
             if exception_dict:
+                if exclude_additional_info:
+                    exception_dict.pop("additionalInfo", None)
+                if exclude_debug_info:
+                    exception_dict.pop("debugInfo", None)
                 result["error"] = exception_dict
         elif self._run_source == RunInfoSources.INDEX_SERVICE:
             result["creation_context"] = self._creation_context
@@ -371,6 +375,10 @@ def _to_dict(self):
                 result[RunDataKeys.INPUT_RUN_PORTAL_URL] = self._input_run_portal_url
             if self._error:
                 result["error"] = self._error
+                if exclude_additional_info:
+                    result["error"]["error"].pop("additionalInfo", None)
+                if exclude_debug_info:
+                    result["error"]["error"].pop("debugInfo", None)
         return result
 
     @classmethod

diff --git a/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py b/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import copy
 import json
 import shutil
 from logging import Logger
@@ -34,8 +35,10 @@
 RUNS_DIR = "./tests/test_configs/runs"
 DATAS_DIR = "./tests/test_configs/datas"
 
+# TODO(2770419): create this run dynamically when migrating the live tests to canary
+FAILED_RUN_NAME_EASTUS = "3dfd077a-f071-443e-9c4e-d41531710950"
+
 
-# TODO(2528577): we should run these test with recording mode.
 @pytest.mark.timeout(timeout=DEFAULT_TEST_TIMEOUT, method=PYTEST_TIMEOUT_METHOD)
 @pytest.mark.e2etest
 @pytest.mark.usefixtures(
@@ -351,12 +354,20 @@ def test_stream_run_logs(self, pf):
     def test_stream_failed_run_logs(self, pf, capfd: pytest.CaptureFixture):
         # (default) raise_on_error=True
         with pytest.raises(InvalidRunStatusError):
-            pf.stream(run="3dfd077a-f071-443e-9c4e-d41531710950")
+            pf.stream(run=FAILED_RUN_NAME_EASTUS)
         # raise_on_error=False
-        pf.stream(run="3dfd077a-f071-443e-9c4e-d41531710950", raise_on_error=False)
+        pf.stream(run=FAILED_RUN_NAME_EASTUS, raise_on_error=False)
         out, _ = capfd.readouterr()
         assert "Input 'question' in line 0 is not provided for flow 'Simple_mock_answer'." in out
 
+    def test_failed_run_to_dict_exclude(self, pf):
+        failed_run = pf.runs.get(run=FAILED_RUN_NAME_EASTUS)
+        # The Azure run's dict references the run's own error dict, so deep-copy it before the excluding call pops keys
+        default = copy.deepcopy(failed_run._to_dict())
+        exclude = failed_run._to_dict(exclude_additional_info=True, exclude_debug_info=True)
+        assert "additionalInfo" in default["error"]["error"] and "additionalInfo" not in exclude["error"]["error"]
+        assert "debugInfo" in default["error"]["error"] and "debugInfo" not in exclude["error"]["error"]
+
     @pytest.mark.skipif(
         condition=not is_live(),
         reason="cannot differ the two requests to run history in replay mode.",

diff --git a/src/promptflow/tests/sdk_cli_azure_test/recording_utilities/utils.py b/src/promptflow/tests/sdk_cli_azure_test/recording_utilities/utils.py
@@ -202,7 +202,10 @@ def sanitize_pfs_body(body: str) -> str:
 
 
 def sanitize_email(value: str) -> str:
-    return re.sub(r"([\w\.-]+)@([\w\.-]+)", r"{}@\2".format(SanitizedValues.EMAIL_USERNAME), value)
+    """Return *value* with the username of each ``<username>@microsoft.com`` replaced by the sanitized username."""
+    # Only microsoft.com addresses are rewritten, so decorator-like text such as ``@tool`` is left untouched.
+    # The dot is escaped so it matches a literal "." rather than any character (e.g. "microsoftXcom").
+    return re.sub(r"([\w\.-]+)@(microsoft\.com)", r"{}@\2".format(SanitizedValues.EMAIL_USERNAME), value)
 
 
 def _is_json_payload(headers: Dict, key: str) -> bool:

diff --git a/src/promptflow/tests/sdk_cli_test/e2etests/test_flow_run.py b/src/promptflow/tests/sdk_cli_test/e2etests/test_flow_run.py
@@ -1025,3 +1025,15 @@ def test_specify_run_output_path_with_invalid_macro(self, pf: PFClient, mocker:
             local_storage = LocalStorageOperations(run=run)
             expected_output_path_prefix = (Path.home() / PROMPT_FLOW_DIR_NAME / ".runs" / run.name).resolve().as_posix()
             assert local_storage.outputs_folder.as_posix().startswith(expected_output_path_prefix)
+
+    def test_failed_run_to_dict_exclude(self, pf):
+        failed_run = pf.run(
+            flow=f"{FLOWS_DIR}/failed_flow",
+            data=f"{DATAS_DIR}/webClassification1.jsonl",
+            column_mapping={"text": "${data.url}"},
+        )
+        default = failed_run._to_dict()
+        # the CLI's run list passes both flags to hide additional info and debug info
+        exclude = failed_run._to_dict(exclude_additional_info=True, exclude_debug_info=True)
+        assert "additionalInfo" in default["error"] and "additionalInfo" not in exclude["error"]
+        assert "debugInfo" in default["error"] and "debugInfo" not in exclude["error"]
diff --git a/...t_configs/recordings/test_run_operations_TestFlowRun_test_failed_run_to_dict_exclude.yaml b/...t_configs/recordings/test_run_operations_TestFlowRun_test_failed_run_to_dict_exclude.yaml
@@ -0,0 +1,232 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      User-Agent:
+      - promptflow-sdk/0.0.1 promptflow/0.0.1 azure-ai-ml/1.12.1 azsdk-python-mgmt-machinelearningservices/0.1.0
+        Python/3.10.13 (Windows-10-10.0.22631-SP0)
+    method: GET
+    uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000
+  response:
+    body:
+      string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000",
+        "name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location":
+        "eastus", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic",
+        "tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus.api.azureml.ms/discovery"}}'
+    headers:
+      cache-control:
+      - no-cache
+      content-length:
+      - '3630'
+      content-type:
+      - application/json; charset=utf-8
+      expires:
+      - '-1'
+      pragma:
+      - no-cache
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains
+      vary:
+      - Accept-Encoding
+      x-cache:
+      - CONFIG_NOCACHE
+      x-content-type-options:
+      - nosniff
+      x-request-time:
+      - '0.029'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: null
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      User-Agent:
+      - promptflow-sdk/0.0.1 promptflow/0.0.1 azure-ai-ml/1.12.1 azsdk-python-mgmt-machinelearningservices/0.1.0
+        Python/3.10.13 (Windows-10-10.0.22631-SP0)
+    method: GET
+    uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores?count=30&isDefault=true&orderByAsc=false
+  response:
+    body:
+      string: '{"value": [{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore",
+        "name": "workspaceblobstore", "type": "Microsoft.MachineLearningServices/workspaces/datastores",
+        "properties": {"description": null, "tags": null, "properties": null, "isDefault":
+        true, "credentials": {"credentialsType": "AccountKey"}, "intellectualProperty":
+        null, "subscriptionId": "00000000-0000-0000-0000-000000000000", "resourceGroup":
+        "00000", "datastoreType": "AzureBlob", "accountName": "fake_account_name",
+        "containerName": "fake-container-name", "endpoint": "core.windows.net", "protocol":
+        "https", "serviceDataAccessAuthIdentity": "WorkspaceSystemAssignedIdentity"},
+        "systemData": {"createdAt": "2023-04-08T02:53:06.5886442+00:00", "createdBy":
+        "779301c0-18b2-4cdc-801b-a0a3368fee0a", "createdByType": "Application", "lastModifiedAt":
+        "2023-04-08T02:53:07.521127+00:00", "lastModifiedBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a",
+        "lastModifiedByType": "Application"}}]}'
+    headers:
+      cache-control:
+      - no-cache
+      content-length:
+      - '1372'
+      content-type:
+      - application/json; charset=utf-8
+      expires:
+      - '-1'
+      pragma:
+      - no-cache
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains
+      vary:
+      - Accept-Encoding
+      x-cache:
+      - CONFIG_NOCACHE
+      x-content-type-options:
+      - nosniff
+      x-request-time:
+      - '0.205'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"runId": "3dfd077a-f071-443e-9c4e-d41531710950", "selectRunMetadata":
+      true, "selectRunDefinition": true, "selectJobSpecification": true}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '137'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - python-requests/2.31.0
+    method: POST
+    uri: https://eastus.api.azureml.ms/history/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/rundata
+  response:
+    body:
+      string: '{"runMetadata": {"runNumber": 1689673888, "rootRunId": "fd68a549-2027-4f0f-9f21-adc39cc86c94",
+        "createdUtc": "2023-07-18T09:51:28.1405441+00:00", "createdBy": {"userObjectId":
+        "c05e0746-e125-4cb3-9213-a8b535eacd79", "userPuId": "10032000324F7449", "userIdp":
+        null, "userAltSecId": null, "userIss": "https://sts.windows.net/00000000-0000-0000-0000-000000000000/",
+        "userTenantId": "00000000-0000-0000-0000-000000000000", "userName": "Honglin
+        Du", "upn": null}, "userId": "c05e0746-e125-4cb3-9213-a8b535eacd79", "token":
+        null, "tokenExpiryTimeUtc": null, "error": {"error": {"code": "UserError",
+        "severity": null, "message": "Input ''question'' in line 0 is not provided
+        for flow ''Simple_mock_answer''.", "messageFormat": "", "messageParameters":
+        {}, "referenceCode": "Executor", "detailsUri": null, "target": null, "details":
+        [], "innerError": {"code": "ValidationError", "innerError": {"code": "InvalidFlowRequest",
+        "innerError": {"code": "InputNotFound", "innerError": null}}}, "debugInfo":
+        {"type": "InputNotFound", "message": null, "stackTrace": "Traceback (most
+        recent call last):\n  File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
+        line 243, in exec_request_raw\n    return self._route_request_raw(raw_request,
+        raise_ex=raise_ex)\n  File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
+        line 316, in _route_request_raw\n    return self._exec_batch_request(\n  File
+        \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
+        line 393, in _exec_batch_request\n    run_infos = self._exec_batch_request_inner(\n  File
+        \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
+        line 654, in _exec_batch_request_inner\n    batch_inputs = FlowRequestValidator.ensure_batch_inputs_type(batch_request.flow,
+        batch_request.batch_inputs)\n  File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
+        line 99, in ensure_batch_inputs_type\n    return [cls.ensure_flow_inputs_type(flow,
+        inputs, idx) for idx, inputs in enumerate(batch_inputs)]\n  File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
+        line 99, in <listcomp>\n    return [cls.ensure_flow_inputs_type(flow, inputs,
+        idx) for idx, inputs in enumerate(batch_inputs)]\n  File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
+        line 192, in ensure_flow_inputs_type\n    raise InputNotFound(\npromptflow.executor.flow_request_validator.InputNotFound:
+        Input ''question'' in line 0 is not provided for flow ''Simple_mock_answer''.\n",
+        "innerException": null, "data": null, "errorResponse": null}, "additionalInfo":
+        null}, "correlation": null, "environment": null, "location": null, "time":
+        "2023-07-18T09:51:31.441881+00:00", "componentName": "promptflow/20230710.v2"},
+        "warnings": null, "revision": 4, "statusRevision": 2, "runUuid": "555c524f-0fa8-47d7-bf0b-cc6db82ab734",
+        "parentRunUuid": "01cdc8fe-2bfd-40be-817c-7ae28282e7a7", "rootRunUuid": "01cdc8fe-2bfd-40be-817c-7ae28282e7a7",
+        "lastStartTimeUtc": null, "currentComputeTime": null, "computeDuration": "00:00:00.4131767",
+        "effectiveStartTimeUtc": null, "lastModifiedBy": {"userObjectId": "c05e0746-e125-4cb3-9213-a8b535eacd79",
+        "userPuId": "10032000324F7449", "userIdp": null, "userAltSecId": null, "userIss":
+        "https://sts.windows.net/00000000-0000-0000-0000-000000000000/", "userTenantId":
+        "00000000-0000-0000-0000-000000000000", "userName": "Honglin Du", "upn": null},
+        "lastModifiedUtc": "2023-07-18T09:51:28.1405441+00:00", "duration": "00:00:00.4131767",
+        "cancelationReason": null, "currentAttemptId": 1, "runId": "3dfd077a-f071-443e-9c4e-d41531710950",
+        "parentRunId": "fd68a549-2027-4f0f-9f21-adc39cc86c94", "experimentId": "64956f20-fc4f-4b13-aa32-8c52f722b94f",
+        "status": "Failed", "startTimeUtc": "2023-07-18T09:51:31.2748721+00:00", "endTimeUtc":
+        "2023-07-18T09:51:31.6880488+00:00", "scheduleId": null, "displayName": "Simple_mock_answer-bulktest-variant_0-fd68a549-2027-4f0f-9f21-adc39cc86c94",
+        "name": null, "dataContainerId": "dcid.3dfd077a-f071-443e-9c4e-d41531710950",
+        "description": "", "hidden": false, "runType": "azureml.promptflow.FlowRun",
+        "runTypeV2": {"orchestrator": null, "traits": [], "attribution": null, "computeType":
+        "MIR_v2"}, "properties": {"azureml.promptflow.flow_id": "6f0d05fd-2cc1-495a-be6d-c60c3f3b1f14",
+        "azureml.promptflow.flow_type": "Default", "azureml.promptflow.variant_id":
+        "variant_0", "azureml.promptflow.baseline_variant_run_id": "3dfd077a-f071-443e-9c4e-d41531710950",
+        "azureml.promptflow.bulk_test_id": "fd68a549-2027-4f0f-9f21-adc39cc86c94",
+        "azureml.promptflow.flow_experiment_id": "3e123da1-f9a5-4c91-9234-8d9ffbb39ff5",
+        "azureml.promptflow.runtime_name": "demo-mir", "azureml.promptflow.runtime_version":
+        "20230710.v2"}, "parameters": {}, "actionUris": {}, "scriptName": null, "target":
+        null, "uniqueChildRunComputeTargets": [], "tags": {}, "settings": {}, "services":
+        {}, "inputDatasets": [], "outputDatasets": [], "runDefinition": null, "jobSpecification":
+        null, "primaryMetricName": null, "createdFrom": null, "cancelUri": null, "completeUri":
+        null, "diagnosticsUri": null, "computeRequest": null, "compute": null, "retainForLifetimeOfWorkspace":
+        null, "queueingInfo": null, "inputs": null, "outputs": null}, "runDefinition":
+        {"Nodes": [{"Name": "hello_prompt", "Tool": "hello_prompt", "Comment": null,
+        "Inputs": {}, "Api": null, "Provider": null, "Connection": null, "Module":
+        null, "Reduce": false}, {"Name": "echo_my_prompt", "Tool": "echo_my_prompt",
+        "Comment": null, "Inputs": {"input1": "${hello_prompt.output}"}, "Api": null,
+        "Provider": null, "Connection": null, "Module": null, "Reduce": false}], "Tools":
+        [{"Name": "hello_prompt", "Type": "prompt", "Inputs": {"question": {"Name":
+        null, "Type": ["string"], "Default": null, "Description": null, "Enum": null,
+        "enabled_by": null, "enabled_by_type": null, "model_list": null, "Capabilities":
+        null}}, "Outputs": null, "Description": null, "connection_type": null, "Module":
+        null, "class_name": null, "Source": null, "LkgCode": "Q: {{question}}", "Code":
+        "Q: {{question}}", "Function": null, "action_type": null, "provider_config":
+        null, "function_config": null, "is_builtin": false, "package": null, "package_version":
+        null}, {"Name": "echo_my_prompt", "Type": "python", "Inputs": {"input1": {"Name":
+        "input1", "Type": ["string"], "Default": null, "Description": null, "Enum":
+        null, "enabled_by": null, "enabled_by_type": null, "model_list": null, "Capabilities":
+        null}}, "Outputs": null, "Description": null, "connection_type": null, "Module":
+        null, "class_name": null, "Source": null, "LkgCode": "from promptflow import
+        tool\nimport time\nimport sys\n# The inputs section will change based on the
+        arguments of the tool function, after you save the code\n# Adding type to
+        arguments and return value will help the system show the types properly\n#
+        Please update the function name/signature per need\n\n\n@tool\ndef my_python_tool(input1:
+        str) -> str:\n    print(f\"@@@ My input data is {input1}...\")\n    sys.stderr.write(f\"###
+        This is an error message {input1}\")  \n    return \"Prompt: \" + input1\n",
+        "Code": "from promptflow import tool\nimport time\nimport sys\n# The inputs
+        section will change based on the arguments of the tool function, after you
+        save the code\n# Adding type to arguments and return value will help the system
+        show the types properly\n# Please update the function name/signature per need\n\n\n@tool\ndef
+        my_python_tool(input1: str) -> str:\n    print(f\"@@@ My input data is {input1}...\")\n    sys.stderr.write(f\"###
+        This is an error message {input1}\")  \n    return \"Prompt: \" + input1\n",
+        "Function": "my_python_tool", "action_type": null, "provider_config": null,
+        "function_config": null, "is_builtin": false, "package": null, "package_version":
+        null}], "Codes": null, "Inputs": {"question": {"Name": null, "Type": "string",
+        "Default": null, "Description": null, "is_chat_input": false}}, "Outputs":
+        {"output_prompt": {"Name": null, "Type": "string", "Description": null, "Reference":
+        "${echo_my_prompt.output}", "evaluation_only": false, "is_chat_output": false}}},
+        "jobSpecification": null, "systemSettings": null}'
+    headers:
+      connection:
+      - keep-alive
+      content-length:
+      - '10114'
+      content-type:
+      - application/json; charset=utf-8
+      strict-transport-security:
+      - max-age=15724800; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-request-time:
+      - '0.112'
+    status:
+      code: 200
+      message: OK
+version: 1