Skip to content

Commit

Permalink
[promptflow] Hide additional info & debug info when list runs (micros…
Browse files Browse the repository at this point in the history
…oft#1384)

# Description

This PR hides `additionalInfo` and `debugInfo` when listing runs
(pf/pfazure) to give a cleaner run list experience;
otherwise, a single failed run may print a long, unhelpful stack trace.

This PR also makes a small update to email sanitization: it now only
sanitizes `<username>@microsoft.com`, to avoid mis-sanitizing something like
`@tool`.

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
  • Loading branch information
zhengfeiwang authored Dec 5, 2023
1 parent 6cc2bcb commit a49f779
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/promptflow/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- [SDK/CLI] Removing telemetry warning when running commands.
- Empty node stdout & stderr to avoid large visualize HTML.
- Hide unnecessary fields in run list for better readability.

## 1.1.1 (2023.12.1)

Expand Down
3 changes: 2 additions & 1 deletion src/promptflow/promptflow/_cli/_pf/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,8 @@ def list_runs(
max_results=max_results,
list_view_type=get_list_view_type(archived_only=archived_only, include_archived=include_archived),
)
json_list = [run._to_dict() for run in runs]
# hide additional info and debug info in run list for better user experience
json_list = [run._to_dict(exclude_additional_info=True, exclude_debug_info=True) for run in runs]
_output_result_list_with_format(result_list=json_list, output_format=output)
return runs

Expand Down
3 changes: 2 additions & 1 deletion src/promptflow/promptflow/_cli/_pf_azure/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,8 @@ def list_runs(

pf = _get_azure_pf_client(subscription_id, resource_group, workspace_name)
runs = pf.runs.list(max_results=max_results, list_view_type=list_view_type)
run_list = [run._to_dict() for run in runs]
# hide additional info and debug info in run list for better user experience
run_list = [run._to_dict(exclude_additional_info=True, exclude_debug_info=True) for run in runs]
_output_result_list_with_format(result_list=run_list, output_format=output)
return runs

Expand Down
10 changes: 9 additions & 1 deletion src/promptflow/promptflow/_sdk/entities/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def _dump(self) -> None:
"""Dump current run entity to local DB."""
self._to_orm_object().dump()

def _to_dict(self):
def _to_dict(self, *, exclude_additional_info: bool = False, exclude_debug_info: bool = False):
from promptflow._sdk.operations._local_storage_operations import LocalStorageOperations

properties = self.properties
Expand Down Expand Up @@ -348,6 +348,10 @@ def _to_dict(self):
# add exception part if any
exception_dict = local_storage.load_exception()
if exception_dict:
if exclude_additional_info:
exception_dict.pop("additionalInfo", None)
if exclude_debug_info:
exception_dict.pop("debugInfo", None)
result["error"] = exception_dict
elif self._run_source == RunInfoSources.INDEX_SERVICE:
result["creation_context"] = self._creation_context
Expand All @@ -371,6 +375,10 @@ def _to_dict(self):
result[RunDataKeys.INPUT_RUN_PORTAL_URL] = self._input_run_portal_url
if self._error:
result["error"] = self._error
if exclude_additional_info:
result["error"]["error"].pop("additionalInfo", None)
if exclude_debug_info:
result["error"]["error"].pop("debugInfo", None)
return result

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

import copy
import json
import shutil
from logging import Logger
Expand Down Expand Up @@ -34,8 +35,10 @@
RUNS_DIR = "./tests/test_configs/runs"
DATAS_DIR = "./tests/test_configs/datas"

# TODO(2770419): create this failed run dynamically when migrating the live tests to canary
FAILED_RUN_NAME_EASTUS = "3dfd077a-f071-443e-9c4e-d41531710950"


# TODO(2528577): we should run these tests in recording mode.
@pytest.mark.timeout(timeout=DEFAULT_TEST_TIMEOUT, method=PYTEST_TIMEOUT_METHOD)
@pytest.mark.e2etest
@pytest.mark.usefixtures(
Expand Down Expand Up @@ -351,12 +354,20 @@ def test_stream_run_logs(self, pf):
def test_stream_failed_run_logs(self, pf, capfd: pytest.CaptureFixture):
# (default) raise_on_error=True
with pytest.raises(InvalidRunStatusError):
pf.stream(run="3dfd077a-f071-443e-9c4e-d41531710950")
pf.stream(run=FAILED_RUN_NAME_EASTUS)
# raise_on_error=False
pf.stream(run="3dfd077a-f071-443e-9c4e-d41531710950", raise_on_error=False)
pf.stream(run=FAILED_RUN_NAME_EASTUS, raise_on_error=False)
out, _ = capfd.readouterr()
assert "Input 'question' in line 0 is not provided for flow 'Simple_mock_answer'." in out

def test_failed_run_to_dict_exclude(self, pf):
    """Check that the exclusion flags of ``_to_dict`` drop the verbose error fields.

    Fetches the known failed run, serializes it once with default arguments
    and once with both exclusion flags set, then verifies that
    ``additionalInfo`` and ``debugInfo`` are present only in the default form.
    """
    run = pf.runs.get(run=FAILED_RUN_NAME_EASTUS)
    # The serialized dict references the error dict held by the Azure run
    # object, so snapshot the default form before the excluding call can
    # modify it.
    full_error = copy.deepcopy(run._to_dict())["error"]["error"]
    trimmed_error = run._to_dict(exclude_additional_info=True, exclude_debug_info=True)["error"]["error"]
    for field in ("additionalInfo", "debugInfo"):
        assert field in full_error and field not in trimmed_error

@pytest.mark.skipif(
condition=not is_live(),
reason="cannot differ the two requests to run history in replay mode.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def sanitize_pfs_body(body: str) -> str:


def sanitize_email(value: str) -> str:
    """Replace the username of every ``<username>@microsoft.com`` address in *value*.

    Only addresses on the literal ``microsoft.com`` domain are rewritten, so
    other ``@``-prefixed text (for example a ``@tool`` decorator) is left as is.

    :param value: Text that may contain email addresses.
    :return: *value* with each matching username replaced by
        ``SanitizedValues.EMAIL_USERNAME``; the domain part is kept.
    """
    # The dot is escaped so that it matches only a literal "." and not any
    # character (an unescaped "." would also match e.g. "user@microsoftXcom").
    return re.sub(r"([\w\.-]+)@(microsoft\.com)", r"{}@\2".format(SanitizedValues.EMAIL_USERNAME), value)


def _is_json_payload(headers: Dict, key: str) -> bool:
Expand Down
12 changes: 12 additions & 0 deletions src/promptflow/tests/sdk_cli_test/e2etests/test_flow_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,3 +1025,15 @@ def test_specify_run_output_path_with_invalid_macro(self, pf: PFClient, mocker:
local_storage = LocalStorageOperations(run=run)
expected_output_path_prefix = (Path.home() / PROMPT_FLOW_DIR_NAME / ".runs" / run.name).resolve().as_posix()
assert local_storage.outputs_folder.as_posix().startswith(expected_output_path_prefix)

def test_failed_run_to_dict_exclude(self, pf):
    """Check that the exclusion flags of ``_to_dict`` drop the verbose error fields.

    Submits the ``failed_flow`` flow, serializes the resulting run once with
    default arguments and once with both exclusion flags set, then verifies
    that ``additionalInfo`` and ``debugInfo`` are present only in the default
    form.
    """
    run = pf.run(
        flow=f"{FLOWS_DIR}/failed_flow",
        data=f"{DATAS_DIR}/webClassification1.jsonl",
        column_mapping={"text": "${data.url}"},
    )
    full_error = run._to_dict()["error"]
    # The CLI passes both exclusion flags when it lists runs.
    trimmed_error = run._to_dict(exclude_additional_info=True, exclude_debug_info=True)["error"]
    for field in ("additionalInfo", "debugInfo"):
        assert field in full_error and field not in trimmed_error
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
interactions:
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- promptflow-sdk/0.0.1 promptflow/0.0.1 azure-ai-ml/1.12.1 azsdk-python-mgmt-machinelearningservices/0.1.0
Python/3.10.13 (Windows-10-10.0.22631-SP0)
method: GET
uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000
response:
body:
string: '{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000",
"name": "00000", "type": "Microsoft.MachineLearningServices/workspaces", "location":
"eastus", "tags": {}, "etag": null, "kind": "Default", "sku": {"name": "Basic",
"tier": "Basic"}, "properties": {"discoveryUrl": "https://eastus.api.azureml.ms/discovery"}}'
headers:
cache-control:
- no-cache
content-length:
- '3630'
content-type:
- application/json; charset=utf-8
expires:
- '-1'
pragma:
- no-cache
strict-transport-security:
- max-age=31536000; includeSubDomains
vary:
- Accept-Encoding
x-cache:
- CONFIG_NOCACHE
x-content-type-options:
- nosniff
x-request-time:
- '0.029'
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- promptflow-sdk/0.0.1 promptflow/0.0.1 azure-ai-ml/1.12.1 azsdk-python-mgmt-machinelearningservices/0.1.0
Python/3.10.13 (Windows-10-10.0.22631-SP0)
method: GET
uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores?count=30&isDefault=true&orderByAsc=false
response:
body:
string: '{"value": [{"id": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore",
"name": "workspaceblobstore", "type": "Microsoft.MachineLearningServices/workspaces/datastores",
"properties": {"description": null, "tags": null, "properties": null, "isDefault":
true, "credentials": {"credentialsType": "AccountKey"}, "intellectualProperty":
null, "subscriptionId": "00000000-0000-0000-0000-000000000000", "resourceGroup":
"00000", "datastoreType": "AzureBlob", "accountName": "fake_account_name",
"containerName": "fake-container-name", "endpoint": "core.windows.net", "protocol":
"https", "serviceDataAccessAuthIdentity": "WorkspaceSystemAssignedIdentity"},
"systemData": {"createdAt": "2023-04-08T02:53:06.5886442+00:00", "createdBy":
"779301c0-18b2-4cdc-801b-a0a3368fee0a", "createdByType": "Application", "lastModifiedAt":
"2023-04-08T02:53:07.521127+00:00", "lastModifiedBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a",
"lastModifiedByType": "Application"}}]}'
headers:
cache-control:
- no-cache
content-length:
- '1372'
content-type:
- application/json; charset=utf-8
expires:
- '-1'
pragma:
- no-cache
strict-transport-security:
- max-age=31536000; includeSubDomains
vary:
- Accept-Encoding
x-cache:
- CONFIG_NOCACHE
x-content-type-options:
- nosniff
x-request-time:
- '0.205'
status:
code: 200
message: OK
- request:
body: '{"runId": "3dfd077a-f071-443e-9c4e-d41531710950", "selectRunMetadata":
true, "selectRunDefinition": true, "selectJobSpecification": true}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '137'
Content-Type:
- application/json
User-Agent:
- python-requests/2.31.0
method: POST
uri: https://eastus.api.azureml.ms/history/v1.0/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/rundata
response:
body:
string: '{"runMetadata": {"runNumber": 1689673888, "rootRunId": "fd68a549-2027-4f0f-9f21-adc39cc86c94",
"createdUtc": "2023-07-18T09:51:28.1405441+00:00", "createdBy": {"userObjectId":
"c05e0746-e125-4cb3-9213-a8b535eacd79", "userPuId": "10032000324F7449", "userIdp":
null, "userAltSecId": null, "userIss": "https://sts.windows.net/00000000-0000-0000-0000-000000000000/",
"userTenantId": "00000000-0000-0000-0000-000000000000", "userName": "Honglin
Du", "upn": null}, "userId": "c05e0746-e125-4cb3-9213-a8b535eacd79", "token":
null, "tokenExpiryTimeUtc": null, "error": {"error": {"code": "UserError",
"severity": null, "message": "Input ''question'' in line 0 is not provided
for flow ''Simple_mock_answer''.", "messageFormat": "", "messageParameters":
{}, "referenceCode": "Executor", "detailsUri": null, "target": null, "details":
[], "innerError": {"code": "ValidationError", "innerError": {"code": "InvalidFlowRequest",
"innerError": {"code": "InputNotFound", "innerError": null}}}, "debugInfo":
{"type": "InputNotFound", "message": null, "stackTrace": "Traceback (most
recent call last):\n File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
line 243, in exec_request_raw\n return self._route_request_raw(raw_request,
raise_ex=raise_ex)\n File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
line 316, in _route_request_raw\n return self._exec_batch_request(\n File
\"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
line 393, in _exec_batch_request\n run_infos = self._exec_batch_request_inner(\n File
\"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/executor.py\",
line 654, in _exec_batch_request_inner\n batch_inputs = FlowRequestValidator.ensure_batch_inputs_type(batch_request.flow,
batch_request.batch_inputs)\n File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
line 99, in ensure_batch_inputs_type\n return [cls.ensure_flow_inputs_type(flow,
inputs, idx) for idx, inputs in enumerate(batch_inputs)]\n File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
line 99, in <listcomp>\n return [cls.ensure_flow_inputs_type(flow, inputs,
idx) for idx, inputs in enumerate(batch_inputs)]\n File \"/azureml-envs/prompt-flow/runtime/lib/python3.9/site-packages/promptflow/executor/flow_request_validator.py\",
line 192, in ensure_flow_inputs_type\n raise InputNotFound(\npromptflow.executor.flow_request_validator.InputNotFound:
Input ''question'' in line 0 is not provided for flow ''Simple_mock_answer''.\n",
"innerException": null, "data": null, "errorResponse": null}, "additionalInfo":
null}, "correlation": null, "environment": null, "location": null, "time":
"2023-07-18T09:51:31.441881+00:00", "componentName": "promptflow/20230710.v2"},
"warnings": null, "revision": 4, "statusRevision": 2, "runUuid": "555c524f-0fa8-47d7-bf0b-cc6db82ab734",
"parentRunUuid": "01cdc8fe-2bfd-40be-817c-7ae28282e7a7", "rootRunUuid": "01cdc8fe-2bfd-40be-817c-7ae28282e7a7",
"lastStartTimeUtc": null, "currentComputeTime": null, "computeDuration": "00:00:00.4131767",
"effectiveStartTimeUtc": null, "lastModifiedBy": {"userObjectId": "c05e0746-e125-4cb3-9213-a8b535eacd79",
"userPuId": "10032000324F7449", "userIdp": null, "userAltSecId": null, "userIss":
"https://sts.windows.net/00000000-0000-0000-0000-000000000000/", "userTenantId":
"00000000-0000-0000-0000-000000000000", "userName": "Honglin Du", "upn": null},
"lastModifiedUtc": "2023-07-18T09:51:28.1405441+00:00", "duration": "00:00:00.4131767",
"cancelationReason": null, "currentAttemptId": 1, "runId": "3dfd077a-f071-443e-9c4e-d41531710950",
"parentRunId": "fd68a549-2027-4f0f-9f21-adc39cc86c94", "experimentId": "64956f20-fc4f-4b13-aa32-8c52f722b94f",
"status": "Failed", "startTimeUtc": "2023-07-18T09:51:31.2748721+00:00", "endTimeUtc":
"2023-07-18T09:51:31.6880488+00:00", "scheduleId": null, "displayName": "Simple_mock_answer-bulktest-variant_0-fd68a549-2027-4f0f-9f21-adc39cc86c94",
"name": null, "dataContainerId": "dcid.3dfd077a-f071-443e-9c4e-d41531710950",
"description": "", "hidden": false, "runType": "azureml.promptflow.FlowRun",
"runTypeV2": {"orchestrator": null, "traits": [], "attribution": null, "computeType":
"MIR_v2"}, "properties": {"azureml.promptflow.flow_id": "6f0d05fd-2cc1-495a-be6d-c60c3f3b1f14",
"azureml.promptflow.flow_type": "Default", "azureml.promptflow.variant_id":
"variant_0", "azureml.promptflow.baseline_variant_run_id": "3dfd077a-f071-443e-9c4e-d41531710950",
"azureml.promptflow.bulk_test_id": "fd68a549-2027-4f0f-9f21-adc39cc86c94",
"azureml.promptflow.flow_experiment_id": "3e123da1-f9a5-4c91-9234-8d9ffbb39ff5",
"azureml.promptflow.runtime_name": "demo-mir", "azureml.promptflow.runtime_version":
"20230710.v2"}, "parameters": {}, "actionUris": {}, "scriptName": null, "target":
null, "uniqueChildRunComputeTargets": [], "tags": {}, "settings": {}, "services":
{}, "inputDatasets": [], "outputDatasets": [], "runDefinition": null, "jobSpecification":
null, "primaryMetricName": null, "createdFrom": null, "cancelUri": null, "completeUri":
null, "diagnosticsUri": null, "computeRequest": null, "compute": null, "retainForLifetimeOfWorkspace":
null, "queueingInfo": null, "inputs": null, "outputs": null}, "runDefinition":
{"Nodes": [{"Name": "hello_prompt", "Tool": "hello_prompt", "Comment": null,
"Inputs": {}, "Api": null, "Provider": null, "Connection": null, "Module":
null, "Reduce": false}, {"Name": "echo_my_prompt", "Tool": "echo_my_prompt",
"Comment": null, "Inputs": {"input1": "${hello_prompt.output}"}, "Api": null,
"Provider": null, "Connection": null, "Module": null, "Reduce": false}], "Tools":
[{"Name": "hello_prompt", "Type": "prompt", "Inputs": {"question": {"Name":
null, "Type": ["string"], "Default": null, "Description": null, "Enum": null,
"enabled_by": null, "enabled_by_type": null, "model_list": null, "Capabilities":
null}}, "Outputs": null, "Description": null, "connection_type": null, "Module":
null, "class_name": null, "Source": null, "LkgCode": "Q: {{question}}", "Code":
"Q: {{question}}", "Function": null, "action_type": null, "provider_config":
null, "function_config": null, "is_builtin": false, "package": null, "package_version":
null}, {"Name": "echo_my_prompt", "Type": "python", "Inputs": {"input1": {"Name":
"input1", "Type": ["string"], "Default": null, "Description": null, "Enum":
null, "enabled_by": null, "enabled_by_type": null, "model_list": null, "Capabilities":
null}}, "Outputs": null, "Description": null, "connection_type": null, "Module":
null, "class_name": null, "Source": null, "LkgCode": "from promptflow import
tool\nimport time\nimport sys\n# The inputs section will change based on the
arguments of the tool function, after you save the code\n# Adding type to
arguments and return value will help the system show the types properly\n#
Please update the function name/signature per need\n\n\n@tool\ndef my_python_tool(input1:
str) -> str:\n print(f\"@@@ My input data is {input1}...\")\n sys.stderr.write(f\"###
This is an error message {input1}\") \n return \"Prompt: \" + input1\n",
"Code": "from promptflow import tool\nimport time\nimport sys\n# The inputs
section will change based on the arguments of the tool function, after you
save the code\n# Adding type to arguments and return value will help the system
show the types properly\n# Please update the function name/signature per need\n\n\n@tool\ndef
my_python_tool(input1: str) -> str:\n print(f\"@@@ My input data is {input1}...\")\n sys.stderr.write(f\"###
This is an error message {input1}\") \n return \"Prompt: \" + input1\n",
"Function": "my_python_tool", "action_type": null, "provider_config": null,
"function_config": null, "is_builtin": false, "package": null, "package_version":
null}], "Codes": null, "Inputs": {"question": {"Name": null, "Type": "string",
"Default": null, "Description": null, "is_chat_input": false}}, "Outputs":
{"output_prompt": {"Name": null, "Type": "string", "Description": null, "Reference":
"${echo_my_prompt.output}", "evaluation_only": false, "is_chat_output": false}}},
"jobSpecification": null, "systemSettings": null}'
headers:
connection:
- keep-alive
content-length:
- '10114'
content-type:
- application/json; charset=utf-8
strict-transport-security:
- max-age=15724800; includeSubDomains; preload
transfer-encoding:
- chunked
vary:
- Accept-Encoding
x-content-type-options:
- nosniff
x-request-time:
- '0.112'
status:
code: 200
message: OK
version: 1

0 comments on commit a49f779

Please sign in to comment.