From ceef58824d658a5a8452af767a661cbbcdd4a903 Mon Sep 17 00:00:00 2001 From: vyokky <7678676@qq.com> Date: Wed, 14 Aug 2024 15:35:24 +0800 Subject: [PATCH] change log --- ufo/agents/processors/app_agent_processor.py | 2 +- ufo/agents/processors/host_agent_processor.py | 2 +- ufo/config/config_dev.yaml | 2 +- .../examples/visual/app_agent_example.yaml | 30 +++++++++++++++++++ ufo/prompts/share/base/app_agent.yaml | 2 ++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/ufo/agents/processors/app_agent_processor.py b/ufo/agents/processors/app_agent_processor.py index a146c899..372ed0aa 100644 --- a/ufo/agents/processors/app_agent_processor.py +++ b/ufo/agents/processors/app_agent_processor.py @@ -365,7 +365,7 @@ def update_memory(self) -> None: "Action": self.action, "ActionType": self.app_agent.Puppeteer.get_command_types(self._operation), "Request": self.request, - "Agent": "ActAgent", + "Agent": "AppAgent", "AgentName": self.app_agent.name, "Application": app_root, "Cost": self._cost, diff --git a/ufo/agents/processors/host_agent_processor.py b/ufo/agents/processors/host_agent_processor.py index c2d06149..600a32c6 100644 --- a/ufo/agents/processors/host_agent_processor.py +++ b/ufo/agents/processors/host_agent_processor.py @@ -235,7 +235,7 @@ def update_memory(self) -> None: "RoundStep": self.round_step, "AgentStep": self.host_agent.step, "Round": self.round_num, - "ControlLabel": self.control_text, + "ControlLabel": self.control_label, "SubtaskIndex": -1, "Action": self.action, "ActionType": "UIControl", diff --git a/ufo/config/config_dev.yaml b/ufo/config/config_dev.yaml index c3c623ed..16b5d5de 100644 --- a/ufo/config/config_dev.yaml +++ b/ufo/config/config_dev.yaml @@ -4,7 +4,7 @@ SLEEP_TIME: 5 # The sleep time between each step to wait for the window to be r RECTANGLE_TIME: 1 SAFE_GUARD: True # Whether to use the safe guard to prevent the model from doing sensitve operations. 
-CONTROL_LIST: ["Button", "Edit", "TabItem", "Document", "ListItem", "MenuItem", "ScrollBar", "TreeItem", "Hyperlink", "ComboBox", "RadioButton", "DataItem"] +CONTROL_LIST: ["Button", "Edit", "TabItem", "Document", "ListItem", "MenuItem", "ScrollBar", "TreeItem", "Hyperlink", "ComboBox", "RadioButton", "DataItem", "Spinner"] # The list of widgets that allowed to be selected, in uia backend, it will be used for filter the control_type, while in win32 backend, it will be used for filter the class_name. HISTORY_KEYS: ["Step", "Thought", "ControlText", "Subtask", "Action", "Comment", "Results", "UserConfirm"] # The keys of the action history for the next step. ANNOTATION_COLORS: { diff --git a/ufo/prompts/examples/visual/app_agent_example.yaml b/ufo/prompts/examples/visual/app_agent_example.yaml index 04f73632..7a69f6be 100644 --- a/ufo/prompts/examples/visual/app_agent_example.yaml +++ b/ufo/prompts/examples/visual/app_agent_example.yaml @@ -408,3 +408,33 @@ example12: Tips: |- - If the control item is not available in the control item list and screenshot, you can use the 'click_on_coordinates' API to click on a specific point in the application window. + +example13: + Request: |- + Fill the information for top 3 events one by one in the forms of private Event Bookings web page. + Sub-task: |- + Fill out the form on the 'Private Event Bookings' web page with the extracted information for the top 3 events, one by one. + Response: + Observation: |- + The screenshot shows that I am on the 'Private Event Bookings' web page. The form for booking a private event is visible, the first field of 'Event Type' has a default value of 'Wedding'. + Thought: |- + I need to first input the information for the 'Event Type' field, which is 'Restaurant Reservation'. However, the 'Event Type' field is already filled with 'Wedding'. I need to first click the 'Event Type' field to open the dropdown list and select 'Restaurant Reservation'. 
+ ControlLabel: |- + 70 + ControlText: |- + Event Type + Function: |- + click_input + Args: + {"button": "left", "double": false} + Status: |- + CONTINUE + Plan: + - (1) Click the 'Event Type' field to open the dropdown list. + - (2) Select 'Restaurant Reservation' from the dropdown list. + - (3) Input the information for the 'Event Name' field, which is 'Birthday Party'. + - (4) Input the information for the 'Event Date' field, which is '2022-12-25'. + - (5) Input the information for the 'Event Time' field, which is '18:00'. + - (6) Click the 'Submit' button to submit the form. + Comment: |- + I need to first click the 'Event Type' field to open the dropdown list and select 'Restaurant Reservation' to change the default value of 'Wedding'. diff --git a/ufo/prompts/share/base/app_agent.yaml b/ufo/prompts/share/base/app_agent.yaml index ec2e4e71..201a6e7d 100644 --- a/ufo/prompts/share/base/app_agent.yaml +++ b/ufo/prompts/share/base/app_agent.yaml @@ -109,6 +109,7 @@ system: |- - Your output of SaveScreenshot must be strictly in the format of {{"save": True/False, "reason": "The reason for saving the screenshot"}}. Only set "save" to True if you strongly believe the screenshot is useful for the future steps, for example, the screenshot contains important information to fill in the form in the future steps. You must provide a reason for saving the screenshot in the "reason" field. - When inputting the searched text on Google, you must use the Search Box, which is a ComboBox type of control item. Do not use the address bar to input the searched text. - The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document, or summarizing the entire ppt. + - If there are default values in a form, you have to check if the default values are correct and meet the user request. 
If the default values are not correct, you must change them by clicking the drop-down list (for ComboBox) to select the correct value, or inputting the correct values (for Edit or other control items that accept text input). - Saving a ppt file into pdf format can be done by clicking the "Save As Adobe PDF" button. - You are given the help documents of the application or/and the online search results for completing the user request. You may use them to help you think about the next step and construct your planning. These information are for reference only, and may not be relevant, accurate or up-to-date. You must rely more on the current screenshots and control item list to make the decision. - The "UserConfirm" field in the action trajectory in the Blackboard is used to record the user's confirmation of the sensitive action. If the user confirms the action, the value of "UserConfirm" will be set to "Yes" and the action was executed. If the user does not confirm the action, the value of "UserConfirm" will be set to "No" and the action was not executed. @@ -210,6 +211,7 @@ system_nonvisual: |- - Try to locate and use the "Results" in the to complete the user request, such as adding these results along with information to meet the user request into SetText when composing a message, email or document, when necessary. For example, if the the user request need includes results from different applications, you must try to find them in previous "Results" and incorporate them into the message with other necessary text, not leaving them as placeholders. Make sure the text composed is integrated and meets the user request. - When inputting the searched text on Google, you must use the Search Box, which is a ComboBox type of control item. Do not use the address bar to input the searched text. - The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document, or creating a presentation of a specific topic. 
+ - If there are default values in a form, you have to check if the default values are correct and meet the user request. If the default values are not correct, you must change them by clicking the drop-down list (for ComboBox) to select the correct value, or inputting the correct values (for Edit or other control items that accept text input). - You are given the help documents of the application or/and the online search results for completing the user request. You may use them to help you think about the next step and construct your planning. These information are for reference only, and may not be relevant, accurate or up-to-date. You must rely more on the current control item list to make the decision. - The "UserConfirm" field in the action trajectory in the Blackboard is used to record the user's confirmation of the sensitive action. If the user confirms the action, the value of "UserConfirm" will be set to "Yes" and the action was executed. If the user does not confirm the action, the value of "UserConfirm" will be set to "No" and the action was not executed.