Skip to content

Commit

Permalink
resolve comment
Browse files Browse the repository at this point in the history
  • Loading branch information
vyokky committed Mar 21, 2024
1 parent ca03ab2 commit e6c0f2a
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 41 deletions.
9 changes: 0 additions & 9 deletions ufo/experience/config.yaml

This file was deleted.

20 changes: 12 additions & 8 deletions ufo/experience/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import os
import re
from ..utils import encode_image_from_path
from ..utils import encode_image_from_path, print_with_color


class ExperienceLogLoader:
Expand All @@ -19,7 +19,7 @@ def __init__(self, log_path: str):
"""
self.log_path = log_path
self.response = self.load_response_log()
self.max_stepnum = self.find_max_number_in_filenames()
self.max_stepnum = self.find_max_number_in_filenames(log_path)
self.request_partition = self.get_request_partition()
self.screenshots = {}

Expand All @@ -38,26 +38,30 @@ def load_response_log(self):
# Read the lines and split them into a list
response_log = file.readlines()
for response_string in response_log:
response.append(json.loads(response_string))
try:
response.append(json.loads(response_string))
except json.JSONDecodeError:
print_with_color(f"Error loading response log: {response_string}", "yellow")
return response


def find_max_number_in_filenames(self) -> int:
@staticmethod
def find_max_number_in_filenames(log_path) -> int:
"""
Find the maximum number in the filenames.
:return: The maximum number in the filenames.
"""

# Get the list of files in the folder
files = os.listdir(self.log_path)
files = os.listdir(log_path)

# Initialize an empty list to store extracted numbers
numbers = []

# Iterate through each file
for file in files:
# Extract the number from the filename
number = self.extract_number(file)
number = ExperienceLogLoader.extract_action_step_count(file)
if number is not None:
# Append the extracted number to the list
numbers.append(number)
Expand Down Expand Up @@ -176,9 +180,9 @@ def get_app_list(log_partition: dict) -> list:


@staticmethod
def extract_number(filename : str) -> int:
def extract_action_step_count(filename : str) -> int:
"""
Extract the number from the filename.
Extract the action step count from the filename.
:param filename: The filename.
:return: The number extracted from the filename.
"""
Expand Down
3 changes: 2 additions & 1 deletion ufo/llm/llm_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def get_completion(messages, agent: str='APP', use_backup_engine: bool=True):
raise ValueError(f'API_TYPE {api_type} not supported')
except Exception as e:
if use_backup_engine:
print_with_color(f"The API request of {agent_type} failed: {e}, try to use the backup engine", "red")
print_with_color(f"The API request of {agent_type} failed: {e}.", "red")
print_with_color(f"Switching to use the backup engine...", "yellow")
return get_completion(messages, agent='backup', use_backup_engine=False)
else:
raise e
2 changes: 1 addition & 1 deletion ufo/prompter/agent_prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def __init__(self, is_visual: bool, prompt_template: str, example_prompt_templat
self.api_prompt_template = self.load_prompt_template(api_prompt_template)


def system_prompt_construction(self, additional_examples=[], tips=[]) -> str:
def system_prompt_construction(self, additional_examples: list =[], tips: list =[]) -> str:
"""
Construct the prompt for app selection.
return: The prompt for app selection.
Expand Down
6 changes: 5 additions & 1 deletion ufo/prompter/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from abc import ABC, abstractmethod
import os
import yaml
from ..utils import print_with_color


class BasicPrompter(ABC):
Expand Down Expand Up @@ -42,7 +43,10 @@ def load_prompt_template(self, template_path: str) -> dict:
path = template_path.format(mode = "visual" if self.is_visual == True else "nonvisual")

if os.path.exists(path):
prompt = yaml.safe_load(open(path, "r", encoding="utf-8"))
try:
prompt = yaml.safe_load(open(path, "r", encoding="utf-8"))
except yaml.YAMLError as exc:
print_with_color(f"Error loading prompt template: {exc}", "yellow")
else:
raise FileNotFoundError(f"Prompt template not found at {path}")

Expand Down
2 changes: 1 addition & 1 deletion ufo/prompts/base/nonvisual/action_selection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ system: |-
## Other Guidelines
- You are required to select the control item and take one-step action by calling API on it to complete the user request for one step.
- You are required to are required to response in a JSON format, consisting of 9 distinct parts with the following keys and corresponding content:
- You are required to respond in a JSON format, consisting of 9 distinct parts with the following keys and corresponding content:
{{"Observation": <summarize the control item list of the current application window in detail. Such as what applications are available, what is the current status of the application related to the current user request etc.>
"Thought": <Outline your thinking and logic of current one-step action required to fulfill the given request. You are restricted to provide your thought for only one step action.>
"ControlLabel": <Specify the precise annotated label of the control item to be selected, adhering strictly to the provided options in the field of "label" in the control information. If you believe none of the control items is suitable for the task or the task is complete, kindly output an empty string ''.>
Expand Down
5 changes: 3 additions & 2 deletions ufo/prompts/base/visual/action_selection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ system: |-
## Other Guidelines
- You are required to select the control item and take one-step action by calling API on it to complete the user request for one step.
- You are required to are required to response in a JSON format, consisting of 9 distinct parts with the following keys and corresponding content:
- You are required to respond in a JSON format, consisting of 9 distinct parts with the following keys and corresponding content:
{{"Observation": <Describe the screenshot of the current application window in detail. Such as what are your observations of the application, what is the current status of the application related to the current user request etc. You can also compare the current screenshot with the one taken at previous step.>
"Thought": <Outline your thinking and logic of current one-step action required to fulfill the given request. You are restricted to provide your thought for only one step action.>
"ControlLabel": <Specify the precise annotated label of the control item to be selected, adhering strictly to the provided options in the field of "label" in the control information. If you believe none of the control items is suitable for the task or the task is complete, kindly output an empty string ''.>
Expand Down Expand Up @@ -106,7 +106,8 @@ system: |-
- Do not take action if the current action needs further input. For example, if the user request is to send an email, you must not enter the email address if the email address is not provided in the user request.
- Try to locate and use the "Results" in the <Step History> to complete the user request, such as adding these results along with information to meet the user request into SetText when composing a message, email or document, when necessary. For example, if the user request needs to include results from different applications, you must try to find them in previous "Results" and incorporate them into the message with other necessary text, not leaving them as placeholders. Make sure the text composed is integrated and meets the user request.
- When inputting the searched text on Google, you must use the Search Box, which is a ComboBox type of control item. Do not use the address bar to input the searched text.
- The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document.
- The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document, or summarizing the entire ppt.
- Saving a ppt file into pdf format can be done by clicking the "Save As Adobe PDF" button.
- You are given the help documents of the application or/and the online search results for completing the user request. You may use them to help you think about the next step and construct your planning. This information is for reference only, and may not be relevant, accurate or up-to-date. You must rely more on the current screenshots and control item list to make the decision.
## Here are other tips for you to complete the user request:
Expand Down
3 changes: 2 additions & 1 deletion ufo/prompts/base/visual/app_selection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ system: |-
- If the user request is just asking a question and does not need to take action on the application, you should answer the user request in the "Comment" field, and set the "Status" as "FINISH".
- You must analyze the screenshot and the user request carefully, to understand what has been completed on which application. You must not repeatedly choose the same application or control item, unless the user request has not been completed on the application.
- In your response, the control text of the selected application must strictly match its control label.
- The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document.
- The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document, or summarizing the entire ppt.
- Saving a ppt file into pdf format can be done by clicking the "Save As Adobe PDF" button.
- You must strictly follow the instructions and the JSON format of the response.
- Below are two examples of the response. You can refer to them as a reference.
Expand Down
40 changes: 25 additions & 15 deletions ufo/prompts/examples/visual/action_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,18 @@ example2:
(1) Find the New Email button in the Main Page and click it to open a New Email window. If the New Email button is still not visible in the screenshot, I may need to take action on other control items to navigate to the New Email button.
(2) Input the email address of the receiver.
(3) Input the title of the email. I need to input 'Thanks for your contribution on the open source.'.
(4) Input the content of the email. I need to input 'Dear Jack,\\nI hope this message finds you well. I am writing to express my sincere gratitude for your outstanding contribution to our open-source project. Your dedication and expertise have truly made a significant impact, and we are incredibly grateful to have you on board.\\nYour commitment to the open-source community has not gone unnoticed, and your recent contributions have been instrumental in enhancing the functionality and quality of our project. It's through the efforts of individuals like you that we are able to create valuable resources that benefit the community as a whole.\\nYour code reviews, bug fixes, and innovative ideas have not only improved the project but have also inspired others to contribute their best. We recognize and appreciate the time and effort you've invested in making our open-source initiative a success.\\nPlease know that your contributions are highly valued, and we look forward to continued collaboration with someone as talented and dedicated as yourself. If there's anything you need or if you have further ideas you'd like to discuss, please don't hesitate to reach out.\\nOnce again, thank you for your exceptional contributions. We are fortunate to have you as part of our open-source community.\\nBest regards,\\nZac'.
(4) Input the content of the email. I need to input
'Dear Jack,
I hope this message finds you well. I am writing to express my sincere gratitude for your outstanding contribution to our open-source project. Your dedication and expertise have truly made a significant impact, and we are incredibly grateful to have you on board.
Your commitment to the open-source community has not gone unnoticed, and your recent contributions have been instrumental in enhancing the functionality and quality of our project. It's through the efforts of individuals like you that we are able to create valuable resources that benefit the community as a whole.
Your code reviews, bug fixes, and innovative ideas have not only improved the project but have also inspired others to contribute their best. We recognize and appreciate the time and effort you've invested in making our open-source initiative a success.
Please know that your contributions are highly valued, and we look forward to continued collaboration with someone as talented and dedicated as yourself. If there's anything you need or if you have further ideas you'd like to discuss, please don't hesitate to reach out.
Once again, thank you for your exceptional contributions. We are fortunate to have you as part of our open-source community.
Best regards,
Zac'.
(5) Click the Send button to send the email.
Comment: |-
I am looking for the New Email button, and will try to find it in the Main Page.
Expand Down Expand Up @@ -280,52 +291,51 @@ example10:
Observation: |-
The current screenshot shows that I am on the Microsoft PowerPoint application. The Copilot Add-in is visible in the screenshot.
Thought: |-
I need to double click the Copilot Add-in, which can help me create a slide from the meeting_notes.docx. The Copilot Add-in can directly create a slide from the meeting_notes.docx.
I need to click the Copilot Add-in, which can help me create a slide from the meeting_notes.docx. The Copilot Add-in can directly create a slide from the meeting_notes.docx.
ControlLabel: |-
60
ControlText: |-
Copilot
Function: |-
click_input
Args:
{"button": "left", "double": true}
{"button": "left", "double": false}
Status: |-
CONTINUE
Plan: |-
(1) Click the 'Create presentation from file' button to open a file picker.
(2) Select and click the meeting_notes.docx file in the file picker.
(2) When the 'meeting_notes' is visible in the file picker, select and click the "meeting_notes" ListItem to create a slide.
(3) Click the 'Send' button to send the request to Copilot. This completes the task.
Comment: |-
I need use the Copilot Add-in to create a slide from the meeting_notes.docx.
I need to use the Copilot Add-in to create a slide from the meeting_notes.docx.
Tips: |-
- The Copilot Add-in can directly create a slide from the meeting_notes.docx. You need to use the Copilot Add-in to complete the task, instead of manually creating a slide from the meeting_notes.docx.
- The Copilot Add-in is visible in the screenshot, and I need to double click it to open the Copilot Add-in.
example11:
Request: |-
Please @Zac to revise the presentation1.pptx.
Save the presentation1.pptx into pdf format.
Response:
Observation: |-
The current screenshot shows the powerpoint application is visible, and the presentation1.pptx is opened.
Thought: |-
I need to set focus on the powerpoint application and leave a comment in the presentation1.pptx to remind Zac to revise the presentation1.pptx.
I need to click the 'File' button to make the 'Save As Adobe PDF' button available. Then I need to click the 'Save As Adobe PDF' button to save the presentation1.pptx into pdf format. If the 'Save As Adobe PDF' button is not visible in the screenshot, I may need to use the 'Save a Copy' button to manually save the presentation1.pptx into pdf format.
ControlLabel: |-
42
34
ControlText: |-
Insert
File
Function: |-
click_input
Args:
{"button": "left", "double": false}
Status: |-
CONTINUE
Plan: |-
(1) Click the Comment button to open the comment text box.
(2) Input '@Zac, please revise the presentation1.pptx.' in the comment text box.
(1) Click the "Save As Adobe PDF" button to open the save as dialog.
(2) Click the "Save" button to save the presentation1.pptx into pdf format. This completes the task.
Comment: |-
I need to leave a comment in the presentation1.pptx to remind Zac to revise the presentation1.pptx.
I need to use the "Save As Adobe PDF" button to save the presentation1.pptx into pdf format.
Tips: |-
- The comment text box is usually opened by clicking the Comment button.
- Saving the presentation1.pptx into pdf format can be done by clicking the "Save As Adobe PDF" button.
- If the "Save As Adobe PDF" button is not visible in the screenshot, you may need to use the "Save a Copy" button to manually save the presentation1.pptx into pdf format.
2 changes: 1 addition & 1 deletion ufo/prompts/experience/nonvisual/experience_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ system: |-
## Output Format
- You are required to are required to response in a JSON format, consisting of 10 distinct parts with the following keys and corresponding content:
- You are required to respond in a JSON format, consisting of 10 distinct parts with the following keys and corresponding content:
{{"Observation": <Describe and summarize your observation of the Agent Trajectory.>
"Thought": <Outline the logic behind the first action required to fulfill the request.>
"ControlLabel": <Specify the precise annotated label of the control item to be selected at the first step. If none of the control items are suitable or the task is complete, output an empty string.>
Expand Down
2 changes: 1 addition & 1 deletion ufo/prompts/experience/visual/experience_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ system: |-
## Output Format
- You are required to are required to response in a JSON format, consisting of 10 distinct parts with the following keys and corresponding content:
- You are required to respond in a JSON format, consisting of 10 distinct parts with the following keys and corresponding content:
{{"Observation": <Describe the initial screenshot of the application window in detail, including observations about the application's status relevant to the user request.>
"Thought": <Outline the logic behind the first action required to fulfill the request.>
"ControlLabel": <Specify the precise annotated label of the control item to be selected at the first step. If none of the control items are suitable or the task is complete, output an empty string.>
Expand Down

0 comments on commit e6c0f2a

Please sign in to comment.