forked from microsoft/UFO
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_dev.yaml
83 lines (68 loc) · 4.53 KB
/
config_dev.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Backend used for control actions; currently "uia" and "win32" are supported.
CONTROL_BACKEND: "uia"
# Maximum number of steps allowed for completing the user request.
MAX_STEP: 100
# Sleep time between steps, to wait for the window to be ready.
SLEEP_TIME: 5
# NOTE(review): undocumented in this file; presumably how long the rectangle
# drawn around a control stays visible - confirm against the consumer.
RECTANGLE_TIME: 1
# Whether to use the safe guard that prevents the model from performing
# sensitive operations.
SAFE_GUARD: True
# Widgets that are allowed to be selected. With the uia backend this list
# filters on control_type; with the win32 backend it filters on class_name.
CONTROL_LIST:
  - "Button"
  - "Edit"
  - "TabItem"
  - "Document"
  - "ListItem"
  - "MenuItem"
  - "ScrollBar"
  - "TreeItem"
  - "Hyperlink"
  - "ComboBox"
  - "RadioButton"
  - "DataItem"
# Keys of the action history that are provided for the next step.
HISTORY_KEYS:
  - "Step"
  - "Thought"
  - "ControlText"
  - "Subtask"
  - "Action"
  - "Comment"
  - "Results"
  - "UserConfirm"
# Annotation color for each control type, keyed by control type name.
# Values look like "#RRGGBB" hex strings and must stay quoted: an unquoted
# value starting with '#' would be read as a comment.
# NOTE(review): CONTROL_LIST also contains "RadioButton" and "DataItem", which
# have no entry here - confirm the consumer has a fallback color for them.
ANNOTATION_COLORS:
  Button: "#FFF68F"
  Edit: "#A5F0B5"
  TabItem: "#A5E7F0"
  Document: "#FFD18A"
  ListItem: "#D9C3FE"
  MenuItem: "#E7FEC3"
  ScrollBar: "#FEC3F8"
  TreeItem: "#D6D6D6"
  Hyperlink: "#91FFEB"
  ComboBox: "#D8B6D4"
# Whether to print the log.
PRINT_LOG: False
# Whether to concatenate the screenshot for the control item.
CONCAT_SCREENSHOT: False
# Log level.
LOG_LEVEL: "DEBUG"
# Whether to include the last screenshot in the observation.
INCLUDE_LAST_SCREENSHOT: True
# Call timeout for the GPT-V model.
REQUEST_TIMEOUT: 250
# Prompt for the app selection (host agent).
# Because of input-size limits, a lite version of this prompt is also
# available for trying things out: "ufo/prompts/share/lite/host_agent.yaml"
HOSTAGENT_PROMPT: "ufo/prompts/share/base/host_agent.yaml"
# Prompt for the action selection (app agent).
# Lite version: "ufo/prompts/share/lite/app_agent.yaml"
APPAGENT_PROMPT: "ufo/prompts/share/base/app_agent.yaml"
# Prompt for the follower agent; currently the same file as APPAGENT_PROMPT.
# NOTE(review): the key name looks like a misspelling of "FOLLOWERAGENT". It is
# left unchanged because renaming would alter the key consumers look up -
# confirm before renaming.
FOLLOWERAHENT_PROMPT: "ufo/prompts/share/base/app_agent.yaml"
# Prompt for the evaluation.
EVALUATION_PROMPT: "ufo/prompts/evaluation/evaluate.yaml"
# Example prompt for the app selection.
# Lite version: "ufo/prompts/examples/lite/{mode}/host_agent_example.yaml"
# NOTE(review): "{mode}" is presumably substituted by the loader - confirm.
HOSTAGENT_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/host_agent_example.yaml"
# Example prompt for the action selection.
# Lite version: "ufo/prompts/examples/lite/{mode}/app_agent_example.yaml"
APPAGENT_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_agent_example.yaml"
## Experience learning
# Prompt file used for the experience summary.
EXPERIENCE_PROMPT: "ufo/prompts/experience/experience_summary.yaml"
# Path under which experience data is saved.
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

## User demonstration learning
# Prompt file used for the demonstration summary.
DEMONSTRATION_PROMPT: "ufo/prompts/demonstration/demonstration_summary.yaml"
# Path under which demonstration data is saved.
DEMONSTRATION_SAVED_PATH: "vectordb/demonstration/"
# Prompt for the API.
API_PROMPT: "ufo/prompts/share/base/api.yaml"
# The click API.
CLICK_API: "click_input"
# The input text API. Can be "type_keys" or "set_text".
INPUT_TEXT_API: "type_keys"
# Whether to press enter after typing the text.
INPUT_TEXT_ENTER: False

## API-related settings
# Whether to use the API.
USE_APIS: True
# Prompt for the Word API.
WORD_API_PROMPT: "ufo/prompts/apps/word/api.yaml"
# Prompt for the Excel API.
EXCEL_API_PROMPT: "ufo/prompts/apps/excel/api.yaml"
# Control filtering
# Supported filter types: 'TEXT' (rich text filter only), 'SEMANTIC'
# (semantic similarity match only), 'ICON' (icon match only).
# List of control filter types to apply; empty in this file.
CONTROL_FILTER_TYPE: []
# The control filter takes effect on the top k plans from UFO; default is 2.
CONTROL_FILTER_TOP_K_PLAN: 2
# Top k for the semantic-similarity control filter.
CONTROL_FILTER_TOP_K_SEMANTIC: 15
# Top k for the icon-similarity control filter.
CONTROL_FILTER_TOP_K_ICON: 15
# Model name used by the semantic-similarity control filter.
CONTROL_FILTER_MODEL_SEMANTIC_NAME: "all-MiniLM-L6-v2"
# Model name used by the icon-similarity control filter.
CONTROL_FILTER_MODEL_ICON_NAME: "clip-ViT-B-32"
# Whether to allow the open app action.
# Spelled "False" to match the boolean spelling used throughout this file.
ALLOW_OPENAPP: False
# Whether to log the XML file at every step.
LOG_XML: False
# Whether to allow the screenshot to memory for the agent's decision making.
SCREENSHOT_TO_MEMORY: True
# Customization
# Whether to use the customization.
USE_CUSTOMIZATION: True
# Path of the historical QA file.
QA_PAIR_FILE: "customization/historical_qa.txt"
# Number of QA pairs used for the customization.
QA_PAIR_NUM: 20
# Evaluation
# Whether to include the session in the evaluation.
EVA_SESSION: True
# NOTE(review): undocumented in this file; presumably whether to include each
# round in the evaluation - confirm against the consumer.
# Spelled "False" to match the boolean spelling used throughout this file.
EVA_ROUND: False
# Whether to include all the screenshots in the evaluation.
EVA_ALL_SCREENSHOTS: True