Commit (forked from microsoft/UFO)
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 5 changed files with 173 additions and 141 deletions.
@@ -1,59 +1,101 @@
version: 0.1
APP_AGENT: {
  VISUAL_MODE: True, # Whether to use the visual mode

  BING_API_KEY: "YOUR_BING_SEARCH_API_KEY", # The Bing search API key
  API_TYPE: "openai", # The API type: "openai" for the OpenAI API, "aoai" for the AOAI API, "azure_ad" for Azure AD authentication to the AOAI API.
  API_BASE: "https://api.openai.com/v1/chat/completions", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API.
  API_KEY: "sk-", # The OpenAI API key; it begins with sk-
  API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  API_MODEL: "gpt-4-vision-preview", # Currently the only OpenAI model that accepts visual input

  ### Comment out the block above and uncomment these if using "aoai".
  # API_TYPE: "aoai", # The API type: "openai" for the OpenAI API, "aoai" for Azure OpenAI.
  # API_BASE: "YOUR_ENDPOINT", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API; for aoai, it should be https://{your-resource-name}.openai.azure.com
  # API_KEY: "YOUR_KEY", # The AOAI API key
  # API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  # API_MODEL: "YOUR_MODEL", # The model name for the AOAI API
  # API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API

  ### For Azure_AD
  # AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the LLM model
  # AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the LLM model
  # AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the LLM model; the full scope has the form api://YOUR_SCOPE_BASE, and only the YOUR_SCOPE_BASE part is needed here
}
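For reference, a config file in this shape can be read with a standard YAML loader; a minimal Python sketch (the file path here is an assumption, not necessarily how UFO itself loads its configuration):

# Minimal sketch: load the YAML config and pick the section for one agent.
# The path "ufo/config/config.yaml" is an assumed location for the file above.
import yaml  # pip install pyyaml

with open("ufo/config/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

app_agent_cfg = config["APP_AGENT"]
print(app_agent_cfg["API_TYPE"], app_agent_cfg["API_MODEL"])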

ACTION_AGENT: {
  VISUAL_MODE: True, # Whether to use the visual mode

  API_TYPE: "openai", # The API type: "openai" for the OpenAI API, "aoai" for the AOAI API, "azure_ad" for Azure AD authentication to the AOAI API.
  API_BASE: "https://api.openai.com/v1/chat/completions", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API.
  API_KEY: "sk-", # The OpenAI API key; it begins with sk-
  API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  API_MODEL: "gpt-4-vision-preview", # Currently the only OpenAI model that accepts visual input

  ### Comment out the block above and uncomment these if using "aoai".
  # API_TYPE: "aoai", # The API type: "openai" for the OpenAI API, "aoai" for Azure OpenAI.
  # API_BASE: "YOUR_ENDPOINT", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API; for aoai, it should be https://{your-resource-name}.openai.azure.com
  # API_KEY: "YOUR_KEY", # The AOAI API key
  # API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  # API_MODEL: "YOUR_MODEL", # The model name for the AOAI API
  # API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API

  ### For Azure_AD
  # AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the LLM model
  # AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the LLM model
  # AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the LLM model; the full scope has the form api://YOUR_SCOPE_BASE, and only the YOUR_SCOPE_BASE part is needed here
}
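When the "azure_ad" option is used, a bearer token for the AOAI endpoint is obtained from Azure AD using the configured scope; a minimal sketch with the azure-identity package (the credential type and the api://{scope_base}/.default scope format are assumptions about how these settings would be consumed):

# Minimal sketch: acquire an Azure AD token and build the Authorization header.
# The placeholder values mirror AAD_TENANT_ID and AAD_API_SCOPE_BASE above.
from azure.identity import InteractiveBrowserCredential  # pip install azure-identity

aad_tenant_id = "YOUR_TENANT_ID"     # placeholder, from AAD_TENANT_ID
aad_scope_base = "YOUR_SCOPE_BASE"   # placeholder, from AAD_API_SCOPE_BASE

credential = InteractiveBrowserCredential(tenant_id=aad_tenant_id)
token = credential.get_token(f"api://{aad_scope_base}/.default")
headers = {"Authorization": f"Bearer {token.token}"}  # attach to requests against API_BASE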

BACKUP_AGENT: {
  VISUAL_MODE: True, # Whether to use the visual mode

  API_TYPE: "openai", # The API type: "openai" for the OpenAI API, "aoai" for the AOAI API, "azure_ad" for Azure AD authentication to the AOAI API.
  API_BASE: "https://api.openai.com/v1/chat/completions", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API.
  API_KEY: "sk-", # The OpenAI API key; it begins with sk-
  API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  API_MODEL: "gpt-4-vision-preview", # Currently the only OpenAI model that accepts visual input

  ### Comment out the block above and uncomment these if using "aoai".
  # API_TYPE: "aoai", # The API type: "openai" for the OpenAI API, "aoai" for Azure OpenAI.
  # API_BASE: "YOUR_ENDPOINT", # The API endpoint: "https://api.openai.com/v1/chat/completions" for the OpenAI API; for aoai, it should be https://{your-resource-name}.openai.azure.com
  # API_KEY: "YOUR_KEY", # The AOAI API key
  # API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
  # API_MODEL: "YOUR_MODEL", # The model name for the AOAI API
  # API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API

  ### For Azure_AD
  # AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the LLM model
  # AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the LLM model
  # AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the LLM model; the full scope has the form api://YOUR_SCOPE_BASE, and only the YOUR_SCOPE_BASE part is needed here
}

CONTROL_BACKEND: "uia" # The backend for control actions
MAX_STEP: 30 # The max step limit for completing the user request
SLEEP_TIME: 5 # The sleep time between steps, to wait for the window to be ready
SAFE_GUARD: True # Whether to use the safeguard to prevent the model from performing sensitive operations.
CONTROL_TYPE_LIST: ["Button", "Edit", "TabItem", "Document", "ListItem", "MenuItem", "ScrollBar", "TreeItem", "Hyperlink", "ComboBox", "RadioButton"] # The list of control types that are allowed to be selected
HISTORY_KEYS: ["Step", "Thought", "ControlText", "Action", "Comment", "Results"] # The keys of the action history passed to the next step.
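The step, sleep, and safeguard settings suggest a bounded step loop; the following is an illustrative sketch only, not UFO's actual control flow, and the Action class and propose_action function are stand-ins invented for this example:

# Illustrative sketch of how MAX_STEP, SLEEP_TIME and SAFE_GUARD could drive a step loop.
import time
from dataclasses import dataclass

@dataclass
class Action:
    description: str
    is_sensitive: bool = False
    is_final: bool = False

def propose_action(request: str, step: int) -> Action:
    # Stand-in for the LLM call that decides the next UI action.
    return Action(description=f"step {step} for: {request}", is_final=step >= 2)

def run(request: str, config: dict) -> None:
    for step in range(config["MAX_STEP"]):
        action = propose_action(request, step)
        if config["SAFE_GUARD"] and action.is_sensitive:
            if input(f"Allow sensitive action '{action.description}'? [y/N] ").lower() != "y":
                break                             # user vetoed a sensitive operation
        print("executing:", action.description)  # stand-in for the real UI action
        time.sleep(config["SLEEP_TIME"])          # wait for the window to settle
        if action.is_final:
            break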
ANNOTATION_COLORS: {
  "Button": "#FFF68F",
  "Edit": "#A5F0B5",
  "TabItem": "#A5E7F0",
  "Document": "#FFD18A",
  "ListItem": "#D9C3FE",
  "MenuItem": "#E7FEC3",
  "ScrollBar": "#FEC3F8",
  "TreeItem": "#D6D6D6",
  "Hyperlink": "#91FFEB",
  "ComboBox": "#D8B6D4"
}
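These colors map control types to the highlight color used when annotating screenshots; a minimal PIL sketch of that idea (the rectangle style, fallback color, and the `controls` data shape are assumptions for illustration):

# Minimal sketch: outline detected controls on a screenshot, colored by control type.
# `controls` is a hypothetical list of (control_type, left, top, right, bottom) tuples.
from PIL import Image, ImageDraw  # pip install pillow

def annotate(screenshot_path, controls, colors, out_path):
    image = Image.open(screenshot_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    for control_type, left, top, right, bottom in controls:
        color = colors.get(control_type, "#FF0000")  # fall back to red for unknown types
        draw.rectangle([left, top, right, bottom], outline=color, width=3)
    image.save(out_path)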

PRINT_LOG: False # Whether to print the log
CONCAT_SCREENSHOT: True # Whether to concatenate the screenshots for the control items
LOG_LEVEL: "DEBUG" # The log level
INCLUDE_LAST_SCREENSHOT: True # Whether to include the last screenshot in the observation
REQUEST_TIMEOUT: 250 # The call timeout for the GPT-V model
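CONCAT_SCREENSHOT and INCLUDE_LAST_SCREENSHOT shape the image observation sent to the model; a minimal PIL sketch of concatenating two screenshots side by side (the layout and background color are assumptions, not UFO's exact composition):

# Minimal sketch: paste two screenshots side by side into a single observation image.
from PIL import Image

def concat_horizontally(left_path, right_path, out_path):
    left, right = Image.open(left_path), Image.open(right_path)
    canvas = Image.new("RGB", (left.width + right.width, max(left.height, right.height)), "white")
    canvas.paste(left, (0, 0))
    canvas.paste(right, (left.width, 0))
    canvas.save(out_path)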

APP_SELECTION_PROMPT: "ufo/prompts/base/{mode}/app_selection.yaml" # The prompt for the app selection
ACTION_SELECTION_PROMPT: "ufo/prompts/base/{mode}/action_selection.yaml" # The prompt for the action selection

APP_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_example.yaml" # The example prompt for the app selection
ACTION_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/action_example.yaml" # The example prompt for the action selection
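The {mode} placeholder in these paths is presumably resolved from the visual-mode setting; a short sketch of that substitution (the concrete mode names "visual" and "nonvisual" are assumptions):

# Sketch: resolve a prompt path template based on VISUAL_MODE.
def resolve_prompt_path(template: str, visual_mode: bool) -> str:
    mode = "visual" if visual_mode else "nonvisual"  # assumed mode names
    return template.format(mode=mode)

# e.g. resolve_prompt_path("ufo/prompts/base/{mode}/app_selection.yaml", True)
#  -> "ufo/prompts/base/visual/app_selection.yaml"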

## For experience learning
EXPERIENCE_PROMPT: "ufo/prompts/experience/{mode}/experience_summary.yaml"
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

API_PROMPT: "ufo/prompts/base/{mode}/api.yaml" # The prompt for the API
INPUT_TEXT_API: "type_keys" # The API used to input text
INPUT_TEXT_ENTER: True # Whether to press Enter after typing the text
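"type_keys" matches the name of pywinauto's keyboard-input method on a control wrapper; a minimal sketch of typing into an Edit control through the "uia" backend (the Notepad window title and control lookup are assumptions for illustration):

# Minimal sketch: type text into a control via pywinauto's uia backend.
from pywinauto import Application  # pip install pywinauto

app = Application(backend="uia").connect(title_re=".*Notepad.*")
edit = app.top_window().child_window(control_type="Edit")
edit.type_keys("Hello from UFO{ENTER}", with_spaces=True)  # {ENTER} presses Enter, as with INPUT_TEXT_ENTER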

### For GPT parameters
MAX_TOKENS: 2000 # The max token limit for the response completion
MAX_RETRY: 3 # The max retry limit for the response completion
TEMPERATURE: 0.0 # The temperature of the model: the lower the value, the more consistent the output of the model
TOP_P: 0.0 # The top_p of the model: the lower the value, the more conservative the output of the model
TIMEOUT: 60 # The call timeout in seconds
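These parameters map onto the standard chat-completion request fields for the plain OpenAI case, where API_BASE is the full /v1/chat/completions URL; a hedged sketch built with requests rather than any UFO wrapper (the message payload is illustrative):

# Sketch: send a chat completion request using the GPT parameters above.
import requests

def chat_completion(cfg, messages):
    headers = {"Authorization": f"Bearer {cfg['API_KEY']}", "Content-Type": "application/json"}
    payload = {
        "model": cfg["API_MODEL"],
        "messages": messages,
        "max_tokens": 2000,   # MAX_TOKENS
        "temperature": 0.0,   # TEMPERATURE
        "top_p": 0.0,         # TOP_P
    }
    response = requests.post(cfg["API_BASE"], headers=headers, json=payload, timeout=60)  # TIMEOUT
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]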

### For RAG

## RAG Configuration for the offline docs
RAG_OFFLINE_DOCS: False # Whether to use the offline RAG.
RAG_OFFLINE_DOCS_RETRIEVED_TOPK: 1 # The top-k for the offline retrieved documents

## RAG Configuration for the Bing search
BING_API_KEY: "YOUR_BING_SEARCH_API_KEY" # The Bing search API key
RAG_ONLINE_SEARCH: False # Whether to use the online search for the RAG.
RAG_ONLINE_SEARCH_TOPK: 5 # The top-k for the online search
RAG_ONLINE_RETRIEVED_TOPK: 1 # The top-k for the online retrieved documents
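A hedged sketch of the online search step with these settings, using the public Bing Web Search REST endpoint (the endpoint, header, and response fields follow Bing's documented API; how UFO wires this in is not shown here):

# Sketch: query the Bing Web Search API and keep the top-k results for retrieval.
import requests

def bing_search(query, api_key, topk=5):
    response = requests.get(
        "https://api.bing.microsoft.com/v7.0/search",
        headers={"Ocp-Apim-Subscription-Key": api_key},
        params={"q": query, "count": topk},
        timeout=30,
    )
    response.raise_for_status()
    results = response.json().get("webPages", {}).get("value", [])
    return [(item["name"], item["url"], item["snippet"]) for item in results]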

## RAG Configuration for experience
RAG_EXPERIENCE: True # Whether to use RAG over the agent's own self-experience.
RAG_EXPERIENCE_RETRIEVED_TOPK: 5 # The top-k for the retrieved experience records
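Retrieval over the saved experience is presumably a standard embedding top-k lookup; a self-contained numpy sketch of that idea (how embeddings are produced and stored under vectordb/experience/ is not shown, only the cosine-similarity ranking):

# Sketch: rank stored experience embeddings against a query embedding and keep the top-k.
import numpy as np

def retrieve_topk(query_vec, experience_vecs, records, topk=5):
    q = query_vec / np.linalg.norm(query_vec)
    m = experience_vecs / np.linalg.norm(experience_vecs, axis=1, keepdims=True)
    scores = m @ q                           # cosine similarity against every stored record
    best = np.argsort(scores)[::-1][:topk]   # indices of the top-k most similar records
    return [(records[i], float(scores[i])) for i in best]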
@@ -0,0 +1,38 @@
CONTROL_BACKEND: "uia" # The backend for control actions
MAX_STEP: 30 # The max step limit for completing the user request
SLEEP_TIME: 5 # The sleep time between steps, to wait for the window to be ready
SAFE_GUARD: True # Whether to use the safeguard to prevent the model from performing sensitive operations.
CONTROL_TYPE_LIST: ["Button", "Edit", "TabItem", "Document", "ListItem", "MenuItem", "ScrollBar", "TreeItem", "Hyperlink", "ComboBox", "RadioButton"] # The list of control types that are allowed to be selected
HISTORY_KEYS: ["Step", "Thought", "ControlText", "Action", "Comment", "Results"] # The keys of the action history passed to the next step.
ANNOTATION_COLORS: {
  "Button": "#FFF68F",
  "Edit": "#A5F0B5",
  "TabItem": "#A5E7F0",
  "Document": "#FFD18A",
  "ListItem": "#D9C3FE",
  "MenuItem": "#E7FEC3",
  "ScrollBar": "#FEC3F8",
  "TreeItem": "#D6D6D6",
  "Hyperlink": "#91FFEB",
  "ComboBox": "#D8B6D4"
}

PRINT_LOG: False # Whether to print the log
CONCAT_SCREENSHOT: True # Whether to concatenate the screenshots for the control items
LOG_LEVEL: "DEBUG" # The log level
INCLUDE_LAST_SCREENSHOT: True # Whether to include the last screenshot in the observation
REQUEST_TIMEOUT: 250 # The call timeout for the GPT-V model

APP_SELECTION_PROMPT: "ufo/prompts/base/{mode}/app_selection.yaml" # The prompt for the app selection
ACTION_SELECTION_PROMPT: "ufo/prompts/base/{mode}/action_selection.yaml" # The prompt for the action selection

APP_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_example.yaml" # The example prompt for the app selection
ACTION_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/action_example.yaml" # The example prompt for the action selection

## For experience learning
EXPERIENCE_PROMPT: "ufo/prompts/experience/{mode}/experience_summary.yaml"
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

API_PROMPT: "ufo/prompts/base/{mode}/api.yaml" # The prompt for the API
INPUT_TEXT_API: "type_keys" # The API used to input text
INPUT_TEXT_ENTER: True # Whether to press Enter after typing the text