Skip to content

Commit

Permalink
Merge pull request microsoft#28 from microsoft/vyokky/dev
Browse files Browse the repository at this point in the history
Vyokky/dev Experience Learning
  • Loading branch information
vyokky authored Mar 21, 2024
2 parents e101fb4 + e6c0f2a commit 15739fc
Show file tree
Hide file tree
Showing 35 changed files with 2,262 additions and 1,409 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Ignore Jupyter Notebook checkpoints
.ipynb_checkpoints
/test/*
/deprecated/*
/test/*.ipynb
/logs/*
__pycache__/
Expand All @@ -19,6 +20,7 @@ ufo/config/config_llm.yaml
ufo/rag/app_docs/*
learner/records.json
vectordb/docs/*
vectordb/experience/*

# Don't ignore the example files
!vectordb/docs/example/
Expand Down
2 changes: 2 additions & 0 deletions learner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'



def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
"""
Create an indexer for the given application.
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ pywin32==304
pywinauto==0.6.8
PyYAML==6.0.1
Requests==2.31.0
faiss-cpu==1.23.5
lxml==5.1.0
faiss-cpu==1.8.0
lxml==5.1.0
psutil==5.9.8
1 change: 1 addition & 0 deletions ufo/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def load_config(config_path="ufo/config/"):
:return: Merged configuration from environment variables and YAML file.
"""
# Copy environment variables to avoid modifying them directly
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
configs = dict(os.environ)

path = config_path
Expand Down
12 changes: 11 additions & 1 deletion ufo/config/config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ ACTION_SELECTION_PROMPT: "ufo/prompts/base/{mode}/action_selection.yaml" # The
APP_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_example.yaml" # The prompt for the app selection
ACTION_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/action_example.yaml" # The prompt for the action selection


## For experience learning
EXPERIENCE_PROMPT: "ufo/prompts/experience/{mode}/experience_summary.yaml"
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

API_PROMPT: "ufo/prompts/base/{mode}/api.yaml" # The prompt for the API
INPUT_TEXT_API: "type_keys" # The input text API
INPUT_TEXT_ENTER: True # whether to press enter after typing the text
Expand All @@ -46,4 +51,9 @@ RAG_OFFLINE_DOCS_RETRIEVED_TOPK: 1 # The topk for the offline retrieved documen
## RAG Configuration for the Bing search
RAG_ONLINE_SEARCH: False # Whether to use the online search for the RAG.
RAG_ONLINE_SEARCH_TOPK: 5 # The topk for the online search
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents


## RAG Configuration for experience
RAG_EXPERIENCE: True # Whether to use the RAG from the saved experience.
RAG_EXPERIENCE_RETRIEVED_TOPK: 5 # The topk for the retrieved experience examples
19 changes: 9 additions & 10 deletions ufo/config/config_llm.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ APP_AGENT: {
# API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview" ,# "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -25,20 +25,20 @@ ACTION_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -47,26 +47,25 @@ BACKUP_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}




### For parameters
MAX_TOKENS: 2000 # The max token limit for the response completion
MAX_RETRY: 3 # The max retry limit for the response completion
Expand Down
2 changes: 2 additions & 0 deletions ufo/experience/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
200 changes: 200 additions & 0 deletions ufo/experience/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import os
import re
from typing import Optional

from ..utils import encode_image_from_path, print_with_color


class ExperienceLogLoader:
    """
    Loading the logs from previous runs.

    The loader reads ``response.log`` from a log folder, derives the step count
    from the ``action_step<N>.png`` file names found in that folder, and groups
    consecutive steps sharing the same "Round" value into partitions.
    """

    # File-name pattern of the raw per-step screenshots. Versioned screenshots
    # such as ``action_step3_selected_controls.png`` do not match it.
    _STEP_FILENAME_PATTERN = re.compile(r'action_step(\d+)\.png')

    def __init__(self, log_path: str):
        """
        Initialize the LogLoader.
        :param log_path: The path of the folder holding the response log and the screenshots.
        """
        self.log_path = log_path
        self.response = self.load_response_log()
        self.max_stepnum = self.find_max_number_in_filenames(log_path)
        self.request_partition = self.get_request_partition()
        self.screenshots = {}

        self.logs = []

    def load_response_log(self) -> list:
        """
        Load the response log, one JSON document per line of "response.log".
        Lines that cannot be decoded are reported and skipped; blank lines are ignored.
        :return: The list of decoded response records, in file order.
        """
        response = []
        response_log_path = os.path.join(self.log_path, "response.log")
        with open(response_log_path, 'r', encoding='utf-8') as file:
            for response_string in file:
                # A blank (e.g. trailing) line is not a broken record; do not warn about it.
                if not response_string.strip():
                    continue
                try:
                    response.append(json.loads(response_string))
                except json.JSONDecodeError:
                    print_with_color(f"Error loading response log: {response_string}", "yellow")
        return response

    @staticmethod
    def find_max_number_in_filenames(log_path) -> Optional[int]:
        """
        Find the maximum step number encoded in the screenshot filenames.
        :param log_path: The folder whose file names are inspected.
        :return: The maximum number in the filenames, or None if no filename matches.
        """
        numbers = [
            number
            for number in map(ExperienceLogLoader.extract_action_step_count, os.listdir(log_path))
            if number is not None
        ]
        return max(numbers) if numbers else None

    def load_screenshot(self, stepnum: int = 0, version: str = "") -> Optional[str]:
        """
        Load the screenshot of a step.
        :param stepnum: The step number of the screenshot.
        :param version: The version of the screenshot; empty for the plain "action_step<N>.png" file.
        :return: The encoded screenshot, or None if the file does not exist.
        """
        version_tag = "_" + version if version else ""
        screenshot_path = os.path.join(self.log_path, f"action_step{stepnum}{version_tag}.png")

        if not os.path.exists(screenshot_path):
            return None
        return encode_image_from_path(screenshot_path)

    def create_logs(self) -> list:
        """
        Build one log dictionary per request partition and store them in self.logs.
        Each dictionary holds the "request" and "round" of the partition's first step,
        the number of steps ("step_num"), one "step_<i>" entry per step (i is the
        zero-based position inside the partition) and the "application" list.
        :return: The list of partitioned logs.
        """
        self.logs = []
        for partition in self.request_partition:
            first_record = self.response[partition[0]]
            partitioned_logs = {
                "request": first_record["Request"],
                "round": first_record["Round"],
                "step_num": len(partition),
            }

            for local_step, step in enumerate(partition):
                partitioned_logs["step_%s" % local_step] = {
                    "response": self.response[step],
                    # NOTE(review): the flag is set on local index 1, not 0 - presumably
                    # index 0 is not an action step; confirm against the log producer.
                    "is_first_action": local_step == 1,
                    "screenshot": {
                        # The "raw" screenshot is stored without a version suffix.
                        version: self.load_screenshot(step, "" if version == "raw" else version)
                        for version in ["raw", "selected_controls"]
                    },
                }

            # De-duplicate while keeping first-seen order so the result is deterministic.
            partitioned_logs["application"] = list(
                dict.fromkeys(self.response[step]["Application"] for step in partition)
            )
            self.logs.append(partitioned_logs)
        return self.logs

    def get_request_partition(self) -> list:
        """
        Partition the step indices by round.
        Consecutive steps with the same "Round" value end up in the same partition.
        :return: A list of partitions, each a list of step indices; empty if there are no steps.
        """
        # max_stepnum is None when the folder holds no step screenshot, and the response
        # log may hold fewer records than there are screenshots (undecodable lines are
        # skipped on load), so never iterate past the records that actually exist.
        # NOTE(review): range() excludes max_stepnum itself; confirm the step numbering
        # of the screenshots before changing this bound.
        step_count = min(self.max_stepnum or 0, len(self.response))

        request_partition = []
        current_round = 0
        current_partition = []

        for step in range(step_count):
            nround = self.response[step]["Round"]

            if nround == current_round:
                current_partition.append(step)
                continue

            # A new round starts: close the running partition (if any) and open a new one.
            if current_partition:
                request_partition.append(current_partition)
            current_partition = [step]
            current_round = nround

        if current_partition:
            request_partition.append(current_partition)

        return request_partition

    @staticmethod
    def get_user_request(log_partition: dict) -> str:
        """
        Get the user request.
        :param log_partition: The log partition.
        :return: The user request, or None if the partition has no "request" key.
        """
        return log_partition.get("request")

    @staticmethod
    def get_app_list(log_partition: dict) -> list:
        """
        Get the application list.
        :param log_partition: The log partition.
        :return: The application list, or None if the partition has no "application" key.
        """
        return log_partition.get("application")

    @staticmethod
    def extract_action_step_count(filename: str) -> Optional[int]:
        """
        Extract the action step count from the filename.
        :param filename: The filename.
        :return: The number extracted from the filename, or None if it does not
        contain "action_step<digits>.png".
        """
        match = ExperienceLogLoader._STEP_FILENAME_PATTERN.search(filename)
        return int(match.group(1)) if match else None

Loading

0 comments on commit 15739fc

Please sign in to comment.