Skip to content

Commit

Permalink
Merge pull request microsoft#69 from microsoft/dev/reformat
Browse files Browse the repository at this point in the history
Dev/reformat
  • Loading branch information
vyokky authored May 8, 2024
2 parents 1fad66b + c87ec89 commit 6b5d53a
Show file tree
Hide file tree
Showing 51 changed files with 3,388 additions and 2,431 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Both agents leverage the multi-modal capabilities of GPT-Vision to comprehend th


## 📢 News
- 📅 2024-05-07: **New Release for v0.1.1!** We've made some significant updates! Previously known as AppAgent and ActAgent, we've rebranded them to HostAgent and AppAgent to better align with their functionalities. Explore the latest enhancements:
- 📅 2024-05-08: **New Release for v0.1.1!** We've made some significant updates! Previously known as AppAgent and ActAgent, we've rebranded them to HostAgent and AppAgent to better align with their functionalities. Explore the latest enhancements:
1. **Learning from Human Demonstration:** UFO now supports learning from human demonstration! Utilize the [Windows Step Recorder](https://support.microsoft.com/en-us/windows/record-steps-to-reproduce-a-problem-46582a9b-620f-2e36-00c9-04e25d784e47) to record your steps and demonstrate them for UFO. Refer to our detailed guide in [README.md](/record_processor/README.md) for more information.
2. **Win32 Support:** We've incorporated support for [Win32](https://learn.microsoft.com/en-us/windows/win32/controls/window-controls) as a control backend, enhancing our UI automation capabilities.
3. **Extended Application Interaction:** UFO now goes beyond UI controls, allowing interaction with your application through keyboard inputs and native APIs! Presently, we support Word ([examples](/ufo/prompts/apps/word/api.yaml)), with more to come soon. Customize and build your own interactions.
Expand Down
6 changes: 0 additions & 6 deletions learner/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@ def __init__(self, extensions: str = None, directory: str = None):
self.extensions = extensions
self.directory = directory


def load_file_name(self):
    """
    Find the documents in the configured directory.
    Scans ``self.directory`` (set in ``__init__``) for files whose
    extension matches ``self.extensions``; there are no parameters —
    both values come from instance state.
    :return: The list of matching file paths, as returned by
        ``utils.find_files_with_extension``.
    """
    return utils.find_files_with_extension(self.directory, self.extensions)


def construct_document_list(self):
"""
Expand All @@ -33,7 +31,3 @@ def construct_document_list(self):
:return: The list of metadata for the loaded documents.
"""
pass




30 changes: 18 additions & 12 deletions learner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
Expand All @@ -31,35 +31,41 @@ def create_indexer(app: str, docs: str, format: str, incremental: bool, save_pat
loader = xml_loader.XMLLoader(docs)
documents = loader.construct_document()

print_with_color("Creating indexer for {num} documents for {app}...".format(num=len(documents), app=app), "yellow")
print_with_color(
"Creating indexer for {num} documents for {app}...".format(
num=len(documents), app=app
),
"yellow",
)

if format == "xml":
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-mpnet-base-v2"
)
else:
raise ValueError("Invalid format: " + format)

db = FAISS.from_documents(documents, embeddings)

if incremental:
if app in records:
print_with_color("Merging with previous indexer...", "yellow")
prev_db = FAISS.load_local(records[app], embeddings)
db.merge_from(prev_db)

db_file_path = os.path.join(save_path, app)
db_file_path = os.path.abspath(db_file_path)
db.save_local(db_file_path)

records[app] = db_file_path


save_json_file("./learner/records.json", records)

print_with_color("Indexer for {app} created successfully. Save in {path}.".format(app=app, path=db_file_path), "green")
print_with_color(
"Indexer for {app} created successfully. Save in {path}.".format(
app=app, path=db_file_path
),
"green",
)

return db_file_path





40 changes: 27 additions & 13 deletions learner/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,42 @@


args = argparse.ArgumentParser()
args.add_argument("--app", help="The name of application to learn.",
type=str, default="./")
args.add_argument("--docs", help="The help application of the app.", type=str,
default="./")
args.add_argument("--format", help="The format of the help doc.", type=str,
default="xml")
args.add_argument('--incremental', action='store_true', help='Enable incremental update.')
args.add_argument("--save_path", help="The format of the help doc.", type=str,
default="./vectordb/docs/")


args.add_argument(
"--app", help="The name of application to learn.", type=str, default="./"
)
args.add_argument(
"--docs", help="The help application of the app.", type=str, default="./"
)
args.add_argument(
"--format", help="The format of the help doc.", type=str, default="xml"
)
args.add_argument(
"--incremental", action="store_true", help="Enable incremental update."
)
args.add_argument(
"--save_path",
help="The format of the help doc.",
type=str,
default="./vectordb/docs/",
)


parsed_args = args.parse_args()


def main():
"""
Main function.
"""

indexer.create_indexer(parsed_args.app, parsed_args.docs, parsed_args.format, parsed_args.incremental, parsed_args.save_path)
indexer.create_indexer(
parsed_args.app,
parsed_args.docs,
parsed_args.format,
parsed_args.incremental,
parsed_args.save_path,
)


if __name__ == "__main__":
main()
main()
17 changes: 13 additions & 4 deletions record_processor/parser/demonstration_record.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.


class DemonstrationStep:
"""
Class for the single step information in the user demonstration record.
Multiple steps will be recorded to achieve a specific request.
"""

def __init__(self, application: str, description: str, action: str, screenshot: str, comment: str):
def __init__(
self,
application: str,
description: str,
action: str,
screenshot: str,
comment: str,
):
"""
Create a new step.
"""
Expand All @@ -17,6 +25,7 @@ def __init__(self, application: str, description: str, action: str, screenshot:
self.comment = comment
self.screenshot = screenshot


class DemonstrationRecord:
"""
Class for the user demonstration record.
Expand All @@ -40,19 +49,19 @@ def set_request(self, request: str):
Set the request.
"""
self.__request = request

def get_request(self) -> str:
"""
Get the request.
"""
return self.__request

def get_applications(self) -> list:
    """
    Get the applications recorded in the demonstration.
    :return: The list of application names collected across the
        recorded steps (stored in the private ``__applications`` field).
    """
    return self.__applications

def get_step_num(self) -> int:
"""
Get the step number.
Expand Down
90 changes: 55 additions & 35 deletions record_processor/parser/psr_record_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ def parse_to_record(self) -> DemonstrationRecord:
"""
boundary = self.__find_boundary()
self.parts_dict = self.__split_file_by_boundary(boundary)
self.comments = self.__get_comments(
self.parts_dict['main.htm']['Content'])
self.steps = self.__get_steps(self.parts_dict['main.htm']['Content'])
self.comments = self.__get_comments(self.parts_dict["main.htm"]["Content"])
self.steps = self.__get_steps(self.parts_dict["main.htm"]["Content"])
record = DemonstrationRecord(
list(set(self.applications)), len(self.steps), **self.steps)
list(set(self.applications)), len(self.steps), **self.steps
)

return record

Expand All @@ -54,14 +54,14 @@ def __find_boundary(self) -> str:
if boundary_start != -1:
boundary_start += len("boundary=")
boundary_end = self.content.find("\n", boundary_start)
boundary = self.content[boundary_start:boundary_end].strip('\"')
boundary = self.content[boundary_start:boundary_end].strip('"')
return boundary
else:
raise ValueError("Boundary not found in the .mht file.")

def __split_file_by_boundary(self, boundary: str) -> dict:
"""
Split the file by the boundary into parts,
Split the file by the boundary into parts,
Store the parts in a dictionary, including the content type,
content location and content transfer encoding.
boundary: The boundary of the file.
Expand All @@ -72,27 +72,36 @@ def __split_file_by_boundary(self, boundary: str) -> dict:
for part in parts:
content_type_start = part.find("Content-Type:")
content_location_start = part.find("Content-Location:")
content_transfer_encoding_start = part.find(
"Content-Transfer-Encoding:")
content_transfer_encoding_start = part.find("Content-Transfer-Encoding:")
part_info = {}
if content_location_start != -1:
content_location_end = part.find("\n", content_location_start)
content_location = part[content_location_start:content_location_end].split(":")[
1].strip()
content_location = (
part[content_location_start:content_location_end]
.split(":")[1]
.strip()
)

# add the content location
if content_type_start != -1:
content_type_end = part.find("\n", content_type_start)
content_type = part[content_type_start:content_type_end].split(":")[
1].strip()
content_type = (
part[content_type_start:content_type_end].split(":")[1].strip()
)
part_info["Content-Type"] = content_type

# add the content transfer encoding
if content_transfer_encoding_start != -1:
content_transfer_encoding_end = part.find(
"\n", content_transfer_encoding_start)
content_transfer_encoding = part[content_transfer_encoding_start:content_transfer_encoding_end].split(":")[
1].strip()
"\n", content_transfer_encoding_start
)
content_transfer_encoding = (
part[
content_transfer_encoding_start:content_transfer_encoding_end
]
.split(":")[1]
.strip()
)
part_info["Content-Transfer-Encoding"] = content_transfer_encoding

content = part[content_location_end:].strip()
Expand All @@ -112,25 +121,30 @@ def __get_steps(self, content: str) -> dict:
"""

user_action_data = re.search(
r'<UserActionData>(.*?)</UserActionData>', content, re.DOTALL)
r"<UserActionData>(.*?)</UserActionData>", content, re.DOTALL
)
if user_action_data:

root = ET.fromstring(user_action_data.group(1))
steps = {}

for each_action in root.findall('EachAction'):
for each_action in root.findall("EachAction"):

action_number = each_action.get('ActionNumber')
application = each_action.get('FileName')
description = each_action.find('Description').text
action = each_action.find('Action').text
screenshot_file_name = each_action.find(
'ScreenshotFileName').text
action_number = each_action.get("ActionNumber")
application = each_action.get("FileName")
description = each_action.find("Description").text
action = each_action.find("Action").text
screenshot_file_name = each_action.find("ScreenshotFileName").text
screenshot = self.__get_screenshot(screenshot_file_name)
step_key = f"step_{int(action_number) - 1}"

step = DemonstrationStep(
application, description, action, screenshot, self.comments.get(step_key))
application,
description,
action,
screenshot,
self.comments.get(step_key),
)
steps[step_key] = step
self.applications.append(application)
return steps
Expand All @@ -143,16 +157,21 @@ def __get_comments(self, content: str) -> dict:
content: The content of the main.htm file.
return: A dictionary of comments for each step.
"""
soup = BeautifulSoup(content, 'html.parser')
soup = BeautifulSoup(content, "html.parser")
body = soup.body
steps_html = body.find('div', id='Steps')
steps = steps_html.find_all(lambda tag: tag.name == 'div' and tag.has_attr(
'id') and re.match(r'^Step\d+$', tag['id']))
steps_html = body.find("div", id="Steps")
steps = steps_html.find_all(
lambda tag: tag.name == "div"
and tag.has_attr("id")
and re.match(r"^Step\d+$", tag["id"])
)

comments = {}
for index, step in enumerate(steps):
comment_tag = step.find('b', text='Comment: ')
comments[f'step_{index}'] = comment_tag.next_sibling if comment_tag else None
comment_tag = step.find("b", text="Comment: ")
comments[f"step_{index}"] = (
comment_tag.next_sibling if comment_tag else None
)
return comments

def __get_screenshot(self, screenshot_file_name: str) -> str:
Expand All @@ -163,11 +182,12 @@ def __get_screenshot(self, screenshot_file_name: str) -> str:
return: The screenshot in base64 string.
"""
screenshot_part = self.parts_dict[screenshot_file_name]
content = screenshot_part['Content']
content_type = screenshot_part['Content-Type']
content_transfer_encoding = screenshot_part['Content-Transfer-Encoding']
content = screenshot_part["Content"]
content_type = screenshot_part["Content-Type"]
content_transfer_encoding = screenshot_part["Content-Transfer-Encoding"]

screenshot = 'data:{type};{encoding}, {content}'.format(
type=content_type, encoding=content_transfer_encoding, content=content)
screenshot = "data:{type};{encoding}, {content}".format(
type=content_type, encoding=content_transfer_encoding, content=content
)

return screenshot
Loading

0 comments on commit 6b5d53a

Please sign in to comment.