Skip to content

Commit

Permalink
Merge pull request microsoft#69 from microsoft/dev/reformat
Browse files Browse the repository at this point in the history
Dev/reformat
  • Loading branch information
vyokky authored May 8, 2024
2 parents 1fad66b + c87ec89 commit 6b5d53a
Show file tree
Hide file tree
Showing 51 changed files with 3,388 additions and 2,431 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Both agents leverage the multi-modal capabilities of GPT-Vision to comprehend th


## 📢 News
- 📅 2024-05-07: **New Release for v0.1.1!** We've made some significant updates! Previously known as AppAgent and ActAgent, we've rebranded them to HostAgent and AppAgent to better align with their functionalities. Explore the latest enhancements:
- 📅 2024-05-08: **New Release for v0.1.1!** We've made some significant updates! Previously known as AppAgent and ActAgent, we've rebranded them to HostAgent and AppAgent to better align with their functionalities. Explore the latest enhancements:
1. **Learning from Human Demonstration:** UFO now supports learning from human demonstration! Utilize the [Windows Step Recorder](https://support.microsoft.com/en-us/windows/record-steps-to-reproduce-a-problem-46582a9b-620f-2e36-00c9-04e25d784e47) to record your steps and demonstrate them for UFO. Refer to our detailed guide in [README.md](/record_processor/README.md) for more information.
2. **Win32 Support:** We've incorporated support for [Win32](https://learn.microsoft.com/en-us/windows/win32/controls/window-controls) as a control backend, enhancing our UI automation capabilities.
3. **Extended Application Interaction:** UFO now goes beyond UI controls, allowing interaction with your application through keyboard inputs and native APIs! Presently, we support Word ([examples](/ufo/prompts/apps/word/api.yaml)), with more to come soon. Customize and build your own interactions.
Expand Down
6 changes: 0 additions & 6 deletions learner/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@ def __init__(self, extensions: str = None, directory: str = None):
self.extensions = extensions
self.directory = directory


def load_file_name(self):
    """
    Find the documents in the configured directory.
    Scans ``self.directory`` (set in ``__init__``) for files whose
    extension matches ``self.extensions``; there are no parameters —
    both values come from instance state.
    :return: The list of matching file paths, as returned by
        ``utils.find_files_with_extension``.
    """
    return utils.find_files_with_extension(self.directory, self.extensions)


def construct_document_list(self):
"""
Expand All @@ -33,7 +31,3 @@ def construct_document_list(self):
:return: The list of metadata for the loaded documents.
"""
pass




30 changes: 18 additions & 12 deletions learner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
Expand All @@ -31,35 +31,41 @@ def create_indexer(app: str, docs: str, format: str, incremental: bool, save_pat
loader = xml_loader.XMLLoader(docs)
documents = loader.construct_document()

print_with_color("Creating indexer for {num} documents for {app}...".format(num=len(documents), app=app), "yellow")
print_with_color(
"Creating indexer for {num} documents for {app}...".format(
num=len(documents), app=app
),
"yellow",
)

if format == "xml":
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-mpnet-base-v2"
)
else:
raise ValueError("Invalid format: " + format)

db = FAISS.from_documents(documents, embeddings)

if incremental:
if app in records:
print_with_color("Merging with previous indexer...", "yellow")
prev_db = FAISS.load_local(records[app], embeddings)
db.merge_from(prev_db)

db_file_path = os.path.join(save_path, app)
db_file_path = os.path.abspath(db_file_path)
db.save_local(db_file_path)

records[app] = db_file_path


save_json_file("./learner/records.json", records)

print_with_color("Indexer for {app} created successfully. Save in {path}.".format(app=app, path=db_file_path), "green")
print_with_color(
"Indexer for {app} created successfully. Save in {path}.".format(
app=app, path=db_file_path
),
"green",
)

return db_file_path





40 changes: 27 additions & 13 deletions learner/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,42 @@


args = argparse.ArgumentParser()
args.add_argument("--app", help="The name of application to learn.",
type=str, default="./")
args.add_argument("--docs", help="The help application of the app.", type=str,
default="./")
args.add_argument("--format", help="The format of the help doc.", type=str,
default="xml")
args.add_argument('--incremental', action='store_true', help='Enable incremental update.')
args.add_argument("--save_path", help="The format of the help doc.", type=str,
default="./vectordb/docs/")


args.add_argument(
"--app", help="The name of application to learn.", type=str, default="./"
)
args.add_argument(
"--docs", help="The help application of the app.", type=str, default="./"
)
args.add_argument(
"--format", help="The format of the help doc.", type=str, default="xml"
)
args.add_argument(
"--incremental", action="store_true", help="Enable incremental update."
)
args.add_argument(
"--save_path",
help="The format of the help doc.",
type=str,
default="./vectordb/docs/",
)


parsed_args = args.parse_args()


def main():
"""
Main function.
"""

indexer.create_indexer(parsed_args.app, parsed_args.docs, parsed_args.format, parsed_args.incremental, parsed_args.save_path)
indexer.create_indexer(
parsed_args.app,
parsed_args.docs,
parsed_args.format,
parsed_args.incremental,
parsed_args.save_path,
)


if __name__ == "__main__":
main()
main()
17 changes: 13 additions & 4 deletions record_processor/parser/demonstration_record.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.


class DemonstrationStep:
"""
Class for the single step information in the user demonstration record.
Multiple steps will be recorded to achieve a specific request.
"""

def __init__(self, application: str, description: str, action: str, screenshot: str, comment: str):
def __init__(
self,
application: str,
description: str,
action: str,
screenshot: str,
comment: str,
):
"""
Create a new step.
"""
Expand All @@ -17,6 +25,7 @@ def __init__(self, application: str, description: str, action: str, screenshot:
self.comment = comment
self.screenshot = screenshot


class DemonstrationRecord:
"""
Class for the user demonstration record.
Expand All @@ -40,19 +49,19 @@ def set_request(self, request: str):
Set the request.
"""
self.__request = request

def get_request(self) -> str:
"""
Get the request.
"""
return self.__request

def get_applications(self) -> list:
    """
    Get the applications recorded in the demonstration.
    :return: The list of application names collected across the
        recorded steps (stored in the private ``__applications`` field).
    """
    return self.__applications

def get_step_num(self) -> int:
"""
Get the step number.
Expand Down
90 changes: 55 additions & 35 deletions record_processor/parser/psr_record_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ def parse_to_record(self) -> DemonstrationRecord:
"""
boundary = self.__find_boundary()
self.parts_dict = self.__split_file_by_boundary(boundary)
self.comments = self.__get_comments(
self.parts_dict['main.htm']['Content'])
self.steps = self.__get_steps(self.parts_dict['main.htm']['Content'])
self.comments = self.__get_comments(self.parts_dict["main.htm"]["Content"])
self.steps = self.__get_steps(self.parts_dict["main.htm"]["Content"])
record = DemonstrationRecord(
list(set(self.applications)), len(self.steps), **self.steps)
list(set(self.applications)), len(self.steps), **self.steps
)

return record

Expand All @@ -54,14 +54,14 @@ def __find_boundary(self) -> str:
if boundary_start != -1:
boundary_start += len("boundary=")
boundary_end = self.content.find("\n", boundary_start)
boundary = self.content[boundary_start:boundary_end].strip('\"')
boundary = self.content[boundary_start:boundary_end].strip('"')
return boundary
else:
raise ValueError("Boundary not found in the .mht file.")

def __split_file_by_boundary(self, boundary: str) -> dict:
"""
Split the file by the boundary into parts,
Split the file by the boundary into parts,
Store the parts in a dictionary, including the content type,
content location and content transfer encoding.
boundary: The boundary of the file.
Expand All @@ -72,27 +72,36 @@ def __split_file_by_boundary(self, boundary: str) -> dict:
for part in parts:
content_type_start = part.find("Content-Type:")
content_location_start = part.find("Content-Location:")
content_transfer_encoding_start = part.find(
"Content-Transfer-Encoding:")
content_transfer_encoding_start = part.find("Content-Transfer-Encoding:")
part_info = {}
if content_location_start != -1:
content_location_end = part.find("\n", content_location_start)
content_location = part[content_location_start:content_location_end].split(":")[
1].strip()
content_location = (
part[content_location_start:content_location_end]
.split(":")[1]
.strip()
)

# add the content location
if content_type_start != -1:
content_type_end = part.find("\n", content_type_start)
content_type = part[content_type_start:content_type_end].split(":")[
1].strip()
content_type = (
part[content_type_start:content_type_end].split(":")[1].strip()
)
part_info["Content-Type"] = content_type

# add the content transfer encoding
if content_transfer_encoding_start != -1:
content_transfer_encoding_end = part.find(
"\n", content_transfer_encoding_start)
content_transfer_encoding = part[content_transfer_encoding_start:content_transfer_encoding_end].split(":")[
1].strip()
"\n", content_transfer_encoding_start
)
content_transfer_encoding = (
part[
content_transfer_encoding_start:content_transfer_encoding_end
]
.split(":")[1]
.strip()
)
part_info["Content-Transfer-Encoding"] = content_transfer_encoding

content = part[content_location_end:].strip()
Expand All @@ -112,25 +121,30 @@ def __get_steps(self, content: str) -> dict:
"""

user_action_data = re.search(
r'<UserActionData>(.*?)</UserActionData>', content, re.DOTALL)
r"<UserActionData>(.*?)</UserActionData>", content, re.DOTALL
)
if user_action_data:

root = ET.fromstring(user_action_data.group(1))
steps = {}

for each_action in root.findall('EachAction'):
for each_action in root.findall("EachAction"):

action_number = each_action.get('ActionNumber')
application = each_action.get('FileName')
description = each_action.find('Description').text
action = each_action.find('Action').text
screenshot_file_name = each_action.find(
'ScreenshotFileName').text
action_number = each_action.get("ActionNumber")
application = each_action.get("FileName")
description = each_action.find("Description").text
action = each_action.find("Action").text
screenshot_file_name = each_action.find("ScreenshotFileName").text
screenshot = self.__get_screenshot(screenshot_file_name)
step_key = f"step_{int(action_number) - 1}"

step = DemonstrationStep(
application, description, action, screenshot, self.comments.get(step_key))
application,
description,
action,
screenshot,
self.comments.get(step_key),
)
steps[step_key] = step
self.applications.append(application)
return steps
Expand All @@ -143,16 +157,21 @@ def __get_comments(self, content: str) -> dict:
content: The content of the main.htm file.
return: A dictionary of comments for each step.
"""
soup = BeautifulSoup(content, 'html.parser')
soup = BeautifulSoup(content, "html.parser")
body = soup.body
steps_html = body.find('div', id='Steps')
steps = steps_html.find_all(lambda tag: tag.name == 'div' and tag.has_attr(
'id') and re.match(r'^Step\d+$', tag['id']))
steps_html = body.find("div", id="Steps")
steps = steps_html.find_all(
lambda tag: tag.name == "div"
and tag.has_attr("id")
and re.match(r"^Step\d+$", tag["id"])
)

comments = {}
for index, step in enumerate(steps):
comment_tag = step.find('b', text='Comment: ')
comments[f'step_{index}'] = comment_tag.next_sibling if comment_tag else None
comment_tag = step.find("b", text="Comment: ")
comments[f"step_{index}"] = (
comment_tag.next_sibling if comment_tag else None
)
return comments

def __get_screenshot(self, screenshot_file_name: str) -> str:
Expand All @@ -163,11 +182,12 @@ def __get_screenshot(self, screenshot_file_name: str) -> str:
return: The screenshot in base64 string.
"""
screenshot_part = self.parts_dict[screenshot_file_name]
content = screenshot_part['Content']
content_type = screenshot_part['Content-Type']
content_transfer_encoding = screenshot_part['Content-Transfer-Encoding']
content = screenshot_part["Content"]
content_type = screenshot_part["Content-Type"]
content_transfer_encoding = screenshot_part["Content-Transfer-Encoding"]

screenshot = 'data:{type};{encoding}, {content}'.format(
type=content_type, encoding=content_transfer_encoding, content=content)
screenshot = "data:{type};{encoding}, {content}".format(
type=content_type, encoding=content_transfer_encoding, content=content
)

return screenshot
Loading

0 comments on commit 6b5d53a

Please sign in to comment.