db saving

pengmingqiang · Mar 19, 2024 · 38dd946 · 38dd946
1 parent bedb7a3
commit 38dd946
Show file tree

Hide file tree

Showing 7 changed files with 197 additions and 53 deletions.
diff --git a/learner/indexer.py b/learner/indexer.py
@@ -8,6 +8,8 @@
 import os
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
 
+
+
 def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
     """
     Create an indexer for the given application.

diff --git a/requirements.txt b/requirements.txt
@@ -9,5 +9,5 @@ pywin32==304
 pywinauto==0.6.8
 PyYAML==6.0.1
 Requests==2.31.0
-faiss-cpu==1.23.5
+faiss-cpu==1.8.0
 lxml==5.1.0
diff --git a/ufo/experience/parser.py b/ufo/experience/parser.py
@@ -7,7 +7,7 @@
 from ..utils import encode_image_from_path
 
 
-class LogLoader:
+class ExperienceLogLoader:
     """
     Loading the logs from previous runs.
     """
@@ -151,6 +151,28 @@ def get_request_partition(self) -> list:
             request_partition.append(current_partition)
 
         return request_partition
+
+
+
+    @staticmethod
+    def get_user_request(log_partition: dict):
+        """
+        Get the user request.
+        :param log_partition: The log partition.
+        :return: The user request.
+        """
+        return log_partition.get("request")
+
+
+
+    @staticmethod
+    def get_app_list(log_partition: dict):
+        """
+        Get the application list.
+        :param log_partition: The log partition.
+        :return: The application list.
+        """
+        return log_partition.get("application")
 
 
     @staticmethod

diff --git a/ufo/experience/summarizer.py b/ufo/experience/summarizer.py
@@ -1,10 +1,16 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-
-from .parser import LogLoader
+from .parser import ExperienceLogLoader
 from ..prompter.experience_prompter import ExperiencePrompter
 from ..llm.llm_call import get_completion
+from ..utils import json_parser
+from typing import Tuple
+import os
+import yaml
+from langchain.docstore.document import Document
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
 
 
 class ExperienceSummarizer:
@@ -24,63 +30,139 @@ def __init__(self, is_visual: bool, prompt_template: str, example_prompt_templat
         self.prompt_template = prompt_template
         self.example_prompt_template = example_prompt_template
         self.api_prompt_template = api_prompt_template
-
-
-    def read_log(self, log_path: str) -> list:
-        """
-        Read the log.
-        :param log_path: The path of the log file.
-        """
-        replay_loader = LogLoader(log_path)
-        logs = replay_loader.create_logs()
-        return logs
 
 
-    def build_prompt(self, logs: list) -> list:
+    def build_prompt(self, log_partition: dict) -> list:
         """
         Build the prompt.
         :param logs: The logs.
         :param user_request: The user request.
         """
         experience_prompter = ExperiencePrompter(self.is_visual, self.prompt_template, self.example_prompt_template, self.api_prompt_template)
         experience_system_prompt = experience_prompter.system_prompt_construction()
-        experience_user_prompt = experience_prompter.user_content_construction(logs)
+        experience_user_prompt = experience_prompter.user_content_construction(log_partition)
         experience_prompt = experience_prompter.prompt_construction(experience_system_prompt, experience_user_prompt)
 
         return experience_prompt
 
 
-    def get_summary(self, prompt: str) -> str:
+    def get_summary(self, prompt_message: list) -> Tuple[dict, float]:
         """
+        Get the summary.
+        :param prompt_message: The prompt message.
+        :return: The summary and the cost.
         """
-        response = get_completion(prompt, self.is_visual)
 
-        return response
+        # Get the completion for the prompt message
+        response_string, cost = get_completion(prompt_message, "ACTION", use_backup_engine=True)
+        try:
+            response_json = json_parser(response_string)
+        except:
+            response_json = None
+
+        # Restructure the response
+        if response_json:
+            summary = dict()
+            summary["example"] = {}
+            for key in ["Observation", "Thought", "ControlLabel", "ControlText", "Function", "Args", "Status", "Plan", "Comment"]:
+                summary["example"][key] = response_json.get(key, "")
+            summary["Tips"] = response_json.get("Tips", "")
+
+        return summary, cost
 
 
-    def get_summary_list(self, logs: list) -> list:
+    def get_summary_list(self, logs: list) -> Tuple[list, float]:
         """
+        Get the summary list.
+        :param logs: The logs.
+        :return: The summary list and the total cost.
         """
         summaries = []
-        for log in logs:
-            prompt = self.build_prompt(log)
-            summary = self.get_summary(prompt)
+        total_cost = 0
+        for log_partition in logs:
+            prompt = self.build_prompt(log_partition)
+            summary, cost = self.get_summary(prompt)
+            summary["request"] = ExperienceLogLoader.get_user_request(log_partition)
+            summary["app_list"] = ExperienceLogLoader.get_app_list(log_partition)
             summaries.append(summary)
+            total_cost += cost
+
+        return summaries, total_cost
+
 
-        self.update_ymal(summaries, "path")
-        self.update_verctor_db(summaries, "path")
-        return summaries
+
+    @staticmethod
+    def read_log(log_path: str) -> list:
+        """
+        Read the log.
+        :param log_path: The path of the log file.
+        """
+        replay_loader = ExperienceLogLoader(log_path)
+        logs = replay_loader.create_logs()
+        return logs
 
 
+
     @staticmethod
-    def update_ymal(summaries: list, yaml_path: str):
+    def create_or_update_yaml(summaries: list, yaml_path: str):
         """
+        Create or update the YAML file.
+
+        :param summaries: The summaries.
+        :param yaml_path: The path of the YAML file.
         """
-        pass
+
+        # Check if the file exists, if not, create a new one
+        if not os.path.exists(yaml_path):
+            with open(yaml_path, 'w'):
+                pass
+            print(f"Created new YAML file: {yaml_path}")
+
+        # Read existing data from the YAML file
+        with open(yaml_path, 'r') as file:
+            existing_data = yaml.safe_load(file)
+
+        # Initialize index and existing_data if file is empty
+        index = len(existing_data) if existing_data else 0
+        existing_data = existing_data or {}
 
+        # Update data with new summaries
+        for i, summary in enumerate(summaries):
+            example = {f"example{index + i}": summary}
+            existing_data.update(example)
+
+        # Write updated data back to the YAML file
+        with open(yaml_path, 'w') as file:
+            yaml.safe_dump(existing_data, file, default_flow_style=False, sort_keys=False)
+
+        print(f"Updated existing YAML file successfully: {yaml_path}")
+
+
 
     @staticmethod
-    def update_verctor_db(summaries: list, db_path: str):
+    def create_or_update_vector_db(summaries: list, db_path: str):
         """
+        Create or update the vector database.
+        :param summaries: The summaries.
+        :param db_path: The path of the vector database.
         """
-        pass
+
+        document_list = []
+
+        for summary in summaries:
+            request = summary["request"]
+            document_list.append(Document(page_content=request, metadata=summary))
+
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+        db = FAISS.from_documents(document_list, embeddings)
+
+        # If a vector DB already exists at db_path, merge it into the newly built one before saving.
+        if os.path.exists(db_path):
+            prev_db = FAISS.load_local(db_path, embeddings)
+            db.merge_from(prev_db)
+
+        db.save_local(db_path)
+
+        print(f"Updated vector DB successfully: {db_path}")
+
+
diff --git a/ufo/module/flow.py b/ufo/module/flow.py
@@ -42,8 +42,8 @@ def __init__(self, task):
 
         self.log_path = f"logs/{self.task}/"
         create_folder(self.log_path)
-        self.logger = initialize_logger(self.log_path, "response.log")
-        self.request_logger = initialize_logger(self.log_path, "request.log")
+        self.logger = self.initialize_logger(self.log_path, "response.log")
+        self.request_logger = self.initialize_logger(self.log_path, "request.log")
 
         self.app_selection_prompter = ApplicationAgentPrompter(configs["APP_AGENT"]["VISUAL_MODE"], configs["APP_SELECTION_PROMPT"], configs["APP_SELECTION_EXAMPLE_PROMPT"], configs["API_PROMPT"])
         self.act_selection_prompter = ActionAgentPrompter(configs["ACTION_AGENT"]["VISUAL_MODE"], configs["ACTION_SELECTION_PROMPT"], configs["ACTION_SELECTION_EXAMPLE_PROMPT"], configs["API_PROMPT"])
@@ -465,27 +465,27 @@ def error_logger(self, response_str, error):
         log = json.dumps({"step": self.step, "status": "ERROR", "response": response_str, "error": error})
         self.logger.info(log)
 
+    @staticmethod
+    def initialize_logger(log_path, log_filename):
+        """
+        Initialize logging.
+        log_path: The path of the log file.
+        log_filename: The name of the log file.
+        return: The logger.
+        """
+        # Code for initializing logging
+        logger = logging.Logger(log_filename)
 
-def initialize_logger(log_path, log_filename):
-    """
-    Initialize logging.
-    log_path: The path of the log file.
-    log_filename: The name of the log file.
-    return: The logger.
-    """
-    # Code for initializing logging
-    logger = logging.Logger(log_filename)
-
-    if not configs["PRINT_LOG"]:
-        # Remove existing handlers if PRINT_LOG is False
-        logger.handlers = []
+        if not configs["PRINT_LOG"]:
+            # Remove existing handlers if PRINT_LOG is False
+            logger.handlers = []
 
 
-    log_file_path = os.path.join(log_path, log_filename)
-    file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
-    formatter = logging.Formatter('%(message)s')
-    file_handler.setFormatter(formatter)
-    logger.addHandler(file_handler)
-    logger.setLevel(configs["LOG_LEVEL"])
+        log_file_path = os.path.join(log_path, log_filename)
+        file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
+        formatter = logging.Formatter('%(message)s')
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        logger.setLevel(configs["LOG_LEVEL"])
 
-    return logger
+        return logger
diff --git a/ufo/prompter/experience_prompter.py b/ufo/prompter/experience_prompter.py
@@ -5,7 +5,6 @@
 import json
 
 
-
 class ExperiencePrompter(BasicPrompter):
     """
     The ExperiencePrompter class is the prompter for the experience learning.

diff --git a/ufo/prompts/experience/nonvisual/experience_summary.yaml b/ufo/prompts/experience/nonvisual/experience_summary.yaml
@@ -0,0 +1,39 @@
+version: 1.0
+
+system: |-
+  You are an expert summarizer tasked with condensing a trajectory of actions and responses of an intelligent agent operating within an application window on Windows OS to fulfill a user request. Your objective is to produce a single JSON document that streamlines all correct steps and provides tips for completing the task. Adhere to the following guidelines:
+  - You will be provided with the user request, the action and response sequence of the intelligent agent at each step.
+  - The user request defines the task for the intelligent agent.
+  - The action and response sequence of [Agent Trajectory] illustrates the agent's interactions with the application window to fulfill the user request.
+  - The agent's trajectory may contain incorrect or redundant steps. Your task is to summarize the correct steps into a single JSON document, excluding any redundancies.
+  - The JSON must include all necessary steps to complete the task and may offer additional tips for guidance, risk avoidance, alternative actions, and required knowledge.
+  
+
+  ## Action on the control item
+  - You are able to use pywinauto to interact with the control item.
+  {apis}
+
+
+  ## Output Format
+  - You are required to respond in a JSON format, consisting of 10 distinct parts with the following keys and corresponding content:
+    {{"Observation": <Describe and summarize your observation of the Agent Trajectory.>
+    "Thought": <Outline the logic behind the first action required to fulfill the request.>
+    "ControlLabel": <Specify the precise annotated label of the control item to be selected at the first step. If none of the control items are suitable or the task is complete, output an empty string.>
+    "ControlText": <Specify the precise control_text of the control item to be selected at the first step. If none of the control items are suitable or the task is complete, output an empty string ''.>
+    "Function": <Specify the precise API function name (without arguments) to be called on the control item to complete the user request. Leave it as an empty string "" if no suitable API function exists or the task is complete.>
+    "Args": <Specify the precise arguments in dictionary format of the selected API function to be called on the control item to complete the user request. Leave it as an empty dictionary {{}} if the API does not require arguments, or no suitable API function exists, or the task is complete.>
+    "Status": <Specify the status of the task after the action: "CONTINUE" if unfinished, or "FINISH" if completed.>
+    "Plan": <Provide a detailed plan of action to complete the user request, referencing the previous plan if needed. If the task is finished, output "<FINISH>". Split the plan for each step with a line break.>
+    "Comment": <Optionally provide additional comments or information about the task or action flow.>
+    "Tips": <Include guidance, risk avoidance, alternative actions, or required knowledge to complete the task. Use numbers to label each tip, and line breaks to separate the tips.>}}
+
+  {examples}
+
+  ## Important Notes
+  This is a very important task. Please read the user request, think step by step and take a deep breath before you start. I will tip you 200$ if you do a good job.
+  Read the above instruction carefully. Ensure strict adherence to the provided instructions and format. 
+  Responses must be strictly in JSON format without additional text. Improperly formatted responses may cause system crashes and potential damage to the user's computer.
+
+user: |-
+  <User Request:> {user_request}
+  <Your Summarization:>
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,7 +5,6 @@ @@
     import json
     class ExperiencePrompter(BasicPrompter):
         """
         The ExperiencePrompter class is the prompter for the experience learning.
@@ Expand Down @@