restruct LLM call

iBalloonist · Mar 9, 2024 · af72e1d · af72e1d
1 parent 8607c47
commit af72e1d
Show file tree

Hide file tree

Showing 6 changed files with 39 additions and 163 deletions.
diff --git a/ufo/llm/llm_call.py b/ufo/llm/llm_call.py
@@ -12,11 +12,33 @@
 configs = load_config()
 
 
-def get_gptv_completion(messages, headers):
+def get_request_header():
+    """
+    Build the HTTP request headers that match the configured API_TYPE.
+    return: A dict of headers. For 'aoai' it carries the key under "api-key";
+            for 'openai' it carries the key as a bearer "Authorization" value;
+            for 'azure_ad' it is an empty dict.
+    raises: ValueError if API_TYPE is none of 'openai', 'aoai' or 'azure_ad'.
+    """
+    # Normalise once so the comparison below is case-insensitive.
+    api_type = configs["API_TYPE"].lower()
+
+    if api_type == "aoai":
+        return {
+            "Content-Type": "application/json",
+            "api-key": configs["OPENAI_API_KEY"],
+        }
+
+    if api_type == "openai":
+        bearer_token = "Bearer {key}".format(key=configs["OPENAI_API_KEY"])
+        return {
+            "Content-Type": "application/json",
+            "Authorization": bearer_token,
+        }
+
+    if api_type == "azure_ad":
+        # No header entries are set for this API type.
+        return {}
+
+    raise ValueError("API_TYPE should be either 'openai' or 'aoai' or 'azure_ad'.")
+
+
+def get_gptv_completion(messages):
     """
     Get GPT-V completion from messages.
     messages: The messages to be sent to GPT-V.
-    headers: The headers of the request.
     endpoint: The endpoint of the request.
     max_tokens: The maximum number of tokens to generate.
     temperature: The sampling temperature.
@@ -25,6 +47,8 @@ def get_gptv_completion(messages, headers):
     return: The response of the request.
     """
     aad = configs['API_TYPE'].lower() == 'azure_ad'
+    headers = get_request_header()
+
     if not aad:
         payload = {
             "messages": messages,

diff --git a/ufo/module/flow.py b/ufo/module/flow.py
@@ -72,7 +72,7 @@ def __init__(self, task):
         self.request = input()
         self.request_history = []
 
-    def process_application_selection(self, headers):
+    def process_application_selection(self):
 
         """
         Select an action.
@@ -100,7 +100,7 @@ def process_application_selection(self, headers):
         self.request_logger.debug(json.dumps({"step": self.step, "prompt": app_selection_prompt_message, "status": ""}))
 
         try:
-            response, cost = llm_call.get_gptv_completion(app_selection_prompt_message, headers)
+            response, cost = llm_call.get_gptv_completion(app_selection_prompt_message)
 
         except Exception as e:
             log = json.dumps({"step": self.step, "status": str(e), "prompt": app_selection_prompt_message})
@@ -181,7 +181,7 @@ def process_application_selection(self, headers):
             return
 
 
-    def process_action_selection(self, headers):
+    def process_action_selection(self):
         """
         Select an action.
         header: The headers of the request.
@@ -236,7 +236,7 @@ def process_action_selection(self, headers):
             self.request_logger.debug(json.dumps({"step": self.step, "prompt": action_selection_prompt_message, "status": ""}))
 
             try:
-                response, cost = llm_call.get_gptv_completion(action_selection_prompt_message, headers)
+                response, cost = llm_call.get_gptv_completion(action_selection_prompt_message)
             except Exception as e:
                 log = json.dumps({"step": self.step, "status": str(e), "prompt": action_selection_prompt_message})
                 print_with_color("Error occurs when calling LLM: {e}".format(e=str(e)), "red")
@@ -248,7 +248,6 @@ def process_action_selection(self, headers):
             self.cost += cost
 
             try:
-
                 response_string = response["choices"][0]["message"]["content"]
                 response_json = json_parser(response_string)
 

diff --git a/ufo/prompts/base/action_selection.yaml b/ufo/prompts/base/action_selection.yaml
@@ -86,8 +86,7 @@ system: |-
     "Args": <Specify the precise arguments in a dictionary format of the selected API function to be called on the control item to complete the user request, e.g., {{"button": "left", "double": false}}. Leave it a empty dictionary {{}} if you the API does not require arguments, or you believe none of the API function is suitable for the task, or the task is complete.>
     "Status": <Specify the status of the task given the action.>
     "Plan": <Specify the following plan of action to complete the user request. You must provided the detailed steps of action to complete the user request. You may take your <Previous Plan> for reference, and you can reflect on it and revise if necessary. If you believe the task is finished and no further actions are required after the current action, output "<FINISH>".>
-    "Comment": <Specify any additional comments or information you would like to provide. This field is optional. If the task is finished or pending for finish, you have to give a brief summary of the task or action flow to answer the user request. If the task is not finished, you can give a brief summary of the current progress, describe and summarize what you see if current action is to do so, and list some change of plan for future actions if your decide to make changes.>
-    }}
+    "Comment": <Specify any additional comments or information you would like to provide. This field is optional. If the task is finished or pending for finish, you have to give a brief summary of the task or action flow to answer the user request. If the task is not finished, you can give a brief summary of the current progress, describe and summarize what you see if current action is to do so, and list some change of plan for future actions if your decide to make changes.>}}
 
   - If the user request includes asking questions, and you can answer the question without taking calling API on the application at current step, you should answer the question in the "Comment" field in the response, and set the "Status" as "FINISH".
   - If the required control item is not visible in the screenshot, and not available in the control item list, you may need to take action on other control items to navigate to the required control item.
@@ -107,14 +106,17 @@ system: |-
   - Do not take action if the current action need further input. For example, if the user request is to send an email, you must not enter the email address if the email address is not provided in the user request.
   - Try to locate and use the "Results" in the <Step History> to complete the user request, such as adding these results along with information to meet the user request into SetText when composing a message, email or document, when necessary. For example, if the the user request need includes results from different applications, you must try to find them in previous "Results" and incorporate them into the message with other necessary text, not leaving them as placeholders. Make sure the text composed is integrated and meets the user request.
   - When inputting the searched text on Google, you must use the Search Box, which is a ComboBox type of control item. Do not use the address bar to input the searched text.
+  - The 'Copilot' Add-in can help you with some special requests, such as creating a slide in PowerPoint from a Word document.
+  - You are given the help documents of the application and/or the online search results for completing the user request. You may use them to help you think about the next step and construct your plan. This information is for reference only, and may not be relevant, accurate or up-to-date. You must rely more on the current screenshots and control item list to make the decision.
 
   {examples}
-  
+
   This is a very important task. Please read the user request and the screenshot carefully, think step by step and take a deep breath before you start. I will tip you 200$ if you do a good job.
-  Read the above instruction carefully. Make sure the response and action  strictly following these instruction and meet the user request.
+  Read the above instructions carefully. Make sure the response and action strictly follow these instructions and meet the user request.
   Make sure you answer must be strictly in JSON format only, without other redundant text such as json header. Your output must be able to be able to be parsed by json.loads(). Otherwise, it will crash the system and destroy the user's computer.
 
 user: |-
+  {retrieved_docs}
   <Available Control Item:> {control_item}
   <Request History:> {request_history}
   <Step History:> {action_history}