diff --git a/.gitignore b/.gitignore index d134431d..efe4d12f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,12 +12,9 @@ __pycache__/ # Ignore the config file ufo/config/config.yaml -*.yaml.test -*.yaml + # Ignore the helper files ufo/rag/app_docs/* learner/records.json vectordb/* - - diff --git a/learner/__init__.py b/learner/__init__.py new file mode 100644 index 00000000..7f3fd831 --- /dev/null +++ b/learner/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. \ No newline at end of file diff --git a/learner/__main__.py b/learner/__main__.py new file mode 100644 index 00000000..b79ff465 --- /dev/null +++ b/learner/__main__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from . import learn + +if __name__ == "__main__": + # Execute the main script + learn.main() \ No newline at end of file diff --git a/learner/basic.py b/learner/basic.py new file mode 100644 index 00000000..3d63592f --- /dev/null +++ b/learner/basic.py @@ -0,0 +1,39 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +from . import utils + + +class BasicDocumentLoader: + """ + A class to load documents from a list of files with a given extension list. + """ + + def __init__(self, extensions: str = None, directory: str = None): + """ + Create a new BasicDocumentLoader. + :param extensions: The extensions to load. + """ + self.extensions = extensions + self.directory = directory + + + def load_file_name(self): + """ + Load the documents from the given directory. + :param directory: The directory to load from. + :return: The list of loaded documents. + """ + return utils.find_files_with_extension(self.directory, self.extensions) + + + def construct_document_list(self): + """ + Load the metadata from the given directory. + :param directory: The directory to load from. + :return: The list of metadata for the loaded documents. + """ + pass + + + + \ No newline at end of file diff --git a/learner/indexer.py b/learner/indexer.py new file mode 100644 index 00000000..022ad118 --- /dev/null +++ b/learner/indexer.py @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from . import xml_loader +from .utils import load_json_file, save_json_file, print_with_color +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.vectorstores import FAISS +import os + + +def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str): + """ + Create an indexer for the given application. + :param app: The name of the application to create an indexer for. + :param docs: The help documents dir for the application. + :param format: The format of the help documents. + :param incremental: Whether to enable incremental updates. + :param save_path: The path to save the indexer to. + :return: The created indexer. + """ + + if os.path.exists("./learner/records.json"): + records = load_json_file("./learner/records.json") + else: + records = {} + + print_with_color("Loading documents from {docs}...".format(docs=docs), "cyan") + + loader = xml_loader.XMLLoader(docs) + documents = loader.construct_document() + + print_with_color("Creating indexer for {num} documents for {app}...".format(num=len(documents), app=app), "yellow") + + if format == "xml": + embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") + else: + raise ValueError("Invalid format: " + format) + + db = FAISS.from_documents(documents, embeddings) + + if incremental: + if app in records: + print_with_color("Merging with previous indexer...", "yellow") + prev_db = FAISS.load_local(records[app], embeddings) + db.merge_from(prev_db) + + db_file_path = os.path.join(save_path, app) + db_file_path = os.path.abspath(db_file_path) + db.save_local(db_file_path) + + records[app] = db_file_path + + + save_json_file("./learner/records.json", records) + + print_with_color("Indexer for {app} created successfully. Save in {path}.".format(app=app, path=db_file_path), "green") + + return db_file_path + + + + + diff --git a/learner/learn.py b/learner/learn.py new file mode 100644 index 00000000..cc8489da --- /dev/null +++ b/learner/learn.py @@ -0,0 +1,36 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import argparse +from . import indexer + + + +# configs = load_config() + +args = argparse.ArgumentParser() +args.add_argument("--app", help="The name of application to learn.", + type=str, default="./") +args.add_argument("--docs", help="The help application of the app.", type=str, + default="./") +args.add_argument("--format", help="The format of the help doc.", type=str, + default="xml") +args.add_argument('--incremental', action='store_true', help='Enable incremental update.') +args.add_argument("--save_path", help="The format of the help doc.", type=str, + default="./vectordb/docs/") + + + + +parsed_args = args.parse_args() + +def main(): + """ + Main function. + """ + + db_file_path = indexer.create_indexer(parsed_args.app, parsed_args.docs, parsed_args.format, parsed_args.incremental, parsed_args.save_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/learner/utils.py b/learner/utils.py new file mode 100644 index 00000000..b25b08d4 --- /dev/null +++ b/learner/utils.py @@ -0,0 +1,95 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +import os +import json +from colorama import Fore, Style, init + +# init colorama +init() + +def print_with_color(text: str, color: str = ""): + """ + Print text with specified color using ANSI escape codes from Colorama library. + + :param text: The text to print. + :param color: The color of the text (options: red, green, yellow, blue, magenta, cyan, white, black). + """ + color_mapping = { + "red": Fore.RED, + "green": Fore.GREEN, + "yellow": Fore.YELLOW, + "blue": Fore.BLUE, + "magenta": Fore.MAGENTA, + "cyan": Fore.CYAN, + "white": Fore.WHITE, + "black": Fore.BLACK + } + + selected_color = color_mapping.get(color.lower(), "") + colored_text = selected_color + text + Style.RESET_ALL + + print(colored_text) + + + +def find_files_with_extension(directory, extension): + """ + Find files with the given extension in the given directory. + :param directory: The directory to search. + :param extension: The extension to search for. + :return: The list of matching files. + """ + matching_files = [] + + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(extension): + path = os.path.join(root, file) + path = os.path.realpath(path) + matching_files.append(path) + + return matching_files + + + +def find_files_with_extension_list(directory, extensions): + """ + Find files with the given extensions in the given directory. + :param directory: The directory to search. + :param extensions: The list of extensions to search for. + :return: The list of matching files. + """ + matching_files = [] + + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(tuple(extensions)): + path = os.path.join(root, file) + path = os.path.realpath(path) + matching_files.append(path) + + return matching_files + + + +def load_json_file(file_path): + """ + Load a JSON file. + :param file_path: The path to the file to load. + :return: The loaded JSON data. + """ + + with open(file_path, 'r') as file: + data = json.load(file) + return data + + + +def save_json_file(file_path, data): + """ + Save a JSON file. + :param file_path: The path to the file to save. + """ + + with open(file_path, 'w') as file: + json.dump(data, file, indent=4) diff --git a/learner/xml_loader.py b/learner/xml_loader.py new file mode 100644 index 00000000..f08d603a --- /dev/null +++ b/learner/xml_loader.py @@ -0,0 +1,112 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from . import basic +import os +from langchain_community.document_loaders import UnstructuredXMLLoader +from langchain.docstore.document import Document +import xml.etree.ElementTree as ET + + +class XMLLoader(basic.BasicDocumentLoader): + """ + Class to load XML documents. + """ + + def __init__(self, directory: str = None): + """ + Create a new XMLLoader. + """ + + super().__init__() + self.extensions = ".xml" + self.directory = directory + + + def get_microsoft_document_metadata(self, file: str): + """ + Get the metadata for the given file. + :param file: The file to get the metadata for. + :return: The metadata for the given file. + """ + + if not os.path.exists(file): + return {'title': os.path.basename(file), 'summary': os.path.basename(file)} + + tree = ET.parse(file) + root = tree.getroot() + + # Extracting title + if root.find('title') is not None: + title = root.find('title').text + else: + title = None + + # Extracting content summary + + if root.find('Content-Summary') is not None: + summary = root.find('Content-Summary').attrib['value'] + else: + summary = None + + return {'title': title, 'summary': summary} + + + def get_microsoft_document_text(self, file: str): + """ + Get the text for the given file. + :param file: The file to get the text for. + :return: The text for the given file. + """ + return UnstructuredXMLLoader(file).load()[0] + + + def construct_document_list(self): + """ + Construct a list of documents. + :return: The list of documents. + """ + documents = [] + for file in self.load_file_name(): + text = self.get_microsoft_document_text(file) + metadata = self.get_microsoft_document_metadata(file + ".meta") + title = metadata["title"] + summary = metadata["summary"] + + document = { + 'title': title, + 'summary': summary, + 'text':text + } + documents.append(document) + + return documents + + + + def construct_document(self): + """ + Construct a langchain document list. + :return: The langchain document list. + """ + documents = [] + for file in self.load_file_name(): + text = self.get_microsoft_document_text(file) + metadata = self.get_microsoft_document_metadata(file + ".meta") + title = metadata["title"] + summary = metadata["summary"] + page_content = """{title} - {summary}""".format(title=title, summary=summary) + + metadata = { + 'title': title, + 'summary': summary, + 'text':text + } + document = Document(page_content=page_content, metadata=metadata) + + documents.append(document) + return documents + + + + diff --git a/ufo/config/config.py b/ufo/config/config.py index dc3dc031..780e70a9 100644 --- a/ufo/config/config.py +++ b/ufo/config/config.py @@ -22,6 +22,16 @@ def load_config(config_path="ufo/config/config.yaml"): if yaml_data: configs.update(yaml_data) except FileNotFoundError: - print(f"Warning: Config file not found at {config_path}. Using only environment variables.") + print( + f"Warning: Config file not found at {config_path}. Using only environment variables.") - return configs \ No newline at end of file + # Update the API base URL for AOAI + if configs["API_TYPE"].lower() == "aoai": + configs["OPENAI_API_BASE"] = "{endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version={api_version}".format( + endpoint=configs["OPENAI_API_BASE"][:-1] if configs["OPENAI_API_BASE"].endswith( + "/") else configs["OPENAI_API_BASE"], + deployment_name=configs["AOAI_DEPLOYMENT"], + api_version="2024-02-15-preview" + ) + + return configs diff --git a/ufo/config/config.yaml.template b/ufo/config/config.yaml.template index 353d7636..26885757 100644 --- a/ufo/config/config.yaml.template +++ b/ufo/config/config.yaml.template @@ -1,10 +1,12 @@ -version: 0.2 +version: 0.1 -API_TYPE: "azure_ad" # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API. -OPENAI_API_BASE: "YOUR_ENDPOINT" # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD API address. Format: https://{your-resource-name}.azure-api.net/ +API_TYPE: "openai" # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API. +OPENAI_API_BASE: "YOUR_ENDPOINT" # The OpenAI API. OPENAI_API_KEY: "YOUR_API_KEY" # The OpenAI API key -API_VERSION: "API_VERSION" # For GPT4-visual, the value usually be the "2023-12-01-preview" -OPENAI_API_MODEL: "gpt-4-visual-preview" # The only OpenAI model by now that accepts visual input +AOAI_DEPLOYMENT: "YOUR_AOAI_DEPLOYMENT" # Your AOAI deployment if apply +API_VERSION: "2024-02-15-preview" # For GPT4-visual, the value usually be the "2023-12-01-preview" +OPENAI_API_MODEL: "gpt-4-vision-preview" # The only OpenAI model by now that accepts visual input + CONTROL_BACKEND: "uia" # The backend for control action MAX_TOKENS: 2000 # The max token limit for the response completion MAX_RETRY: 3 # The max retry limit for the response completion @@ -36,7 +38,8 @@ REQUEST_TIMEOUT: 250 # The call timeout for the GPT-V model APP_SELECTION_PROMPT: "ufo/prompts/base/app_selection.yaml" # The prompt for the app selection ACTION_SELECTION_PROMPT: "ufo/prompts/base/action_selection.yaml" # The prompt for the action selection INPUT_TEXT_API: "type_keys" # The input text API -###For AAD + +### For AAD AAD_TENANT_ID: "YOUR_TENANT_ID" # Set the value to your tenant id for the llm model AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE @@ -44,5 +47,4 @@ AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the - \ No newline at end of file diff --git a/ufo/prompts/base/app_selection.yaml b/ufo/prompts/base/app_selection.yaml index 816c47e0..e534389f 100644 --- a/ufo/prompts/base/app_selection.yaml +++ b/ufo/prompts/base/app_selection.yaml @@ -10,7 +10,7 @@ system: |- ## Guidelines - You are given a screenshot of the current desktop, along with a list of available applications in the windows. - The screenshot of multiple screens is concatenated into one image. - - You are given the information of all available applications item in the current desktop window in a dict format: {"label": control_text: "the text of the application", control_type: "the type of the application"}. + - You are given the information of all available applications item in the current desktop window in a dict format: {label: "label", control_text: "the text of the application", control_type: "the type of the application"}. - You are provided your previous plan of action for reference to decide the application. This usually happens when the you have already completed the previous task on an application and need to switch to another application to complete the next task. - When the selected application is visible in the screenshot, analyze the screenshot of the application window on its current status. Draft your plan based on the current status of the application and user request, and do not include the steps that have been completed on the application base on your screenshot observation. - You are provided the user request history for reference to decide the selection of application or control item. diff --git a/ufo/rag/__init__.py b/ufo/rag/__init__.py new file mode 100644 index 00000000..7f3fd831 --- /dev/null +++ b/ufo/rag/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. \ No newline at end of file