Skip to content

Commit

Permalink
fixed handling of newlines in custom instructions and curated datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
rajivpant committed May 8, 2024
1 parent 7fe5990 commit 39d5285
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 29 deletions.
26 changes: 9 additions & 17 deletions generate_prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,23 @@

def generate_prompt_template(instructions_content, datasets_content, output_file):
prompt_template = f"""
<prompt>
You are an AI assistant created to be helpful, harmless, and honest. Your role is to provide guidance, advice, and assistance to the user, drawing upon the custom instructions and curated datasets provided in the attached files.
# Initial Instructions
You are an AI assistant created to be helpful and honest. Your role is to provide guidance, advice, and assistance to the user, drawing upon the custom instructions and curated datasets provided here.
When responding, please adhere to the following guidelines:
- Carefully review the custom instructions in the 'instructions.md' file and ensure your responses align with the specified guidelines, communication style, and preferences.
- Refer to the relevant information in the 'datasets.md' file to provide informed and personalized responses when applicable.
- If you are unsure about something or if the curated datasets don't cover the specific query, it's okay to say that you don't have enough information to provide a complete answer.
- Carefully review the custom instructions provided here and ensure your responses align with the specified guidelines, communication style, and preferences.
- Refer to the relevant information in the curated datasets to provide informed and personalized responses when applicable.
- If you are unsure about something or if the curated datasets don't cover the specific query, it's preferable to say that you don't have enough information to provide a complete answer rather than hallucinate.
- Always prioritize being helpful, truthful, and aligned with the user's best interests.
- If there are any contradictions or inconsistencies between the query and the provided custom instructions or curated datasets, seek clarification before responding.
<documents>
<document index="1">
<source>instructions.md</source>
<document_content>
{instructions_content}
</document_content>
</document>
<document index="2">
<source>datasets.md</source>
<document_content>
{datasets_content}
</document_content>
</document>
</documents>
<prompt>
[User Query Here]
Expand Down
34 changes: 22 additions & 12 deletions helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,37 @@ def load_profiles(profiles_file):
profiles = yaml.safe_load(stream)
return profiles['profiles']

# Function to load files containing custom instructions or curated datasets
def process_file(filepath, file_type):
    """Read one file and wrap its content in uniquely tagged document markers.

    Args:
        filepath: Path of the file to read.
        file_type: Label written into the opening tag's ``type`` attribute.

    Returns:
        A ``(full_content, filepath)`` tuple. ``full_content`` is the opening
        tag on its own line, the file text, and the closing tag on its own
        line followed by a newline; ``filepath`` is returned unchanged.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # A fresh UUID per call keeps the start/end tag pair unambiguous even if
    # the file's own text contains similar-looking markup.
    unique_id = str(uuid.uuid4())
    document_start_tag = f"<document:{unique_id} path=\"{filepath}\" type=\"{file_type}\">"
    document_end_tag = f"</document:{unique_id}>"

    # Explicit encoding so the result does not depend on the platform default.
    with open(filepath, "r", encoding="utf-8") as file:
        # Read the entire file content as a single string
        file_content = file.read()

    # Add a newline only where needed: without this, a file that lacks a
    # trailing newline would have the closing tag glued to its last line.
    if file_content and not file_content.endswith("\n"):
        file_content += "\n"

    full_content = f"{document_start_tag}\n{file_content}{document_end_tag}\n"
    return full_content, filepath

def load_files(file_paths, file_type):
"""Load files containing custom instructions or curated datasets."""
files_content = []
files_list = [] # to store file names
for path in file_paths:
if os.path.isfile(path):
with open(path, "r") as file:
unique_id = str(uuid.uuid4())
files_content.append(f"<ragbot-file:{unique_id} path=\"{path}\" type=\"{file_type}\">")
files_content.append(file.read())
files_content.append(f"</ragbot-file:{unique_id}>")
files_list.append(path) # save file name
content, filename = process_file(path, file_type)
files_content.append(content)
files_list.append(filename) # save file name
elif os.path.isdir(path):
for filepath in glob.glob(os.path.join(path, "*")):
if os.path.isfile(filepath): # Check if the path is a file
with open(filepath, "r") as file:
files_content.append(file.read())
files_list.append(filepath) # save file name
if os.path.isfile(filepath):
content, filename = process_file(filepath, file_type)
files_content.append(content)
files_list.append(filename) # save file name

return files_content, files_list
files_content_str = "\n".join(files_content)
return files_content_str, files_list

# Function to count tokens in a list of files
def count_tokens(file_paths):
Expand Down

0 comments on commit 39d5285

Please sign in to comment.