#!/usr/bin/env python3
# ragbot.py - https://github.com/rajivpant/ragbot
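"""Ragbot.AI command-line chatbot: an augmented brain and assistant.

Generates replies with OpenAI or Anthropic models, optionally conditioned on
custom instructions and curated datasets (passed directly or via named
profiles), and supports one-shot prompts, stdin input, and interactive
sessions that can be saved to and reloaded from JSON files.
"""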
import os
import sys
from dotenv import load_dotenv
import argparse
import re
import yaml
import json
import appdirs
import openai
import anthropic
from langchain_community.llms import OpenAI, OpenAIChat, Anthropic
from helpers import load_files, load_config, print_saved_files, chat, load_profiles
appname = "ragbot"
appauthor = "Rajiv Pant"
data_dir = appdirs.user_data_dir(appname, appauthor)
sessions_data_dir = os.path.join(data_dir, "sessions")
load_dotenv() # Load environment variables from .env file
# Load configuration from engines.yaml
config = load_config('engines.yaml')
engines_config = {engine['name']: engine for engine in config['engines']}
engine_choices = list(engines_config.keys())
default_models = {engine: engines_config[engine]['default_model'] for engine in engine_choices}
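# Expected engines.yaml shape, inferred from the lookups in this file
# (values below are illustrative, not prescriptive):
#   default: openai
#   engines:
#     - name: openai
#       api_key_name: OPENAI_API_KEY
#       default_model: ...
#       models:
#         - name: ...
#           temperature: 0.75
#           max_tokens: 1024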
added_curated_datasets = False  # whether curated datasets have been added to the conversation; reset around Anthropic prompts below
def main():
global added_curated_datasets
parser = argparse.ArgumentParser(
description="Ragbot.AI is an augmented brain and asistant. Learn more at https://ragbot.ai"
)
input_group = parser.add_mutually_exclusive_group()
input_group.add_argument(
"-ls",
"--list-saved",
action="store_true",
help="List all the currently saved JSON files."
)
input_group2 = parser.add_mutually_exclusive_group()
input_group2.add_argument(
"-p", "--prompt", help="The user's input to generate a response for."
)
input_group2.add_argument(
"-f",
"--prompt_file",
help="The file containing the user's input to generate a response for.",
)
input_group2.add_argument(
"-i",
"--interactive",
action="store_true",
help="Enable interactive assistant chatbot mode.",
)
input_group2.add_argument(
"--stdin",
action="store_true",
help="Read the user's input from stdin."
)
parser.add_argument(
"-profile",
"--profile",
help="Name of the profile to use.",
)
    parser.add_argument(
        "-c", "--custom_instructions", nargs='*', default=[],
        help="Paths to custom instructions files or folders. Can accept multiple values."
    )
    parser.add_argument(
        "-nc", "--nocustom_instructions",
        action="store_true",
        help="Ignore all custom instructions even if they are specified."
    )
    parser.add_argument(
        "-d", "--curated_dataset", nargs='*', default=[],
        help="Paths to curated dataset files or folders used as prompt context. Can accept multiple values."
    )
    parser.add_argument(
        "-nd", "--nocurated_dataset",
        action="store_true",
        help="Ignore all curated datasets even if they are specified."
    )
parser.add_argument(
"-e",
"--engine",
default=config.get('default', 'openai'),
choices=engine_choices,
help="The engine to use for the chat.",
)
parser.add_argument(
"-m",
"--model",
help="The model to use for the chat. Defaults to engine's default model.",
)
parser.add_argument(
"-t",
"--temperature",
type=float,
default=None,
help="The creativity of the response, with higher values being more creative.",
)
parser.add_argument(
"-mt", "--max_tokens",
type=int,
default=None,
help="The maximum number of tokens to generate in the response.",
)
parser.add_argument(
"-l",
"--load",
help="Load a previous interactive session from a file.",
)
    args, _unknown = parser.parse_known_args()
if args.list_saved:
print_saved_files(data_dir)
return
new_session = False # Variable to track if this is a new session
if args.load:
args.interactive = True # Automatically enable interactive mode when loading a session
args.nocurated_dataset = True # Do not load curated_dataset files when loading a session
else:
new_session = True # This is a new session
    custom_instructions = []
    custom_instructions_files = []  # to store file names of custom instructions
    curated_datasets = []
    curated_dataset_files = []  # to store file names of curated_datasets
# Load profiles
profiles = load_profiles('profiles.yaml')
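    # Each profile in profiles.yaml is expected (per the lookups below) to carry
    # optional 'custom_instructions' and 'curated_datasets' lists of paths.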
if args.profile:
# Get custom instruction and curated dataset paths from selected profile
selected_profile_data = next((profile for profile in profiles if profile['name'] == args.profile), None)
if not selected_profile_data:
print(f"Error: Profile '{args.profile}' not found in profiles.yaml")
sys.exit(1)
custom_instruction_paths = selected_profile_data.get('custom_instructions', [])
curated_dataset_paths = selected_profile_data.get('curated_datasets', [])
else:
custom_instruction_paths = []
curated_dataset_paths = []
    if not args.nocustom_instructions:
        # Load the profile's default custom instructions plus any passed via -c
        default_custom_instructions_paths = [path for path in custom_instruction_paths if path.strip() != '']
        custom_instructions, custom_instructions_files = load_files(default_custom_instructions_paths + args.custom_instructions)
        if custom_instructions_files:
            print("Custom instructions being used:")
            for file in custom_instructions_files:
                print(f" - {file}")
        else:
            print("No custom instructions files are being used.")
    if not args.nocurated_dataset:
        # Load the profile's default curated datasets plus any passed via -d
        default_curated_dataset_paths = [path for path in curated_dataset_paths if path.strip() != '']
        curated_datasets, curated_dataset_files = load_files(default_curated_dataset_paths + args.curated_dataset)
if curated_dataset_files:
print("Curated datasets being used:")
for file in curated_dataset_files:
print(f" - {file}")
else:
print("No curated_dataset files are being used.")
history = []
for custom_instruction in custom_instructions:
history.append(
{
"role": "system",
"content": custom_instruction,
}
)
for curated_dataset in curated_datasets:
history.append(
{
"role": "system",
"content": curated_dataset,
}
)
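    # A saved session (see /save in interactive mode) is simply this history
    # list serialized as JSON, so loading one restores the full message state.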
    if args.load:
        filename = args.load.strip()  # Remove leading and trailing spaces
        full_path = os.path.join(sessions_data_dir, filename)
        if not os.path.isfile(full_path):
            print(f"Error: Saved session file not found: {full_path}")
            sys.exit(1)
        with open(full_path, 'r') as f:
            history = json.load(f)
        print(f"Continuing previously saved session from file: {filename}")
model = args.model
if model is None:
model = default_models[args.engine]
# Get the engine API key from environment variable
api_key_name = engines_config[args.engine].get('api_key_name')
if api_key_name:
engines_config[args.engine]['api_key'] = os.getenv(api_key_name)
if args.engine == 'openai':
openai.api_key = engines_config[args.engine]['api_key']
elif args.engine == 'anthropic':
anthropic.api_key = engines_config[args.engine]['api_key']
# Get the default max_tokens and temperature from the engines.yaml configuration
    selected_model = next((item for item in engines_config[args.engine]['models'] if item['name'] == model), None)
    if selected_model:
        default_temperature = selected_model.get('temperature', 0.75)
        default_max_tokens = selected_model.get('max_tokens', 1024)
    else:
        default_temperature = 0.75
        default_max_tokens = 1024
# Use the default values if not provided by the user
    max_tokens = args.max_tokens if args.max_tokens is not None else default_max_tokens
    temperature = args.temperature if args.temperature is not None else default_temperature
print(f"Using AI engine {args.engine} with model {model}")
print(f"Creativity temperature setting: {temperature}")
print(f"Max tokens setting: {max_tokens}")
if args.interactive:
print("Entering interactive mode.")
added_curated_datasets = False
while True:
            try:
                prompt = input("\nEnter prompt below. /quit to exit or /save file_name.json to save conversation.\n> ")
            except EOFError:
                break
            if prompt.lower() == "/quit":
                break
elif prompt.lower().startswith("/save "):
filename = prompt[6:].strip() # Remove leading '/save ' and spaces
full_path = os.path.join(sessions_data_dir, filename)
os.makedirs(os.path.dirname(full_path), exist_ok=True)
with open(full_path, 'w') as f:
json.dump(history, f)
print(f"Conversation saved to {full_path}")
continue
history.append({"role": "user", "content": prompt})
reply = chat(prompt=prompt, custom_instructions=custom_instructions, curated_datasets=curated_datasets, history=history, engine=args.engine, model=model, max_tokens=max_tokens, temperature=temperature, interactive=args.interactive, new_session=new_session)
history.append({"role": "assistant", "content": reply})
print(f"Ragbot.AI: {reply}")
if new_session and args.engine == "anthropic":
added_curated_datasets = False # Reset curated_datasets flag after each user prompt
else:
prompt = None
if args.prompt:
prompt = args.prompt
elif args.prompt_file:
with open(args.prompt_file, 'r') as f:
prompt = f.read().strip()
elif args.stdin:
stdin = sys.stdin.readlines()
if stdin:
prompt = "".join(stdin).strip()
if prompt is None:
print("Error: No prompt provided. Please provide a prompt using -p, -f, or -i option.")
sys.exit(1)
history.append({"role": "user", "content": prompt})
if args.engine == "anthropic":
added_curated_datasets = False # Reset curated_datasets flag before each user prompt
reply = chat(prompt=prompt, custom_instructions=custom_instructions, curated_datasets=curated_datasets, history=history, engine=args.engine, model=model, max_tokens=max_tokens, temperature=temperature, interactive=args.interactive, new_session=new_session)
        pattern = re.compile(r'OUTPUT ?= ?"""(.*?)"""', re.DOTALL)
is_structured = pattern.search(reply)
if is_structured:
reply = is_structured[1].strip()
print(reply)
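
# Example invocations (paths, profile names, and prompts are illustrative):
#   ./ragbot.py -p "Summarize these notes" -e openai
#   ./ragbot.py -i --profile work -d ~/datasets/projects
#   echo "What changed last week?" | ./ragbot.py --stdin
#   ./ragbot.py -ls
#   ./ragbot.py -l my_session.json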
if __name__ == "__main__":
main()