Local II
KillianLucas committed Jun 2, 2024
1 parent b031e0c commit 0f8bf8a
Showing 15 changed files with 568 additions and 93 deletions.
27 changes: 27 additions & 0 deletions interpreter/core/computer/ai/ai.py
@@ -118,19 +118,46 @@ def __init__(self, computer):
        self.computer = computer

    def chat(self, text):
        messages = [
            {
                "role": "system",
                "type": "message",
                "content": "You are a helpful AI assistant.",
            },
            {"role": "user", "type": "message", "content": text},
        ]
        response = ""
        for chunk in self.computer.interpreter.llm.run(messages):
            if "content" in chunk:
                response += chunk.get("content")
        return response

        # Old way
        old_messages = self.computer.interpreter.llm.interpreter.messages
        old_system_message = self.computer.interpreter.llm.interpreter.system_message
        old_import_computer_api = self.computer.import_computer_api
        old_execution_instructions = (
            self.computer.interpreter.llm.execution_instructions
        )
        try:
            self.computer.interpreter.llm.interpreter.system_message = (
                "You are an AI assistant."
            )
            self.computer.interpreter.llm.interpreter.messages = []
            self.computer.import_computer_api = False
            self.computer.interpreter.llm.execution_instructions = ""

            response = self.computer.interpreter.llm.interpreter.chat(text)
        finally:
            self.computer.interpreter.llm.interpreter.messages = old_messages
            self.computer.interpreter.llm.interpreter.system_message = (
                old_system_message
            )
            self.computer.import_computer_api = old_import_computer_api
            self.computer.interpreter.llm.execution_instructions = (
                old_execution_instructions
            )

        return response[-1].get("content")

    def query(self, text, query, custom_reduce_query=None):
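For context, a minimal sketch of calling the helper above from an interpreter instance; the prompt text is made up, but `interpreter.computer.ai.chat` is the same entry point this commit uses later in local_setup.py:

    from interpreter import interpreter

    # chat() now builds its own message list and streams it through llm.run(),
    # so a one-off question does not touch the main conversation history.
    reply = interpreter.computer.ai.chat("In one sentence, what is a context window?")
    print(reply)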
6 changes: 5 additions & 1 deletion interpreter/core/computer/utils/html_to_png_base64.py
@@ -5,12 +5,16 @@

from html2image import Html2Image

from ....core.utils.lazy_import import lazy_import

html2image = lazy_import("html2image")

from ....terminal_interface.utils.local_storage_path import get_storage_path


def html_to_png_base64(code):
    # Convert the HTML into an image using html2image
    hti = Html2Image()
    hti = html2image.Html2Image()

    # Generate a random filename for the temporary image
    temp_filename = "".join(random.choices(string.digits, k=10)) + ".png"
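`lazy_import` defers the real `html2image` import until the module is first used, which keeps startup fast when the dependency is heavy or missing. A generic sketch of the pattern built on the standard library's `importlib.util.LazyLoader` (not necessarily the project's exact implementation):

    import importlib.util
    import sys


    def lazy_import(name):
        # Create the module object now, but defer executing the real import
        # until the first attribute access on the returned module.
        spec = importlib.util.find_spec(name)
        loader = importlib.util.LazyLoader(spec.loader)
        spec.loader = loader
        module = importlib.util.module_from_spec(spec)
        sys.modules[name] = module
        loader.exec_module(module)
        return module


    html2image = lazy_import("html2image")  # nothing imported yet
    hti = html2image.Html2Image()           # the real import happens here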
21 changes: 19 additions & 2 deletions interpreter/core/llm/llm.py
@@ -1,3 +1,6 @@
import os

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
import litellm

litellm.suppress_debug_info = True
@@ -72,6 +75,7 @@ def run(self, messages):
            model = "openai/i"
            if not hasattr(self.interpreter, "conversation_id"):  # Only do this once
                self.context_window = 7000
                self.api_key = "x"
                self.max_tokens = 1000
                self.api_base = "https://api.openinterpreter.com/v0"
                self.interpreter.conversation_id = str(uuid.uuid4())
@@ -117,12 +121,25 @@ def run(self, messages):
        elif self.supports_vision == False and self.vision_renderer:
            for img_msg in image_messages:
                if img_msg["format"] != "description":
                    self.interpreter.display_message("*Viewing image...*")

                    if img_msg["format"] == "path":
                        precursor = f"The image I'm referring to ({img_msg['content']}) contains the following: "
                        if self.interpreter.computer.import_computer_api:
                            postcursor = f"\nIf you want to ask questions about the image, run `computer.vision.query(path='{img_msg['content']}', query='(ask any question here)')` and a vision AI will answer it."
                        else:
                            postcursor = ""
                    else:
                        precursor = "Imagine I have just shown you an image with this description: "
                        postcursor = ""

                    img_msg["content"] = (
                        "Imagine I have just shown you an image with this description: "
                        precursor
                        + self.vision_renderer(lmc=img_msg)
                        + "\n---\nThe image contains the following text exactly, extracted via OCR: '''\n"
                        + "\n---\nThe image contains the following text exactly: '''\n"
                        + self.interpreter.computer.vision.ocr(lmc=img_msg)
                        + "\n'''"
                        + postcursor
                    )
                    img_msg["format"] = "description"

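To make the precursor/postcursor rewrite above concrete, this is roughly the message a non-vision model ends up receiving for a path-format image when the computer API is enabled; the literal strings come from the diff, while the path, description, and OCR text are invented stand-ins for the real renderer and OCR output:

    path = "/tmp/screenshot.png"                      # hypothetical image path
    description = "A terminal showing a traceback."   # stand-in for self.vision_renderer(lmc=img_msg)
    ocr_text = "ZeroDivisionError: division by zero"  # stand-in for computer.vision.ocr(lmc=img_msg)

    precursor = f"The image I'm referring to ({path}) contains the following: "
    postcursor = (
        f"\nIf you want to ask questions about the image, run "
        f"`computer.vision.query(path='{path}', query='(ask any question here)')` "
        "and a vision AI will answer it."
    )

    content = (
        precursor
        + description
        + "\n---\nThe image contains the following text exactly: '''\n"
        + ocr_text
        + "\n'''"
        + postcursor
    )
    print(content)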
2 changes: 2 additions & 0 deletions interpreter/core/respond.py
@@ -1,7 +1,9 @@
import json
import os
import re
import traceback

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
import litellm

from ..terminal_interface.utils.display_markdown_message import display_markdown_message
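Both this file and llm.py set the variable before `import litellm`, presumably because LiteLLM consults `LITELLM_LOCAL_MODEL_COST_MAP` while it is being imported; a later assignment would have no effect. The ordering in miniature:

    import os

    # Must come before the import below; LiteLLM is assumed to read this
    # variable at import time to decide whether to skip the remote cost-map fetch.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"

    import litellm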
38 changes: 35 additions & 3 deletions interpreter/terminal_interface/local_setup.py
@@ -234,9 +234,19 @@ def download_model(models_dir, models, interpreter):
            if line.strip()
        ]  # Extract names, trim out ":latest", skip header

        for model in ["llama3", "phi3", "wizardlm2"]:
        if "llama3" in names:
            names.remove("llama3")
            names = ["llama3"] + names

        if "codestral" in names:
            names.remove("codestral")
            names = ["codestral"] + names

        for model in ["llama3", "phi3", "wizardlm2", "codestral"]:
            if model not in names:
                names.append("→ Download " + model)
                names.append("↓ Download " + model)

        names.append("Browse Models ↗")

        # Create a new inquirer selection from the names
        name_question = [
@@ -253,15 +263,37 @@

        selected_name = name_answer["name"]

        if "download" in selected_name.lower():
        if "↓ Download " in selected_name:
            model = selected_name.split(" ")[-1]
            interpreter.display_message(f"\nDownloading {model}...\n")
            subprocess.run(["ollama", "pull", model], check=True)
        elif "Browse Models ↗" in selected_name:
            interpreter.display_message(
                "Opening [ollama.com/library](ollama.com/library)."
            )
            import webbrowser

            webbrowser.open("https://ollama.com/library")
            exit()
        else:
            model = selected_name.strip()

        # Set the model to the selected model
        interpreter.llm.model = f"ollama/{model}"

        # Send a ping, which will actually load the model
        interpreter.display_message("Loading model...")

        old_max_tokens = interpreter.llm.max_tokens
        old_context_window = interpreter.llm.context_window
        interpreter.llm.max_tokens = 1
        interpreter.llm.context_window = 100

        interpreter.computer.ai.chat("ping")

        interpreter.llm.max_tokens = old_max_tokens
        interpreter.llm.context_window = old_context_window

        interpreter.display_message(f"> Model set to `{model}`")

    # If Ollama is not installed or not recognized as a command, prompt the user to download Ollama and try again
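The save-and-restore of max_tokens and context_window around the warm-up ping is a common pattern; a small sketch of the same idea as a context manager, assuming only that `llm` exposes those two attributes directly (the helper name is hypothetical, not part of the codebase):

    from contextlib import contextmanager


    @contextmanager
    def tiny_request_settings(llm, max_tokens=1, context_window=100):
        # Shrink generation limits so the model-loading ping is as cheap as possible,
        # then restore the caller's settings even if the ping raises.
        old_max_tokens, old_context_window = llm.max_tokens, llm.context_window
        llm.max_tokens, llm.context_window = max_tokens, context_window
        try:
            yield llm
        finally:
            llm.max_tokens, llm.context_window = old_max_tokens, old_context_window


    # Usage (illustrative):
    # with tiny_request_settings(interpreter.llm):
    #     interpreter.computer.ai.chat("ping")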
4 changes: 2 additions & 2 deletions interpreter/terminal_interface/profiles/defaults/01.py
@@ -5,13 +5,13 @@
interpreter.llm.supports_vision = True
interpreter.shrink_images = True # Faster but less accurate

interpreter.llm.model = "gpt-4-vision-preview"
interpreter.llm.model = "gpt-4o"

interpreter.llm.supports_functions = False
interpreter.llm.context_window = 110000
interpreter.llm.max_tokens = 4096
interpreter.auto_run = True

interpreter.computer.import_computer_api = True
interpreter.force_task_completion = True
interpreter.force_task_completion_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
interpreter.force_task_completion_breakers = [
(Diffs for the remaining changed files in this commit are not shown.)
