Fixed wandb with image uploads, added docker-compose, made Ollama wait longer

vanpelt committed Apr 5, 2024
1 parent 8f8edb7 commit 20e3d40
Showing 5 changed files with 45 additions and 5 deletions.
README.md (15 additions, 0 deletions)
````diff
@@ -26,6 +26,21 @@ export OPENAI_API_KEY=xxx
 python -m openui
 ```
 
+### Docker Compose
+
+> DISCLAIMER: This is likely going to be very slow. If you have a GPU, you may need to change the tag of the `ollama` container to one that supports it. If you're running on a Mac, follow the instructions above and run Ollama natively to take advantage of the M1/M2.
+
+From the root directory you can run:
+
+```bash
+docker-compose up -d
+docker exec -it openui-ollama-1 ollama pull llava
+```
+
+If you already have OPENAI_API_KEY set in your environment, just remove `=xxx` from the `OPENAI_API_KEY` line. You can also replace `llava` in the command above with your open source model of choice *(llava is currently one of the only Ollama models that supports images)*. You should now be able to access OpenUI at [http://localhost:7878](http://localhost:7878).
+
+If you make changes to the frontend or backend, you'll need to run `docker-compose build` to have them reflected in the service.
+
 ### Docker
 
 You can build and run the docker file from the `/backend` directory:
````
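The disclaimer above mentions switching the `ollama` container to a GPU-capable configuration. As an illustration only (not part of this commit), an alternative to swapping the image tag is the Compose spec's GPU device reservations; the override file name `docker-compose.gpu.yaml` is hypothetical:

```yaml
# docker-compose.gpu.yaml (hypothetical override file, not in this commit)
services:
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all available NVIDIA GPUs for the Ollama container;
            # requires the NVIDIA Container Toolkit on the host.
            - driver: nvidia
              count: all
              capabilities: [gpu]
```

You would apply such an override with `docker-compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d`.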
backend/openui/ollama.py (3 additions, 2 deletions)
```diff
@@ -1,13 +1,14 @@
 import asyncio
 import base64
 from datetime import datetime
 import uuid
 import traceback
-import time
 from openai.types.chat import ChatCompletionChunk
 from .logs import logger
 
 date_format = "%Y-%m-%dT%H:%M:%S.%fZ"
+# 🥱 three minutes
+MAX_OLLAMA_WAIT_SECONDS=180
 
 # Ollama
 # {"model":"llava:latest","created_at":"2024-02-05T06:32:11.073667Z","message":{"role":"assistant","content":" "},"done":false}
```
```diff
@@ -68,7 +69,7 @@ async def ollama_stream_generator(response, inputs):
     first_sse = None
     try:
         logger.debug("Booting up ollama...")
-        buffer = await asyncio.wait_for(response.__anext__(), 20)
+        buffer = await asyncio.wait_for(response.__anext__(), MAX_OLLAMA_WAIT_SECONDS)
         chunks.append(ollama_to_openai(buffer, id))
         first_sse = ollama_chunk_to_sse(buffer, id)
     except Exception as e:
```
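A minimal, self-contained sketch of the pattern this hunk tunes (the `slow_stream` generator is a hypothetical stand-in for the Ollama response stream): `asyncio.wait_for` bounds only the first `__anext__()` call, which is the slow one while Ollama loads the model into memory.

```python
import asyncio

# The constant introduced above: three minutes for a cold model load.
MAX_OLLAMA_WAIT_SECONDS = 180

async def slow_stream():
    # Hypothetical stand-in for Ollama's chat stream; the first chunk is
    # slow because the model may still be loading.
    await asyncio.sleep(2)
    yield {"message": {"content": "hi"}}

async def main():
    stream = slow_stream()
    try:
        # Bound only the wait for the FIRST chunk, as the hunk above does;
        # later chunks arrive quickly once the model is warm.
        first = await asyncio.wait_for(stream.__anext__(), MAX_OLLAMA_WAIT_SECONDS)
        print(first)
    except asyncio.TimeoutError:
        print("Ollama never produced a first chunk")

asyncio.run(main())
```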
backend/openui/server.py (2 additions, 2 deletions)
```diff
@@ -65,8 +65,8 @@ async def lifespan(app: FastAPI):
     description="API for proxying LLM requests to different services",
 )
 
-openai = AsyncOpenAI()
-ollama = AsyncClient()  # AsyncOpenAI(base_url="http://127.0.0.1:11434/v1")
+openai = AsyncOpenAI()  # AsyncOpenAI(base_url="http://127.0.0.1:11434/v1")
+ollama = AsyncClient()
 router = APIRouter()
 session_store = DBSessionStore()
 github_sso = GithubSSO(
```
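The commented-out constructor that moves lines here is a reminder that Ollama also serves an OpenAI-compatible API under `/v1`, so the stock client can target it. A hedged sketch, not code from this commit (the `api_key` is a dummy placeholder; the client requires one even though Ollama ignores it):

```python
from openai import AsyncOpenAI

# Point the standard OpenAI client at Ollama's OpenAI-compatible endpoint
# instead of api.openai.com. The key is a dummy value Ollama won't check.
ollama_via_openai = AsyncOpenAI(
    base_url="http://127.0.0.1:11434/v1",
    api_key="ollama",
)
```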
backend/pyproject.toml (1 addition, 1 deletion)
```diff
@@ -1,6 +1,6 @@
 [project]
 dependencies = [
-    "weave@git+https://github.com/wandb/weave.git@master",
+    "weave@git+https://github.com/wandb/weave.git@fix/tiktoken-images",
     "openai>=1.12.0",
     "ollama>=0.1.7",
     "itsdangerous>=2.0.1",
```
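This pins `weave` to the branch carrying the image-upload fix named in the commit message. The same PEP 508 direct reference can be installed on its own to verify it resolves before rebuilding the image:

```bash
# Install the branch-pinned dependency directly, exactly as written in
# pyproject.toml above.
pip install "weave@git+https://github.com/wandb/weave.git@fix/tiktoken-images"
```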
docker-compose.yaml (24 additions, 0 deletions)
```diff
@@ -0,0 +1,24 @@
+version: '3.8'
+
+services:
+  ollama:
+    image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
+    volumes:
+      - ${OLLAMA_VOLUME_PATH:-ollama}:/root/.ollama
+
+  backend:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile
+    ports:
+      - "7878:7878"
+    environment:
+      - OLLAMA_HOST=http://ollama:11434
+      # Just remove `=xxx` to have the env variable passed forward
+      - OPENAI_API_KEY=xxx
+    depends_on:
+      - ollama
+volumes:
+  ollama:
```
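The inline comment describes Compose's environment passthrough: listing a variable name with no `=value` forwards its value from the host shell. A hedged sketch of that variant (not part of the committed file):

```yaml
    environment:
      - OLLAMA_HOST=http://ollama:11434
      # No `=value`: Compose reads OPENAI_API_KEY from the host environment
      # at `docker-compose up` time and passes it into the container.
      - OPENAI_API_KEY
```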
