update gitignore, change judge to narrator

zrrraa · Nov 15, 2023 · f235995 · f235995
1 parent b38ee78
commit f235995
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 114 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 .env
 /venv
 /frames
+/narration
diff --git a/judge.py b/judge.py
diff --git a/narrator.py b/narrator.py
@@ -0,0 +1,101 @@
+import os
+from openai import OpenAI
+import base64
+import json
+import time
+import simpleaudio as sa
+import errno
+from elevenlabs import generate, play, voices
+
+# Module-level OpenAI client, built with no explicit arguments;
+# analyze_image sends its chat completion requests through it.
+client = OpenAI()
+
+
+def encode_image(image_path):
+    """Return the contents of the file at ``image_path`` as base64 text.
+
+    The file is read in binary mode and the base64 bytes are decoded as
+    UTF-8. If opening or reading fails with ``errno.EACCES`` the call
+    sleeps for 0.1 seconds and tries again, with no cap on the number of
+    attempts. Any other I/O error propagates to the caller.
+    """
+    retry_delay = 0.1
+    while True:
+        try:
+            with open(image_path, "rb") as handle:
+                raw_bytes = handle.read()
+        except IOError as err:
+            if err.errno == errno.EACCES:
+                # Treated as "file is still being written": pause, retry.
+                time.sleep(retry_delay)
+                continue
+            # Not an access error, so let the caller see it.
+            raise
+        return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def play_audio(text):
+    """Turn ``text`` into speech, save the audio to disk, then play it.
+
+    The audio comes from ``generate`` with a fixed voice id and the
+    "eleven_turbo_v2" model. Each call writes it to
+    ``narration/<random id>/audio.wav`` (relative to the working
+    directory) before handing the same audio object to ``play``.
+    """
+    audio = generate(text=text, voice="ENfvYmv6CRqDodDZTieQ", model="eleven_turbo_v2")
+
+    # Per-call folder name: 30 random bytes as URL-safe base64, padding removed.
+    encoded_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8")
+    out_dir = os.path.join("narration", encoded_id.rstrip("="))
+    os.makedirs(out_dir, exist_ok=True)
+
+    # NOTE(review): the file is named .wav, but generate() presumably returns
+    # MP3-encoded bytes by default -- confirm against the ElevenLabs docs.
+    out_path = os.path.join(out_dir, "audio.wav")
+    with open(out_path, "wb") as out_file:
+        out_file.write(audio)
+
+    play(audio)
+
+
+def generate_new_line(base64_image):
+    """Build the user turn asking the model to describe one image.
+
+    Returns a one-element list holding a "user" message whose content has
+    two parts: the text prompt "Describe this image" and the image itself,
+    embedded as a ``data:image/jpeg;base64,...`` URL made from
+    ``base64_image``.
+    """
+    text_part = {"type": "text", "text": "Describe this image"}
+    # NOTE(review): image_url is a bare data-URL string here; confirm the
+    # API accepts this form rather than an object with a "url" key.
+    image_part = {
+        "type": "image_url",
+        "image_url": f"data:image/jpeg;base64,{base64_image}",
+    }
+    user_message = {"role": "user", "content": [text_part, image_part]}
+    return [user_message]
+
+
+def analyze_image(base64_image, script):
+    """Request the next line of narration for an image.
+
+    The messages sent to the "gpt-4-vision-preview" model are, in order:
+    a fixed system prompt, every entry of ``script`` (a list of earlier
+    messages), and the user message from ``generate_new_line``. The reply
+    is limited to 500 tokens.
+
+    Returns the text content of the first choice in the response.
+    """
+    system_message = {
+        "role": "system",
+        "content": """
+                You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
+                Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
+                """,
+    }
+    conversation = [system_message] + script + generate_new_line(base64_image)
+
+    completion = client.chat.completions.create(
+        model="gpt-4-vision-preview",
+        messages=conversation,
+        max_tokens=500,
+    )
+    return completion.choices[0].message.content
+
+
+def main():
+    """Run the narration loop forever.
+
+    Each pass reads ``./frames/frame.jpg`` under the current working
+    directory, asks ``analyze_image`` for a narration line, prints it,
+    plays it with ``play_audio``, records it in the running script, and
+    then sleeps for five seconds.
+    """
+    # Earlier narration lines, replayed to the model on every request.
+    # Grows by one entry per pass and is never trimmed.
+    script = []
+
+    while True:
+        # Latest frame, resolved against the current working directory.
+        frame_path = os.path.join(os.getcwd(), "./frames/frame.jpg")
+        encoded_frame = encode_image(frame_path)
+
+        # Ask the model to narrate the frame.
+        print("👀 David is watching...")
+        narration = analyze_image(encoded_frame, script=script)
+
+        print("🎙️ David says:")
+        print(narration)
+
+        play_audio(narration)
+
+        # Record the line as an assistant turn for later requests.
+        script.append({"role": "assistant", "content": narration})
+
+        # Pause before the next frame.
+        time.sleep(5)
+
+
+# Start the narration loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
@@ -1,19 +1,39 @@
 annotated-types==0.6.0
 anyio==3.7.1
+appnope==0.1.3
+asttokens==2.4.1
 certifi==2023.7.22
 charset-normalizer==3.3.2
+decorator==5.1.1
 distro==1.8.0
+elevenlabs==0.2.26
+exceptiongroup==1.1.3
+executing==2.0.1
 h11==0.14.0
 httpcore==1.0.1
 httpx==0.25.1
 idna==3.4
+ipython==8.17.2
+jedi==0.19.1
+matplotlib-inline==0.1.6
 numpy==1.26.1
 openai==1.1.1
 opencv-python==4.8.1.78
+parso==0.8.3
+pexpect==4.8.0
+prompt-toolkit==3.0.41
+ptyprocess==0.7.0
+pure-eval==0.2.2
 pydantic==2.4.2
 pydantic_core==2.10.1
+Pygments==2.16.1
 requests==2.31.0
+six==1.16.0
 sniffio==1.3.0
+stack-data==0.6.3
 tqdm==4.66.1
+traitlets==5.13.0
 typing_extensions==4.8.0
 urllib3==2.0.7
+wcwidth==0.2.10
+websockets==12.0