Skip to content

Commit

Permalink
update gitignore, change judge to narrator
Browse files Browse the repository at this point in the history
  • Loading branch information
cbh123 committed Nov 15, 2023
1 parent b38ee78 commit f235995
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 114 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.env
/venv
/frames
/narration
114 changes: 0 additions & 114 deletions judge.py

This file was deleted.

101 changes: 101 additions & 0 deletions narrator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
from openai import OpenAI
import base64
import json
import time
import simpleaudio as sa
import errno
from elevenlabs import generate, play, voices

# Module-level OpenAI client; analyze_image() sends its chat completion
# requests through this shared instance.
client = OpenAI()


def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64 text string.

    The file is read in binary mode and the base64 result is decoded as
    UTF-8. If opening or reading fails with ``errno.EACCES`` (treated here
    as "the file is currently in use"), the read is retried every 0.1
    seconds with no upper bound on the number of attempts. Any other
    I/O error is re-raised to the caller unchanged.
    """
    while True:
        try:
            with open(image_path, "rb") as frame:
                raw_bytes = frame.read()
        except IOError as err:
            if err.errno == errno.EACCES:
                # Access denied: assume a writer still holds the file,
                # pause briefly and try again.
                time.sleep(0.1)
                continue
            # Anything else (e.g. missing file) is a real failure.
            raise
        return base64.b64encode(raw_bytes).decode("utf-8")


def play_audio(text):
    """Synthesize ``text`` to speech, save it under ``narration/``, and play it.

    The audio is produced by ``generate`` with a fixed voice id and the
    ``eleven_turbo_v2`` model. Each call writes the result to a new
    ``narration/<random id>/audio.wav`` file before handing the same audio
    object to ``play``.
    """
    speech = generate(
        text=text,
        voice="ENfvYmv6CRqDodDZTieQ",
        model="eleven_turbo_v2",
    )

    # 30 random bytes -> URL-safe base64 token (padding stripped) used as a
    # per-clip directory name so earlier recordings are never overwritten.
    random_bytes = os.urandom(30)
    token = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
    clip_dir = os.path.join("narration", token.rstrip("="))
    os.makedirs(clip_dir, exist_ok=True)

    # NOTE(review): the file is always named "audio.wav" regardless of the
    # encoding `generate` actually returns — confirm the format matches.
    clip_path = os.path.join(clip_dir, "audio.wav")
    with open(clip_path, "wb") as out:
        out.write(speech)

    play(speech)


def generate_new_line(base64_image):
    """Build the user turn that asks the model to describe one image.

    Returns a one-element list holding a ``"user"`` message whose content
    has two parts: a fixed text prompt, and the image embedded as a
    ``data:image/jpeg;base64,...`` URL built from ``base64_image``.
    The result is a list so it can be concatenated onto a message history.
    """
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {
        "type": "image_url",
        "image_url": "data:image/jpeg;base64,{}".format(base64_image),
    }
    user_message = {"role": "user", "content": [text_part, image_part]}
    return [user_message]


def analyze_image(base64_image, script):
    """Ask the vision model for the next line of narration for an image.

    The request is made of three parts, in order: a fixed system prompt
    setting the narrator persona, the prior messages in ``script``, and a
    fresh user message for ``base64_image`` from ``generate_new_line``.

    Args:
        base64_image: Base64 text of the image to narrate.
        script: List of earlier chat messages to include as context.

    Returns:
        The message content of the first choice in the response.
    """
    system_prompt = (
        "\n"
        "You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.\n"
        "Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!\n"
    )
    persona = [{"role": "system", "content": system_prompt}]
    conversation = persona + script + generate_new_line(base64_image)

    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=conversation,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def main():
    """Run the narration loop forever.

    Each pass reads ``./frames/frame.jpg`` relative to the current working
    directory, asks ``analyze_image`` for a narration line given the lines
    spoken so far, prints it, plays it aloud via ``play_audio``, records it
    in the running history, and then sleeps for five seconds.
    """
    # Assistant messages spoken so far; passed back to the model each time.
    # NOTE(review): this history is never trimmed, so it grows every pass.
    history = []

    while True:
        # Location of the most recent frame.
        frame_path = os.path.join(os.getcwd(), "./frames/frame.jpg")

        # Base64-encode the frame for the API request.
        encoded_frame = encode_image(frame_path)

        # Ask the model to narrate the current frame.
        print("👀 David is watching...")
        narration = analyze_image(encoded_frame, script=history)

        print("🎙️ David says:")
        print(narration)

        play_audio(narration)

        history.append({"role": "assistant", "content": narration})

        # Pause for 5 seconds before the next frame.
        time.sleep(5)


if __name__ == "__main__":
    main()
20 changes: 20 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,39 @@
annotated-types==0.6.0
anyio==3.7.1
appnope==0.1.3
asttokens==2.4.1
certifi==2023.7.22
charset-normalizer==3.3.2
decorator==5.1.1
distro==1.8.0
elevenlabs==0.2.26
exceptiongroup==1.1.3
executing==2.0.1
h11==0.14.0
httpcore==1.0.1
httpx==0.25.1
idna==3.4
ipython==8.17.2
jedi==0.19.1
matplotlib-inline==0.1.6
numpy==1.26.1
openai==1.1.1
opencv-python==4.8.1.78
parso==0.8.3
pexpect==4.8.0
prompt-toolkit==3.0.41
ptyprocess==0.7.0
pure-eval==0.2.2
pydantic==2.4.2
pydantic_core==2.10.1
Pygments==2.16.1
requests==2.31.0
six==1.16.0
sniffio==1.3.0
stack-data==0.6.3
tqdm==4.66.1
traitlets==5.13.0
typing_extensions==4.8.0
urllib3==2.0.7
wcwidth==0.2.10
websockets==12.0

0 comments on commit f235995

Please sign in to comment.