Skip to content

Commit

Permalink
add video
Browse files Browse the repository at this point in the history
  • Loading branch information
swyxio committed Oct 1, 2024
1 parent a912a9d commit aa3da7a
Showing 1 changed file with 0 additions and 123 deletions.
123 changes: 0 additions & 123 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
import json
import requests
import time
<<<<<<< HEAD
import subprocess
=======
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
from datetime import datetime
from openai import OpenAI
from pydub import AudioSegment
Expand Down Expand Up @@ -51,7 +48,6 @@ def generate_dialogue():

log("BRAINSTORM", "Generating important news stories and discussion topics...")
brainstorm_response = openai_client.chat.completions.create(
<<<<<<< HEAD
model=basemodel,
messages=[
{"role": "user", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, rumors and hot topics. Ignore mundane support or debugging issues."},
Expand All @@ -75,43 +71,15 @@ def generate_dialogue():
brainstormed_questions = questions_response.choices[0].message.content
log("QUESTION_GEN", "Questions generation completed.")
log("QUESTION_GEN_OUTPUT", brainstormed_questions)
=======
model="gpt-4o",
messages=[
{"role": "system", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, and hot topics. Ignore mundane support or debugging issues."},
{"role": "user", "content": f"Based on the following content, brainstorm the top 5 most important and interesting tech news stories or discussion items. For each topic, provide a brief explanation of why it's significant and how it relates to AI Engineering, machine learning, or tech innovation.\n\nContent: {content}"}
]
)

brainstormed_topics = brainstorm_response.choices[0].message.content
log("BRAINSTORM", "Topics generation completed.")

log("QUESTION_GEN", "Generating key questions for each topic...")
questions_response = openai_client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are an AI assistant tasked with generating insightful questions about tech news items."},
{"role": "user", "content": f"Based on the following brainstormed topics, generate 2-3 key questions for each topic that the reader or listener might want answered. These questions should be thought-provoking, slightly humorous, and encourage detailed explanations from Sarah.\n\nBrainstormed Topics:\n{brainstormed_topics}"}
]
)

brainstormed_questions = questions_response.choices[0].message.content
log("QUESTION_GEN", "Questions generation completed.")
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944

log("DIALOGUE_GEN", "Generating dialogue using OpenAI GPT-4...")
start_time = time.time()
response = openai_client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are an AI assistant tasked with generating a dialogue about tech news."},
<<<<<<< HEAD
{"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host (Charlie) mentioning today's date, {datetime.now().strftime('%Y-%m-%d')}, and then a discussion between two voices: Karan (male) and Sarah (female). The host (Charlie) should only speak briefly at the start, just mentioning the date and major topics (not introducing himself or the Karan or Sarah), and then at each change of topic, and introduce the headline news and facts that Karan and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Karan should make funny/amusing but technical observations for an AI Engineer audience and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Karan's inquiries. Give credit to the source discussing these topics. End with Charlie again telling listeners to send feedback to @smol_ai on Twitter.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
{"role": "user", "content": f"Sarah is a 35-year-old AI engineer. She has a Ph.D. in Computer Science from MIT and spent 7 years working as a researcher at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations, but also loves cooking Thai food and surfing. Her catchphrase is 'What a time to be alive!' and her favorite AI lab is DeepMind.\n\nKaran is a 60-year-old Irish stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Karan brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios, famous movies and tv shows or science fiction/fantasy books. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
=======
{"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host, and then a discussion between two voices: Alex (male) and Sarah (female). The host should only speak at the start, and then at each change of topic, and introduce the headline news and facts that Alex and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Alex should make funny/amusing observations and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Alex's inquiries. Give credit to the source discussing these topics.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
{"role": "user", "content": f"Sarah is a 35-year-old former software engineer turned tech journalist. She has a Ph.D. in Computer Science from MIT and spent 5 years working at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations. Her catchphrase is 'Feel the AGI!' and her favorite AI lab is DeepMind.\n\nAlex is a 29-year-old stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Alex brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
],
functions=[
{
Expand Down Expand Up @@ -143,7 +111,6 @@ def generate_dialogue():

return response.choices[0].message.function_call.arguments

<<<<<<< HEAD
def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, history: list, use_cartesia: bool = False, progress: tuple = None) -> tuple:
if progress:
current, total = progress
Expand Down Expand Up @@ -235,46 +202,6 @@ def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, h
end_time = time.time()
log("TTS", f"Audio file saved: {os.path.basename(save_file_path)} (generated in {end_time - start_time:.2f} seconds)")
return save_file_path, generation_id, duration_sec # {{ edit_5 }}
=======
def text_to_speech_file(text: str, voice_id: str, temp_folder: str, history: list, timeout: float = 60.0) -> tuple:
    """Synthesize ``text`` through the ElevenLabs HTTP API and save it as an MP3.

    Args:
        text: The line of dialogue to convert to speech.
        voice_id: Voice identifier interpolated into the request URL.
        temp_folder: Directory in which the generated MP3 file is written.
        history: Earlier generations for this voice; when non-empty, at most
            the last three entries are attached to the request payload.
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        ``(save_file_path, generation_id)`` on success, where
        ``generation_id`` is the ``x-request-id`` response header (``None``
        if the header is absent). ``(None, None)`` if the request fails or
        the API answers with a non-200 status.
    """
    log("TTS", f"Converting text to speech for voice {voice_id}...")
    start_time = time.time()
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }

    data = {
        "text": text,
        "model_id": "eleven_turbo_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }

    if history:
        # NOTE(review): confirm the endpoint actually honours a "history"
        # field; it is forwarded here exactly as the original code did.
        data["history"] = history[-3:]  # Use up to 3 previous generations

    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures and timeouts take the same failure path as a bad
        # HTTP status, so the caller can skip this line and carry on.
        log("TTS_ERROR", f"Error: request failed - {exc}")
        return None, None

    if response.status_code != 200:
        log("TTS_ERROR", f"Error: {response.status_code} - {response.text}")
        return None, None

    save_file_path = os.path.join(temp_folder, f"{uuid.uuid4()}.mp3")
    with open(save_file_path, "wb") as f:
        f.write(response.content)

    end_time = time.time()
    log("TTS", f"Audio file saved: {save_file_path} (generated in {end_time - start_time:.2f} seconds)")

    generation_id = response.headers.get("x-request-id")
    return save_file_path, generation_id
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944

def combine_audio_files(file_paths, output_file):
log("AUDIO_COMBINE", "Combining audio files...")
Expand All @@ -287,27 +214,12 @@ def combine_audio_files(file_paths, output_file):
combined = combined[:-200] # Remove the last silence
combined.export(output_file, format="mp3")
end_time = time.time()
<<<<<<< HEAD
log("AUDIO_COMBINE", f"Audio files combined with 300ms gaps in {end_time - start_time:.2f} seconds")
=======
log("AUDIO_COMBINE", f"Audio files combined in {end_time - start_time:.2f} seconds")
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944

def main():
log("PROCESS_START", "Starting the dialogue generation and text-to-speech process...")
dialogue_json = generate_dialogue()
dialogue = json.loads(dialogue_json)['dialogue']
<<<<<<< HEAD
=======

temp_folder = f"temp_{uuid.uuid4()}"
os.makedirs(temp_folder, exist_ok=True)
log("TEMP_FOLDER", f"Created temporary folder: {temp_folder}")

voice_host_id = "ThT5KcBeYPX3keUQqHPh" # Charlie pre-made voice
voice_alex_id = "pNInz6obpgDQGcFmaJgB" # Adam pre-made voice for Alex
voice_sarah_id = "21m00Tcm4TlvDq8ikWAM" # Rachel pre-made voice for Sarah
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944

temp_folder = f"temp_{datetime.now().strftime('%Y-%m-%d_%H-%M')}_{uuid.uuid4()}"
os.makedirs(temp_folder, exist_ok=True)
Expand All @@ -323,7 +235,6 @@ def main():
history_sarah = []

log("DIALOGUE_PROCESS", f"Processing {len(dialogue)} dialogue lines...")
<<<<<<< HEAD
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_index = {
executor.submit(
Expand Down Expand Up @@ -382,40 +293,6 @@ def main():
else:
log("AUDIO_ERROR", "No audio files were generated successfully.")

=======
for i, line in enumerate(dialogue):
log("DIALOGUE_LINE", f"Processing line {i+1}/{len(dialogue)}: {line['speaker']}")
if line['speaker'] == "Host":
voice_id = voice_host_id
history = history_host
elif line['speaker'] == "Alex":
voice_id = voice_alex_id
history = history_alex
else: # Sarah
voice_id = voice_sarah_id
history = history_sarah

audio_file, generation_id = text_to_speech_file(line['text'], voice_id, temp_folder, history)
if audio_file:
audio_files.append(audio_file)
history.append({"text": line['text'], "generation_id": generation_id})
else:
log("TTS_FAIL", f"Failed to generate audio for line {i+1}")

if audio_files:
output_file = "combined_dialogue.mp3"
combine_audio_files(audio_files, output_file)
log("OUTPUT", f"Combined audio saved as: {output_file}")
else:
log("AUDIO_ERROR", "No audio files were generated successfully.")

dialogue_output_file = "dialogue_transcript.txt"
with open(dialogue_output_file, "w") as f:
for line in dialogue:
f.write(f"{line['speaker']}: {line['text']}\n\n")
log("OUTPUT", f"Dialogue transcript saved as: {dialogue_output_file}")

>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
log("PROCESS_END", "Process completed successfully!")

if __name__ == "__main__":
Expand Down

0 comments on commit aa3da7a

Please sign in to comment.