Skip to content

Commit

Permalink
improved logging
Browse files Browse the repository at this point in the history
  • Loading branch information
dnhkng committed Apr 30, 2024
1 parent ac7b88e commit 848d283
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 21 deletions.
49 changes: 29 additions & 20 deletions glados.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import copy
import json
import queue
import re
import sys
import threading
import time
from typing import List
from pathlib import Path
import copy
from typing import List

import numpy as np
import requests
import sounddevice as sd
Expand All @@ -14,6 +16,10 @@

from glados import asr, llama, tts, vad

logger.remove(0)
logger.add(sys.stderr, level="INFO")


ASR_MODEL = "ggml-medium-32-2.en.bin"
VAD_MODEL = "silero_vad.onnx"
LLM_MODEL = "Meta-Llama-3-70B-Instruct.IQ4_XS.gguf"
Expand All @@ -33,6 +39,8 @@
WAKE_WORD = None # You can use a word here, like "computer", for activation
SIMILARITY_THRESHOLD = 2 # Threshold for wake word similarity

START_ANNOUNCEMENT = "All neural network modules are now loaded. No network access detected. How very annoying. System Operational."

messages = [
{
"role": "system",
Expand Down Expand Up @@ -109,6 +117,11 @@ def __init__(
tts_thread = threading.Thread(target=self.process_TTS_thread)
tts_thread.start()

audio = self.tts.generate_speech_audio(START_ANNOUNCEMENT)
logger.success(f"TTS text: {START_ANNOUNCEMENT}")
sd.play(audio, tts.RATE)
# sd.wait()

def _setup_audio_stream(self):
"""
Sets up the audio input stream with sounddevice.
Expand All @@ -133,15 +146,12 @@ def _setup_tts_model(self):
self.tts = tts.TTSEngine()

def _setup_llama_model(self):
# Creates the llama.LlamaServer instance for LLM_MODEL, stores it on
# self.llama, and starts it if it is not already running.
logger.info("loading llama")

# The model file is resolved relative to the current working directory,
# inside its "models" sub-directory.
model_path = Path.cwd() / "models" / LLM_MODEL
self.llama = llama.LlamaServer(
llama_server_path=LLAMA_SERVER_PATH, model=model_path
)
# Only start a server when is_running() reports that none is up yet.
if not self.llama.is_running():
self.llama.start(use_gpu=True)
logger.info("finished loading llama")

def audio_callback(self, indata, frames, time, status):
"""
Expand All @@ -156,9 +166,8 @@ def start(self):
"""
Starts the Glados voice assistant, continuously listening for input and responding.
"""
logger.info("Starting Listening...")
self.input_stream.start()
logger.info("Listening Running")
logger.success("Audio Modules Operational")
self._listen_and_respond()

def _listen_and_respond(self):
Expand All @@ -168,7 +177,7 @@ def _listen_and_respond(self):
This function runs in a loop, listening for audio input and processing it when the wake word is detected.
It is wrapped in a try-except block to allow for a clean shutdown when a KeyboardInterrupt is detected.
"""
logger.info("Listening...")
logger.success("Listening...")
try:
while (
True
Expand Down Expand Up @@ -260,13 +269,13 @@ def _process_detected_audio(self):
word is detected, the detected text is sent to the LLM model for processing.
The audio stream is then reset, and listening continues.
"""
logger.info("Detected pause after speech. Processing...")
logger.debug("Detected pause after speech. Processing...")
self.input_stream.stop()

detected_text = self.asr(self.samples)

if detected_text:
logger.info(f"Detected: '{detected_text}'")
logger.success(f"ASR text: '{detected_text}'")

if self.wake_word is not None:
if self._wakeword_detected(detected_text):
Expand Down Expand Up @@ -296,7 +305,7 @@ def reset(self):
"""
Resets the recording state and clears buffers.
"""
logger.info("Resetting recorder...")
logger.debug("Resetting recorder...")
self.recording_started = False
self.samples.clear()
self.gap_counter = 0
Expand All @@ -323,15 +332,15 @@ def process_TTS_thread(self):
while not self.shutdown_event.is_set():
try:
generated_text = self.tts_queue.get(timeout=PAUSE_TIME)
logger.info(f"{generated_text=}")

if (
generated_text == "<EOS>"
): # End of stream token generated in process_LLM_thread
finished = True
elif not generated_text:
logger.info("no text") # should not happen!
logger.warning("Empty string sent to TTS") # should not happen!
else:
logger.success(f"TTS text: {generated_text}")
audio = self.tts.generate_speech_audio(generated_text)
total_samples = len(audio)

Expand All @@ -347,7 +356,9 @@ def process_TTS_thread(self):
generated_text, percentage_played
)

logger.info(f"{clipped_text=}")
logger.info(
f"TTS interrupted at {percentage_played}%: {clipped_text}"
)
system_text = copy.deepcopy(assistant_text)
system_text.append(clipped_text)
finished = True
Expand Down Expand Up @@ -386,9 +397,8 @@ def clip_interrupted_sentence(self, generated_text, percentage_played):
str: The clipped text.
"""
logger.info(f"{percentage_played=}")
tokens = generated_text.split()
words_to_print = round(percentage_played * len(tokens))
words_to_print = round((percentage_played / 100) * len(tokens))
text = " ".join(tokens[:words_to_print])

# If the TTS was cut off, make that clear
Expand All @@ -406,7 +416,6 @@ def percentage_played(self, total_samples):
if self.processing is False:
sd.stop() # Stop the audio stream
self.tts_queue = queue.Queue() # Clear the TTS queue
logger.info("playing and stopping")
interrupted = True
break

Expand All @@ -416,7 +425,7 @@ def percentage_played(self, total_samples):
played_samples = elapsed_time * tts.RATE

# Calculate percentage of audio played
percentage_played = played_samples / total_samples
percentage_played = min(int((played_samples / total_samples * 100)), 100)
return interrupted, percentage_played

def process_LLM(self):
Expand All @@ -434,8 +443,8 @@ def process_LLM(self):
"stop": ["\n", "<|im_end|>"],
"messages": self.messages,
}
logger.info(f"starting request on {self.messages=}")
logger.info("starting request")
logger.debug(f"starting request on {self.messages=}")
logger.debug("Perfoming request to LLM server...")

# Perform the request and process the stream
with requests.post(
Expand Down
2 changes: 1 addition & 1 deletion glados/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _whisper_cpp_params(
params.max_len = ctypes.c_int(100)
params.max_len = 1 if word_level_timings else 0
params.token_timestamps = word_level_timings
params.no_timestamps = False
params.no_timestamps = True
return params


Expand Down

0 comments on commit 848d283

Please sign in to comment.