talk : talk with AI in the terminal

eetti · Dec 10, 2022 · 3b1aacb · 3b1aacb
1 parent d1da35d
commit 3b1aacb
Show file tree

Hide file tree

Showing 14 changed files with 1,753 additions and 5 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,7 @@ build-sanitize-thread/
 main
 stream
 command
+talk
 bench
 sync.sh
 libwhisper.so

diff --git a/Makefile b/Makefile
@@ -154,7 +154,7 @@ libwhisper.so: ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
 
 clean:
-	rm -f *.o main stream command bench libwhisper.a libwhisper.so
+	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
 
 #
 # Examples
@@ -172,6 +172,9 @@ stream: examples/stream/stream.cpp ggml.o whisper.o
 command: examples/command/command.cpp ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
 
+talk: examples/talk/talk.cpp  examples/talk/gpt-2.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
+
 bench: examples/bench/bench.cpp ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
 

diff --git a/README.md b/README.md
@@ -462,7 +462,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [bench](examples/bench) | | Benchmark the performance of Whisper on your machine |
 | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
 | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
-| | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot in your browser |
+| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
 | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
 | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
 | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -28,4 +28,5 @@ else()
     add_subdirectory(stream)
     add_subdirectory(command)
     add_subdirectory(bench)
+    add_subdirectory(talk)
 endif()
diff --git a/examples/command/command.cpp b/examples/command/command.cpp
@@ -34,7 +34,6 @@ struct whisper_params {
 
     bool speed_up      = false;
     bool translate     = false;
-    bool no_context    = true;
     bool print_special = false;
     bool print_energy  = false;
     bool no_timestamps = true;

diff --git a/examples/talk.wasm/README.md b/examples/talk.wasm/README.md
@@ -6,6 +6,8 @@ Talk with an Artificial Intelligence in your browser:
 
 Online demo: https://whisper.ggerganov.com/talk/
 
+Terminal version: [examples/talk](/examples/talk)
+
 ## How it works?
 
 This demo leverages 2 modern neural network models to create a high-quality voice chat directly in your browser:

diff --git a/examples/talk/.gitignore b/examples/talk/.gitignore
@@ -0,0 +1 @@
+eleven-labs.py
diff --git a/examples/talk/CMakeLists.txt b/examples/talk/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (WHISPER_SUPPORT_SDL2)
+    # talk
+    set(TARGET talk)
+    add_executable(${TARGET} talk.cpp gpt-2.cpp)
+    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+endif ()
diff --git a/examples/talk/README.md b/examples/talk/README.md
@@ -0,0 +1,33 @@
+# talk
+
+Talk with an Artificial Intelligence in your terminal
+
+[Demo Talk](https://user-images.githubusercontent.com/1991296/206805012-48e71cc2-588d-4745-8798-c1c70ea3b40d.mp4)
+
+Web version: [examples/talk.wasm](/examples/talk.wasm)
+
+## Building
+
+The `talk` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
+
+```bash
+# Install SDL2 on Linux
+sudo apt-get install libsdl2-dev
+
+# Install SDL2 on macOS
+brew install sdl2
+
+# Build the "talk" executable
+make talk
+
+# Run it
+./talk -p Santa
+```
+
+To run this, you will need a ggml GPT-2 model: [instructions](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2#downloading-and-converting-the-original-models)
+
+Alternatively, you can simply download the smallest ggml GPT-2 117M model (240 MB) like this:
+
+```
+wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://ggml.ggerganov.com/ggml-model-gpt-2-117M.bin
+```
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,7 @@ build-sanitize-thread/ @@
     main
     stream
     command
+    talk
     bench
     sync.sh
     libwhisper.so
@@ Expand Down @@