feat: update to pydantic 2.0, update error handling, fix voice settin…

…gs/fetching, fix voice design/clone
malyvsen · Sep 8, 2023 · 1af6a16 · 1af6a16
1 parent 3d1f80a
commit 1af6a16
Show file tree

Hide file tree

Showing 11 changed files with 131 additions and 507 deletions.
diff --git a/API.md b/API.md
@@ -168,7 +168,7 @@ Models(
 
 ### `Voices`
 
-The `Voices` API is used to get a list of all available voices for the authenticated user. The `Voice` contains all the info for a voice, such as the voice's stability and similarity_boost, and can be passed to the `generate` function as the `voice` argument to select the voice. The voice settings can be changed to control the voice behaviour.
+The `Voices` API is used to get a list of all available voices for the authenticated user. The `Voice` contains all the info for a voice, and can be passed to the `generate` function as the `voice` argument to select the voice. The voice settings can be changed to control the voice behaviour.
 
 ```py
 from elevenlabs.api import Voices
@@ -184,7 +184,7 @@ Voice(
     voice_id='21m00Tcm4TlvDq8ikWAM',
     name='Rachel',
     category='premade',
-    settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
+    settings=None
 )
 ```
 
@@ -195,72 +195,27 @@ Voices(
             voice_id='21m00Tcm4TlvDq8ikWAM',
             name='Rachel',
             category='premade',
-            settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
+            settings=None
         ),
         Voice(
             voice_id='AZnzlk1XvdvUeBnXmlld',
             name='Domi',
             category='premade',
-            settings=VoiceSettings(stability=0.1, similarity_boost=0.75)
+            settings=None
         ),
         Voice(
             voice_id='EXAVITQu4vr4xnSDxMaL',
             name='Bella',
             category='premade',
-            settings=VoiceSettings(stability=0.245, similarity_boost=0.75)
+            settings=None
         ),
-        Voice(
-            voice_id='ErXwobaYiN019PkySvjV',
-            name='Antoni',
-            category='premade',
-            settings=VoiceSettings(stability=0.195, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='MF3mGyEYCl7XYWbV9V6O',
-            name='Elli',
-            category='premade',
-            settings=VoiceSettings(stability=0.755, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='TxGEqnHWrfWFTfGW9XjX',
-            name='Josh',
-            category='premade',
-            settings=VoiceSettings(stability=0.15, similarity_boost=0.51)
-        ),
-        Voice(
-            voice_id='VR6AewLTigWG4xSOukaG',
-            name='Arnold',
-            category='premade',
-            settings=VoiceSettings(stability=0.15, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='pNInz6obpgDQGcFmaJgB',
-            name='Adam',
-            category='premade',
-            settings=VoiceSettings(stability=0.2, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='yoZ06aMxZJJ28mfd3POQ',
-            name='Sam',
-            category='premade',
-            settings=VoiceSettings(stability=0.25, similarity_boost=0.75)
-        )
+        ...
     ]
 )
 ```
 
 </details>
 
-An example of changing the `Voice` object to control the voice behaviour:
-
-```py
-my_voice = voices[0]
-my_voice.settings.stability = 0.1
-my_voice.settings.similarity_boost = 0.75
-
-text = f""" Hi! My name is {my_voice.name}, nice to meet you! """
-audio = generate(text, voice=my_voice)
-```
 
 ### `VoiceDesign`
 
@@ -272,6 +227,7 @@ from elevenlabs import Voice, VoiceDesign, Gender, Age, Accent, play
 design = VoiceDesign(
     name='Lexa',
     text="Hello, my name is Lexa. I'm your personal assistant, I can help you with your daily tasks and I can also read you the news.",
+    voice_description="Calm and soft with a slight British accent.",
     gender=Gender.female,
     age=Age.young,
     accent=Accent.british,

diff --git a/README.md b/README.md
@@ -18,14 +18,15 @@ pip install elevenlabs
 [![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/elevenlabs/tts)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/flavioschneider/49468d728a816c6538fd2f56b3b50b96/elevenlabs-python.ipynb)
 
+We support two main models: the newest `eleven_multilingual_v2`, a single foundational model supporting 28 languages including English, Chinese, Spanish, Hindi, Portuguese, French, German, Japanese, Arabic, Korean, Indonesian, Italian, Dutch, Turkish, Polish, Swedish, Filipino, Malay, Romanian, Ukrainian, Greek, Czech, Danish, Finnish, Bulgarian, Croatian, Slovak, and Tamil; and `eleven_monolingual_v1`, a low-latency model specifically trained for English speech.
 
 ```py
 from elevenlabs import generate, play
 
 audio = generate(
-  text="Hi! My name is Bella, nice to meet you!",
+  text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
   voice="Bella",
-  model="eleven_monolingual_v1"
+  model="eleven_multilingual_v2"
 )
 
 play(audio)
@@ -35,78 +36,17 @@ play(audio)
 
 <i> Don't forget to unmute the player! </i>
 
-<b>Adam</b>
-
-[Adam.webm](https://user-images.githubusercontent.com/12028621/232730475-4babdd1b-6078-47d0-811a-68678d009918.webm)
-
-<b>Antoni</b>
-
-[Antoni.webm](https://user-images.githubusercontent.com/12028621/232730870-164e2423-26d2-4423-89ff-36e78483e4e2.webm)
-
-<b>Arnold</b>
-
-[Arnold.webm](https://user-images.githubusercontent.com/12028621/232731257-f7cccbf0-e4d3-49de-9bc8-280e54a29e88.webm)
-
-<b>Bella</b>
-
-[Bella.webm](https://user-images.githubusercontent.com/12028621/232731276-00a8e665-5f7c-4fe7-adcf-47ac0d634874.webm)
-
-<b>Domi</b>
-
-[Domi.webm](https://user-images.githubusercontent.com/12028621/232731299-ca33fdab-fa79-4343-afad-ece0d4363ffe.webm)
-
-<b>Elli</b>
-
-[Elli.webm](https://user-images.githubusercontent.com/12028621/232731318-a1debbd9-ce06-4e71-8199-119cddb2f19c.webm)
-
-<b>Josh</b>
-
-[Josh.webm](https://user-images.githubusercontent.com/12028621/232731374-f81bcc7c-d30c-4958-8086-2271274d6f12.webm)
-
-<b>Rachel</b>
-
-[Rachel.webm](https://user-images.githubusercontent.com/12028621/232731393-9ccdcf54-a957-44ac-b882-67a95e95d7d0.webm)
-
-<b>Sam</b>
-
-[Sam.webm](https://user-images.githubusercontent.com/12028621/232731428-18bca274-6b84-42e4-b4d8-819b0bd0a19a.webm)
 
 </details>
 
-## 🌎 Multilingual
-
-The `eleven_multilingual_v1` model supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi.
-```py
-from elevenlabs import generate, play
-
-audio = generate(
-    text="¡Hola! Mi nombre es Arnold, encantado de conocerte!",
-    voice="Arnold",
-    model='eleven_multilingual_v1'
-)
-
-play(audio)
-```
-
-<details> <summary> Play </summary>
-
-<i> Don't forget to unmute the player! </i>
-
-[hola.webm](https://user-images.githubusercontent.com/12028621/235474694-584f7103-dab2-4c39-bb9a-8e5f00be85da.webm)
-
-</details>
-
-
 ## 🗣️ Voices
 
 List all your available voices with `voices()`.
 ```py
 from elevenlabs import voices, generate
 
 voices = voices()
-
 audio = generate(text="Hello there!", voice=voices[0])
-
 print(voices)
 ```
 
@@ -119,80 +59,39 @@ Voices(
             voice_id='21m00Tcm4TlvDq8ikWAM',
             name='Rachel',
             category='premade',
-            settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
+            settings=None,
         ),
         Voice(
             voice_id='AZnzlk1XvdvUeBnXmlld',
             name='Domi',
             category='premade',
-            settings=VoiceSettings(stability=0.1, similarity_boost=0.75)
+            settings=None,
         ),
-        Voice(
-            voice_id='EXAVITQu4vr4xnSDxMaL',
-            name='Bella',
-            category='premade',
-            settings=VoiceSettings(stability=0.245, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='ErXwobaYiN019PkySvjV',
-            name='Antoni',
-            category='premade',
-            settings=VoiceSettings(stability=0.195, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='MF3mGyEYCl7XYWbV9V6O',
-            name='Elli',
-            category='premade',
-            settings=VoiceSettings(stability=0.755, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='TxGEqnHWrfWFTfGW9XjX',
-            name='Josh',
-            category='premade',
-            settings=VoiceSettings(stability=0.15, similarity_boost=0.51)
-        ),
-        Voice(
-            voice_id='VR6AewLTigWG4xSOukaG',
-            name='Arnold',
-            category='premade',
-            settings=VoiceSettings(stability=0.15, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='pNInz6obpgDQGcFmaJgB',
-            name='Adam',
-            category='premade',
-            settings=VoiceSettings(stability=0.2, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='yoZ06aMxZJJ28mfd3POQ',
-            name='Sam',
-            category='premade',
-            settings=VoiceSettings(stability=0.25, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='3KehPe3gxEYqOFSGDzGM',
-            name='test',
-            category='cloned',
-            settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='duorpit9NOULscUJ2OAp',
-            name='test',
-            category='cloned',
-            settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
-        ),
-        Voice(
-            voice_id='h2rNV1Iql95D2QSSuvLY',
-            name='test',
-            category='cloned',
-            settings=VoiceSettings(stability=0.75, similarity_boost=0.75)
-        )
+        ...
     ]
 )
 ```
 
 </details>
 
+Build a voice object with custom settings to personalize the voice style, or call `voice.fetch_settings()` to get the default settings for the voice.
+
+```py
+from elevenlabs import Voice, VoiceSettings, generate, play
+
+audio = generate(
+    text="Hello! My name is Bella.",
+    voice=Voice(
+        voice_id='EXAVITQu4vr4xnSDxMaL',
+        settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)
+    )
+)
+
+play(audio)
+```
+
+</details>
+
 
 ### Clone Voice
 
@@ -228,7 +127,7 @@ stream(audio_stream)
 ```
 
 ### Input streaming
-Stream text chunks into audio as it's being generated. Note chunks must end with a space or punctuation (" ", ".", "?", "!").
+Stream text chunks into audio as it's being generated, with <1s latency. Note: if a chunk doesn't end with a space or punctuation (" ", ".", "?", "!"), the stream will wait for more text.
 ```py
 from elevenlabs import generate, stream
 
@@ -248,7 +147,7 @@ stream(audio_stream)
 
 ## 🔑 API Key
 
-The basic API has a limited number of characters. To increase this limit, you can get a free API key from [Elevenlabs](https://elevenlabs.io/) ([step-by-step guide](https://docs.elevenlabs.io/authentication/01-xi-api-key)) and set is as environment variable `ELEVEN_API_KEY`. Alternatively you can provide the `api_key` string argument to the `generate` function, or set it globally in code with:
+The basic API has a limited number of characters. To increase this limit, you can get a free API key from [Elevenlabs](https://elevenlabs.io/) ([step-by-step guide](https://docs.elevenlabs.io/authentication/01-xi-api-key)) and set it as the environment variable `ELEVEN_API_KEY`. Alternatively, you can provide the `api_key` string argument to the `generate` function, or set it globally in code with:
 
 ```py
 from elevenlabs import set_api_key

diff --git a/elevenlabs/api/base.py b/elevenlabs/api/base.py
@@ -1,3 +1,4 @@
+import json
 import os
 from typing import Optional, Sequence
 
@@ -7,7 +8,6 @@
 from .error import (
     APIError,
     AuthorizationError,
-    HTTPError,
     RateLimitError,
     UnauthenticatedRateLimitError,
 )
@@ -16,12 +16,12 @@
 
 
 class API(BaseModel):
-    class Config:
+    class ConfigDict:
         # Parse enum to strings when converting to dict
         use_enum_values = True
         # Validate fields when setting manually
         validate_assignment = True
-        #
+        # Allows having a field called `model_id` in the class
         protected_namespaces = ()
 
     @staticmethod
@@ -43,18 +43,29 @@ def request(url: str, method: str, api_key: Optional[str] = None, **kwargs):
         if status_code == 200:
             return response
 
-        error = HTTPError(response)
+        # Parse the error message and status
+        error = json.loads(response.text)
+        message, status = "", ""
+        if "detail" in error:
+            detail = error["detail"]
+            if isinstance(error, dict):
+                message = detail.get("message", "")
+                status = detail.get("status", "")
+        else:
+            message = str(error)
+            status = str(response.status_code)
 
+        # Raise the appropriate error
         if status_code == 401:
-            if error.status == "quota_exceeded":
+            if status == "quota_exceeded":
                 if api_key is None:
-                    raise UnauthenticatedRateLimitError(error)
+                    raise UnauthenticatedRateLimitError(message)
                 else:
-                    raise RateLimitError(error)
-            elif error.status == "needs_authorization":
-                raise AuthorizationError(error)
+                    raise RateLimitError(message)
+            elif status == "needs_authorization":
+                raise AuthorizationError(message)
 
-        raise APIError(error)
+        raise APIError(message, status)
 
     @staticmethod
     def get(url: str, *args, **kwargs):