From aa1c3456298ce5b62ae0f522ac9b5a8254970f96 Mon Sep 17 00:00:00 2001
From: unknown <956090487@qq.com>
Date: Sun, 15 Aug 2021 22:53:40 +0800
Subject: [PATCH 1/2] Add Chinese character input support

---
 synthesizer/inference.py |  6 +++++-
 toolbox/ui.py            | 13 ++-----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/synthesizer/inference.py b/synthesizer/inference.py
index af7bf083..07cf881d 100644
--- a/synthesizer/inference.py
+++ b/synthesizer/inference.py
@@ -9,7 +9,7 @@
 from typing import Union, List
 import numpy as np
 import librosa
-
+from pypinyin import lazy_pinyin, Style
 
 class Synthesizer:
     sample_rate = hparams.sample_rate
@@ -91,6 +91,10 @@ def synthesize_spectrograms(self, texts: List[str],
             simple_table([("Tacotron", str(tts_k) + "k"),
                           ("r", self._model.r)])
 
+        #convert chinese char to pinyin
+        list_of_pinyin = lazy_pinyin(texts, style=Style.TONE3)
+        texts = [" ".join([v for v in list_of_pinyin if v.strip()])]
+
         # Preprocess text inputs
         inputs = [text_to_sequence(text.strip(), hparams.tts_cleaner_names) for text in texts]
         if not isinstance(embeddings, list):
diff --git a/toolbox/ui.py b/toolbox/ui.py
index d56b5740..6ae6a7e4 100644
--- a/toolbox/ui.py
+++ b/toolbox/ui.py
@@ -36,17 +36,8 @@
 ], dtype=np.float) / 255
 
 default_text = \
-    "Welcome to the toolbox! To begin, load an utterance from your datasets or record one " \
-    "yourself.\nOnce its embedding has been created, you can synthesize any text written here.\n" \
-    "The synthesizer expects to generate " \
-    "outputs that are somewhere between 5 and 12 seconds.\nTo mark breaks, write a new line. " \
-    "Each line will be treated separately.\nThen, they are joined together to make the final " \
-    "spectrogram. Use the vocoder to generate audio.\nThe vocoder generates almost in constant " \
-    "time, so it will be more time efficient for longer inputs like this one.\nOn the left you " \
-    "have the embedding projections. Load or record more utterances to see them.\nIf you have " \
-    "at least 2 or 3 utterances from a same speaker, a cluster should form.\nSynthesized " \
-    "utterances are of the same color as the speaker whose voice was used, but they're " \
-    "represented with a cross."
+    "欢迎使用工具箱, 现已支持中文输入!"
+
 
 
 class UI(QDialog):

From f6306b5c1eb0af5fe0172d9bf6c3c813daefd1bf Mon Sep 17 00:00:00 2001
From: Vega Chen <>
Date: Mon, 16 Aug 2021 22:22:55 +0800
Subject: [PATCH 2/2] Add Chinese support in readme

---
 README-CN.md | 2 ++
 README.md    | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/README-CN.md b/README-CN.md
index 20f841ee..6b7241cc 100644
--- a/README-CN.md
+++ b/README-CN.md
@@ -49,6 +49,8 @@ https://github.com/CorentinJ/Real-Time-Voice-Cloning/wiki/Pretrained-models
 然后您可以尝试使用工具箱:
 `python demo_toolbox.py -d <datasets_root>`
 
+> Good news🤩: 可直接使用中文
+
 ## TODO
 - [X] 添加演示视频
 - [X] 添加对更多数据集的支持
diff --git a/README.md b/README.md
index 170b4eba..29b800f8 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,8 @@ You can then try the toolbox:
 or
 `python demo_toolbox.py`
 
+> Good news🤩: Chinese Characters are supported
+
 ## TODO
 - [x] Add demo video
 - [X] Add support for more dataset