WIP: Beatrice

UCypy · Nov 3, 2023 · 56e26d4 · 56e26d4
1 parent e62a140
commit 56e26d4
Show file tree

Hide file tree

Showing 12 changed files with 297 additions and 57 deletions.
diff --git a/.gitignore b/.gitignore
@@ -66,4 +66,5 @@ start_trainer.sh
 venv/
 
 
-beatrice_internal_api.cp310-win_amd64.pyd
+beatrice_internal_api.cp310-win_amd64.pyd
+108_average_110b_10.bin
diff --git a/LICENSE-NOTICE b/LICENSE-NOTICE
@@ -5,3 +5,9 @@
    Diffusion SVC and DDSP SVC uses DiffSinger Community Vocoders. Please check the license from the following link.
    Please place it on pretrain\\nsf_hifigan if you are using a different model.
    https://openvpi.github.io/vocoders/
+
+2. Beatrice JVS Corpus Edition のライセンスについてはこちらを確認してください。
+  [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)
+
+  Please check here for the license of the Beatrice JVS Corpus Edition.
+  [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)
diff --git a/README.md b/README.md
@@ -28,8 +28,9 @@
   - [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
   - [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
   - [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
+  - [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental (***NOT MIT License***; see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))
 
-2. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。
+1. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。
 
 ![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
 

diff --git a/README_en.md b/README_en.md
@@ -26,8 +26,9 @@
 - [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
 - [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
 - [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
+- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental (***NOT MIT License***; see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))
 
-2. Distribute the load by running Voice Changer on a different PC
+1. Distribute the load by running Voice Changer on a different PC
    The real-time voice changer of this application works on a server-client configuration. By running the MMVC server on a separate PC, you can run it while minimizing the impact on other resource-intensive processes such as gaming commentary.
 
 ![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)

diff --git a/server/.vscode/settings.json b/server/.vscode/settings.json
@@ -1,17 +1,13 @@
 {
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.black-formatter"
+  },
+  "flake8.args": ["--max-line-length=1024", "--ignore=E402,E203,E722"],
   "workbench.colorCustomizations": {
     "tab.activeBackground": "#65952acc"
   },
-  "python.formatting.provider": "black",
-  "python.linting.mypyEnabled": false,
-  "[python]": {
-    "editor.defaultFormatter": null, // Prettier を使わないようにする
-    "editor.formatOnSave": true // ファイル保存時に自動フォーマット
-  },
-  "python.formatting.blackArgs": ["--line-length", "550"],
-  "python.linting.flake8Enabled": true,
-  "python.linting.flake8Args": [
-    "--max-line-length=99999"
-  ],
-  "python.linting.enabled": true
+  "black-formatter.args": ["--line-length", "550"],
+  "python.testing.pytestArgs": ["test"],
+  "python.testing.unittestEnabled": false,
+  "python.testing.pytestEnabled": true
 }
diff --git a/server/const.py b/server/const.py
@@ -15,6 +15,8 @@
     "Beatrice",
 ]
 
+StaticSlot: TypeAlias = Literal["Beatrice-JVS",]
+
 STORED_SETTING_FILE = "stored_setting.json"
 
 SERVER_DEVICE_SAMPLE_RATES = [16000, 32000, 44100, 48000, 96000, 192000]
@@ -23,6 +25,8 @@
 SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
 MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
 UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
+
 NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"  # type: ignore
 NATIVE_CLIENT_FILE_MAC = (
     os.path.join(
@@ -36,6 +40,9 @@
     else "voice-changer-native-client"
 )
 
+MODEL_DIR_STATIC = os.path.join(sys._MEIPASS, "model_dir_static") if hasattr(sys, "_MEIPASS") else "model_dir_static"
+
+
 HUBERT_ONNX_MODEL_PATH = os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") if hasattr(sys, "_MEIPASS") else "model_hubert/hubert_simple.onnx"  # type: ignore
 
 
@@ -48,11 +55,7 @@ def getFrontendPath():
     return frontend_path
 
 
-EmbedderType: TypeAlias = Literal[
-    "hubert_base",
-    "contentvec",
-    "hubert-base-japanese"
-]
+EmbedderType: TypeAlias = Literal["hubert_base", "contentvec", "hubert-base-japanese"]
 
 
 class EnumInferenceTypes(Enum):
@@ -67,9 +70,7 @@ class EnumInferenceTypes(Enum):
     onnxRVCNono = "onnxRVCNono"
 
 
-DiffusionSVCInferenceType: TypeAlias = Literal[
-    "combo",
-]
+DiffusionSVCInferenceType: TypeAlias = Literal["combo",]
 
 
 PitchExtractorType: TypeAlias = Literal[
@@ -82,10 +83,7 @@ class EnumInferenceTypes(Enum):
     "rmvpe_onnx",
 ]
 
-ServerAudioDeviceType: TypeAlias = Literal[
-    "audioinput",
-    "audiooutput"
-]
+ServerAudioDeviceType: TypeAlias = Literal["audioinput", "audiooutput"]
 
 RVCSampleMode: TypeAlias = Literal[
     "production",
@@ -147,7 +145,6 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
             ("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
             ("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
             ("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
-
         ]
     elif mode == "testOfficial":
         return [

diff --git a/server/data/ModelSlot.py b/server/data/ModelSlot.py
@@ -1,5 +1,5 @@
 from typing import TypeAlias, Union
-from const import MAX_SLOT_NUM, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, VoiceChangerType
+from const import MAX_SLOT_NUM, MODEL_DIR_STATIC, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, StaticSlot, VoiceChangerType
 
 from dataclasses import dataclass, asdict, field
 
@@ -9,7 +9,7 @@
 
 @dataclass
 class ModelSlot:
-    slotIndex: int = -1
+    slotIndex: int | StaticSlot = -1
     voiceChangerType: VoiceChangerType | None = None
     name: str = ""
     description: str = ""
@@ -40,7 +40,7 @@ class RVCModelSlot(ModelSlot):
     sampleId: str = ""
     speakers: dict = field(default_factory=lambda: {0: "target"})
 
-    version:str =  "v2"
+    version: str = "v2"
 
 
 @dataclass
@@ -137,7 +137,7 @@ class BeatriceModelSlot(ModelSlot):
 ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot, DiffusionSVCModelSlot, BeatriceModelSlot]
 
 
-def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
+def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
     slotDir = os.path.join(model_dir, str(slotIndex))
     jsonFile = os.path.join(slotDir, "params.json")
     if not os.path.exists(jsonFile):
@@ -165,6 +165,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
         return DiffusionSVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
     elif slotInfo.voiceChangerType == "Beatrice":
         slotInfoKey.extend(list(BeatriceModelSlot.__annotations__.keys()))
+        if slotIndex == "Beatrice-JVS":
+            return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
+
         return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
     else:
         return ModelSlot()
@@ -176,6 +179,9 @@ def loadAllSlotInfo(model_dir: str):
         slotInfo = loadSlotInfo(model_dir, slotIndex)
         slotInfo.slotIndex = slotIndex  # スロットインデックスは動的に注入
         slotInfos.append(slotInfo)
+
+    slotInfo = loadSlotInfo(MODEL_DIR_STATIC, "Beatrice-JVS")
+    slotInfos.append(slotInfo)
     return slotInfos
 
 

diff --git a/server/model_dir_static/Beatrice-JVS/params.json b/server/model_dir_static/Beatrice-JVS/params.json
@@ -0,0 +1,15 @@
+{
+  "slotIndex": "Beatrice-JVS",
+  "voiceChangerType": "Beatrice",
+  "name": "108_average_110b_10",
+  "description": "",
+  "credit": "",
+  "termsOfUseUrl": "",
+  "iconFile": "",
+  "speakers": {
+    "1": "user1",
+    "2": "user2"
+  },
+  "modelFile": "108_average_110b_10.bin",
+  "dstId": 1
+}
diff --git a/server/voice_changer/Beatrice/Beatrice.py b/server/voice_changer/Beatrice/Beatrice.py
@@ -1,46 +1,182 @@
+"""
+
+"""
+
+
+from dataclasses import asdict
+from typing import Union
+import os
+import numpy as np
+from const import MODEL_DIR_STATIC
 from data.ModelSlot import BeatriceModelSlot
 from mods.log_control import VoiceChangaerLogger
+from voice_changer.Beatrice.BeatriceSettings import BeatriceSettings
 
 from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
+from beatrice_internal_api import BeatriceInternalAPI
 
 logger = VoiceChangaerLogger.get_instance().getLogger()
 
 
+class BeatriceAPI(BeatriceInternalAPI):
+    """Validating wrapper around ``BeatriceInternalAPI``.
+
+    Each overridden method checks its argument and then delegates to the
+    parent class imported from ``beatrice_internal_api``.
+    """
+
+    def __init__(self, sample_rate: float = 48000.0):
+        """Construct the API object with the given sample rate.
+
+        Raises:
+            ValueError: if ``sample_rate`` is below 1000.0.
+        """
+        if sample_rate < 1000.0:
+            raise ValueError(sample_rate)
+        super().__init__(float(sample_rate))
+
+    def get_n_speakers(self):
+        """Return the speaker count (hard-coded to 500); used as the upper bound in ``set_target_speaker_id``."""
+        return 500
+
+    def get_target_speaker_names(self):
+        """Return 500 display names: five entries for each of jvs001..jvs100.
+
+        The "[商用不可]" prefix is Japanese for "not for commercial use".
+        """
+        names = []
+        for i in range(1, 101):
+            # Five variants per speaker: the plain name plus "-1", "-2", "+1", "+2".
+            # NOTE(review): the suffixes presumably denote pitch variants - confirm.
+            names.append(f"[商用不可] jvs{i:03d}")
+            names.append(f"[商用不可] jvs{i:03d} -1")
+            names.append(f"[商用不可] jvs{i:03d} -2")
+            names.append(f"[商用不可] jvs{i:03d} +1")
+            names.append(f"[商用不可] jvs{i:03d} +2")
+        return names
+
+    def set_sample_rate(self, sample_rate: float):
+        """Forward ``sample_rate`` (as a float) to the parent class.
+
+        Raises:
+            ValueError: if ``sample_rate`` is below 1000.0.
+        """
+        if sample_rate < 1000.0:
+            raise ValueError(sample_rate)
+        super().set_sample_rate(float(sample_rate))
+
+    def set_target_speaker_id(self, target_speaker_id: int):
+        """Forward ``target_speaker_id`` (as an int) to the parent class.
+
+        Raises:
+            ValueError: if the id is not in ``[0, get_n_speakers())``.
+        """
+        if not 0 <= target_speaker_id < self.get_n_speakers():
+            raise ValueError(target_speaker_id)
+        super().set_target_speaker_id(int(target_speaker_id))
+
+    def read_parameters(self, filename: Union[str, bytes, os.PathLike]):
+        """Pass ``filename`` unchanged to the parent's ``read_parameters``."""
+        super().read_parameters(filename)
+
+    def convert(self, in_wav: np.ndarray) -> np.ndarray:
+        """Run the parent's ``convert`` on a 1-D float32 array and return its result.
+
+        Raises:
+            ValueError: if ``in_wav`` is not 1-dimensional or not float32.
+        """
+        if in_wav.ndim != 1:
+            raise ValueError(in_wav.ndim)
+        if in_wav.dtype != np.float32:
+            raise ValueError(in_wav.dtype)
+        out_wav = super().convert(in_wav)
+        # NOTE(review): assert statements are skipped under `python -O`, so this
+        # shape check is not guaranteed to run in optimized mode.
+        assert in_wav.shape == out_wav.shape
+        return out_wav
+
+
 class Beatrice(VoiceChangerModel):
-    def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot):
-        raise RuntimeError("not implemented")
+    def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot, static: bool = False):
+        """Store configuration and reset buffers; no model file is read here.
+
+        Args:
+            params: voice-changer parameters; ``initialize`` reads ``model_dir`` from it.
+            slotInfo: slot description of the Beatrice model to use.
+            static: when True, ``initialize`` looks for the model file under
+                ``MODEL_DIR_STATIC`` instead of ``params.model_dir``.
+        """
+        logger.info("[Voice Changer] [Beatrice] Creating instance ")
+        self.settings = BeatriceSettings()
+        self.params = params
+
+        self.prevVol = 0.0
+        self.slotInfo = slotInfo
+        # Rolling input buffer filled by generate_input(); None until the first call.
+        self.audio_buffer: AudioInOut | None = None
+
+        self.static = static
 
     def initialize(self):
-        raise RuntimeError("not implemented")
+        """Create the ``BeatriceAPI`` object, load the model file and apply the sample rate.
+
+        The model path is ``<base>/<slotIndex>/<basename of modelFile>``, where
+        ``<base>`` is ``MODEL_DIR_STATIC`` for static slots and
+        ``params.model_dir`` otherwise.
+
+        NOTE(review): reads ``self.inputSampleRate``, which in the visible code
+        is only assigned by ``setSamplingRate`` - confirm callers go through
+        that method first.
+        """
+        logger.info("[Voice Changer] [Beatrice] Initializing... ")
+
+        self.beatrice_api = BeatriceAPI()
+        if self.static:
+            modelPath = os.path.join(MODEL_DIR_STATIC, str(self.slotInfo.slotIndex), os.path.basename(self.slotInfo.modelFile))
+        else:
+            modelPath = os.path.join(self.params.model_dir, str(self.slotInfo.slotIndex), os.path.basename(self.slotInfo.modelFile))
+        self.beatrice_api.read_parameters(modelPath)
+        self.beatrice_api.set_sample_rate(self.inputSampleRate)
+
+        # Other settings
+        self.settings.dstId = self.slotInfo.dstId
+        logger.info("[Voice Changer] [Beatrice] Initializing... done")
 
     def setSamplingRate(self, inputSampleRate, outputSampleRate):
-        raise RuntimeError("not implemented")
+        """Record the sample rates and (re)initialize, but only when both rates are equal.
+
+        When the rates differ, the values are printed and nothing else changes:
+        the attributes are not updated and ``initialize`` is not called.
+        """
+        if inputSampleRate == outputSampleRate:
+            self.inputSampleRate = inputSampleRate
+            self.outputSampleRate = outputSampleRate
+            self.initialize()
+        else:
+            # NOTE(review): uses print rather than the module logger, and leaves
+            # the instance unconfigured - confirm this is intended.
+            print("inputSampleRate, outputSampleRate", inputSampleRate, outputSampleRate)
 
     def update_settings(self, key: str, val: int | float | str):
-        raise RuntimeError("not implemented")
+        """Set ``self.settings.<key>`` to ``val`` coerced to the key's declared type.
+
+        The type is chosen by which of ``settings.intData``, ``settings.floatData``
+        or ``settings.strData`` contains ``key`` (checked in that order).
+
+        Returns:
+            True when the key was recognised and set, False otherwise.
+        """
+        logger.info(f"[Voice Changer][Beatrice]: update_settings {key}:{val}")
+        if key in self.settings.intData:
+            setattr(self.settings, key, int(val))
+        elif key in self.settings.floatData:
+            setattr(self.settings, key, float(val))
+        elif key in self.settings.strData:
+            setattr(self.settings, key, str(val))
+        else:
+            return False
+        return True
 
     def get_info(self):
-        raise RuntimeError("not implemented")
+        """Return the current settings as a dict (via ``dataclasses.asdict``)."""
+        data = asdict(self.settings)
+        return data
 
     def get_processing_sampling_rate(self):
-        raise RuntimeError("not implemented")
+        """Return ``self.inputSampleRate`` as the processing sample rate."""
+        return self.inputSampleRate
 
     def generate_input(
         self,
         newData: AudioInOut,
         crossfadeSize: int,
         solaSearchFrame: int = 0,
     ):
-        raise RuntimeError("not implemented")
+        """Append ``newData`` to the rolling buffer and return the window to convert.
+
+        The window length is ``len(newData) + crossfadeSize + solaSearchFrame``;
+        the buffer is trimmed to exactly that many trailing samples.
+
+        Returns:
+            A 1-tuple containing the trimmed buffer.
+        """
+        # Scale to float32 by 1/32768.
+        # NOTE(review): assumes newData holds int16-range PCM samples - confirm.
+        newData = newData.astype(np.float32) / 32768.0
+        # Append to the previously buffered data
+        if self.audio_buffer is not None:
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
+        else:
+            self.audio_buffer = newData
+
+        convertSize = newData.shape[0] + crossfadeSize + solaSearchFrame
+
+        # Pad the front with zeros when the buffer is still shorter than the window.
+        # np.zeros defaults to float64, so the padded buffer becomes float64;
+        # inference() casts the returned array back to float32.
+        if self.audio_buffer.shape[0] < convertSize:
+            self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
+
+        # Keep only the part that will be converted
+        convertOffset = -1 * convertSize
+        self.audio_buffer = self.audio_buffer[convertOffset:]
+
+        return (self.audio_buffer,)
 
     def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
-        raise RuntimeError("not implemented")
+        """Convert the buffered audio with the Beatrice API and return int16 samples.
+
+        The window from ``generate_input`` is fed to ``beatrice_api.convert`` in
+        consecutive slices of at most 500 samples; the outputs are concatenated,
+        scaled by 32767.0 and cast to int16.
+        """
+        data = self.generate_input(receivedData, crossfade_frame, sola_search_frame)
+        audio = (data[0]).astype(np.float32)
+
+        # Apply the currently selected target speaker before converting.
+        self.beatrice_api.set_target_speaker_id(self.settings.dstId)
+
+        block_size = 500
+        out_wav_blocks = []
+        head = 0
+        while head < len(audio):
+            # The final slice may be shorter than block_size.
+            in_wav_block = audio[head : head + block_size]
+            out_wav_block = self.beatrice_api.convert(in_wav_block)
+            out_wav_blocks.append(out_wav_block)
+            head += block_size
+        out_wav = np.concatenate(out_wav_blocks)
+        assert audio.shape == out_wav.shape
+
+        # NOTE(review): input is divided by 32768.0 in generate_input but output
+        # is multiplied by 32767.0 here - confirm the asymmetry is intended.
+        return (out_wav * 32767.0).astype(np.int16)
 
     def __del__(self):
+        # NOTE(review): `self.pipeline` is not assigned anywhere in the visible
+        # part of this class; if it is never set, this `del` raises
+        # AttributeError during finalization - confirm.
         del self.pipeline
 
+    # def export2onnx(self):
+    #     modelSlot = self.slotInfo
+
+    #     if modelSlot.isONNX:
+    #         print("[Voice Changer] export2onnx, No pyTorch filepath.")
+    #         return {"status": "ng", "path": ""}
+
+    #     output_file_simple = export2onnx(self.settings.gpu, modelSlot)
+    #     return {
+    #         "status": "ok",
+    #         "path": f"/tmp/{output_file_simple}",
+    #         "filename": output_file_simple,
+    #     }
+
     def get_model_current(self):
         return [
             {