Skip to content

Commit

Permalink
WIP: Beatrice
Browse files Browse the repository at this point in the history
  • Loading branch information
w-okada committed Nov 3, 2023
1 parent e62a140 commit 56e26d4
Show file tree
Hide file tree
Showing 12 changed files with 297 additions and 57 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,5 @@ start_trainer.sh
venv/


beatrice_internal_api.cp310-win_amd64.pyd
beatrice_internal_api.cp310-win_amd64.pyd
108_average_110b_10.bin
6 changes: 6 additions & 0 deletions LICENSE-NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@
Diffusion SVC and DDSP SVC uses DiffSinger Community Vocoders. Please check the license from the following link.
Please place it on pretrain\\nsf_hifigan if you are using a different model.
https://openvpi.github.io/vocoders/

2. Beatrice JVS Corpus Edition のライセンスについてはこちらを確認してください。
[readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)

Please check here for the license of the Beatrice JVS Corpus Edition.
[readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))

2. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。
1. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。

![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)

Expand Down
3 changes: 2 additions & 1 deletion README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))

2. Distribute the load by running Voice Changer on a different PC
1. Distribute the load by running Voice Changer on a different PC
The real-time voice changer of this application works on a server-client configuration. By running the MMVC server on a separate PC, you can run it while minimizing the impact on other resource-intensive processes such as gaming commentary.

![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
Expand Down
20 changes: 8 additions & 12 deletions server/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"flake8.args": ["--max-line-length=1024", "--ignore=E402,E203,E722"],
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"python.formatting.provider": "black",
"python.linting.mypyEnabled": false,
"[python]": {
"editor.defaultFormatter": null, // Prettier を使わないようにする
"editor.formatOnSave": true // ファイル保存時に自動フォーマット
},
"python.formatting.blackArgs": ["--line-length", "550"],
"python.linting.flake8Enabled": true,
"python.linting.flake8Args": [
"--max-line-length=99999"
],
"python.linting.enabled": true
"black-formatter.args": ["--line-length", "550"],
"python.testing.pytestArgs": ["test"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
23 changes: 10 additions & 13 deletions server/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"Beatrice",
]

StaticSlot: TypeAlias = Literal["Beatrice-JVS",]

STORED_SETTING_FILE = "stored_setting.json"

SERVER_DEVICE_SAMPLE_RATES = [16000, 32000, 44100, 48000, 96000, 192000]
Expand All @@ -23,6 +25,8 @@
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"

NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
NATIVE_CLIENT_FILE_MAC = (
os.path.join(
Expand All @@ -36,6 +40,9 @@
else "voice-changer-native-client"
)

MODEL_DIR_STATIC = os.path.join(sys._MEIPASS, "model_dir_static") if hasattr(sys, "_MEIPASS") else "model_dir_static"


HUBERT_ONNX_MODEL_PATH = os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") if hasattr(sys, "_MEIPASS") else "model_hubert/hubert_simple.onnx" # type: ignore


Expand All @@ -48,11 +55,7 @@ def getFrontendPath():
return frontend_path


EmbedderType: TypeAlias = Literal[
"hubert_base",
"contentvec",
"hubert-base-japanese"
]
EmbedderType: TypeAlias = Literal["hubert_base", "contentvec", "hubert-base-japanese"]


class EnumInferenceTypes(Enum):
Expand All @@ -67,9 +70,7 @@ class EnumInferenceTypes(Enum):
onnxRVCNono = "onnxRVCNono"


DiffusionSVCInferenceType: TypeAlias = Literal[
"combo",
]
DiffusionSVCInferenceType: TypeAlias = Literal["combo",]


PitchExtractorType: TypeAlias = Literal[
Expand All @@ -82,10 +83,7 @@ class EnumInferenceTypes(Enum):
"rmvpe_onnx",
]

ServerAudioDeviceType: TypeAlias = Literal[
"audioinput",
"audiooutput"
]
ServerAudioDeviceType: TypeAlias = Literal["audioinput", "audiooutput"]

RVCSampleMode: TypeAlias = Literal[
"production",
Expand Down Expand Up @@ -147,7 +145,6 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),

]
elif mode == "testOfficial":
return [
Expand Down
14 changes: 10 additions & 4 deletions server/data/ModelSlot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import TypeAlias, Union
from const import MAX_SLOT_NUM, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, VoiceChangerType
from const import MAX_SLOT_NUM, MODEL_DIR_STATIC, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, StaticSlot, VoiceChangerType

from dataclasses import dataclass, asdict, field

Expand All @@ -9,7 +9,7 @@

@dataclass
class ModelSlot:
slotIndex: int = -1
slotIndex: int | StaticSlot = -1
voiceChangerType: VoiceChangerType | None = None
name: str = ""
description: str = ""
Expand Down Expand Up @@ -40,7 +40,7 @@ class RVCModelSlot(ModelSlot):
sampleId: str = ""
speakers: dict = field(default_factory=lambda: {0: "target"})

version:str = "v2"
version: str = "v2"


@dataclass
Expand Down Expand Up @@ -137,7 +137,7 @@ class BeatriceModelSlot(ModelSlot):
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot, DiffusionSVCModelSlot, BeatriceModelSlot]


def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
slotDir = os.path.join(model_dir, str(slotIndex))
jsonFile = os.path.join(slotDir, "params.json")
if not os.path.exists(jsonFile):
Expand Down Expand Up @@ -165,6 +165,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
return DiffusionSVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
elif slotInfo.voiceChangerType == "Beatrice":
slotInfoKey.extend(list(BeatriceModelSlot.__annotations__.keys()))
if slotIndex == "Beatrice-JVS":
return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})

return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
else:
return ModelSlot()
Expand All @@ -176,6 +179,9 @@ def loadAllSlotInfo(model_dir: str):
slotInfo = loadSlotInfo(model_dir, slotIndex)
slotInfo.slotIndex = slotIndex # スロットインデックスは動的に注入
slotInfos.append(slotInfo)

slotInfo = loadSlotInfo(MODEL_DIR_STATIC, "Beatrice-JVS")
slotInfos.append(slotInfo)
return slotInfos


Expand Down
15 changes: 15 additions & 0 deletions server/model_dir_static/Beatrice-JVS/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"slotIndex": "Beatrice-JVS",
"voiceChangerType": "Beatrice",
"name": "108_average_110b_10",
"description": "",
"credit": "",
"termsOfUseUrl": "",
"iconFile": "",
"speakers": {
"1": "user1",
"2": "user2"
},
"modelFile": "108_average_110b_10.bin",
"dstId": 1
}
154 changes: 145 additions & 9 deletions server/voice_changer/Beatrice/Beatrice.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,182 @@
"""
"""


from dataclasses import asdict
from typing import Union
import os
import numpy as np
from const import MODEL_DIR_STATIC
from data.ModelSlot import BeatriceModelSlot
from mods.log_control import VoiceChangaerLogger
from voice_changer.Beatrice.BeatriceSettings import BeatriceSettings

from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams

from beatrice_internal_api import BeatriceInternalAPI

logger = VoiceChangaerLogger.get_instance().getLogger()


class BeatriceAPI(BeatriceInternalAPI):
    """Argument-checking wrapper around ``BeatriceInternalAPI``.

    Every method validates its input and then delegates to the parent-class
    method of the same name.
    """

    @staticmethod
    def _checked_rate(sample_rate: float) -> float:
        """Return ``sample_rate`` as a float, raising ``ValueError`` if it is below 1000."""
        if sample_rate < 1000.0:
            raise ValueError(sample_rate)
        return float(sample_rate)

    def __init__(self, sample_rate: float = 48000.0):
        """Construct the underlying API for the given sample rate."""
        super().__init__(self._checked_rate(sample_rate))

    def get_n_speakers(self):
        """Return the number of selectable target speakers (fixed at 500)."""
        return 500

    def get_target_speaker_names(self):
        """Return the display names of all target speakers.

        For each of jvs001..jvs100 there are five consecutive entries: the
        plain name followed by its "-1", "-2", "+1" and "+2" variants.
        """
        variants = ("", " -1", " -2", " +1", " +2")
        return [f"[商用不可] jvs{number:03d}{variant}" for number in range(1, 101) for variant in variants]

    def set_sample_rate(self, sample_rate: float):
        """Change the sample rate; raises ``ValueError`` if it is below 1000."""
        super().set_sample_rate(self._checked_rate(sample_rate))

    def set_target_speaker_id(self, target_speaker_id: int):
        """Select the target speaker; the id must lie in ``[0, get_n_speakers())``."""
        if not 0 <= target_speaker_id < self.get_n_speakers():
            raise ValueError(target_speaker_id)
        super().set_target_speaker_id(int(target_speaker_id))

    def read_parameters(self, filename: Union[str, bytes, os.PathLike]):
        """Forward ``filename`` to the parent's ``read_parameters``."""
        super().read_parameters(filename)

    def convert(self, in_wav: np.ndarray) -> np.ndarray:
        """Convert a one-dimensional float32 waveform.

        Raises:
            ValueError: if ``in_wav`` is not 1-D or its dtype is not float32.
        """
        if in_wav.ndim != 1:
            raise ValueError(in_wav.ndim)
        if in_wav.dtype != np.float32:
            raise ValueError(in_wav.dtype)
        converted = super().convert(in_wav)
        # The parent is expected to return exactly as many samples as it was given.
        assert in_wav.shape == converted.shape
        return converted


class Beatrice(VoiceChangerModel):
def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot, static: bool = False):
    """Store the configuration needed to build the Beatrice model later.

    No model is loaded here; ``initialize`` creates the API object and reads
    the parameter file.

    Args:
        params: Server parameters; ``params.model_dir`` is read by
            ``initialize`` for non-static slots.
        slotInfo: Slot description supplying ``slotIndex``, ``modelFile``
            and ``dstId``.
        static: When true, ``initialize`` resolves the model file under
            ``MODEL_DIR_STATIC`` instead of ``params.model_dir``.
    """
    logger.info("[Voice Changer] [Beatrice] Creating instance ")
    self.settings = BeatriceSettings()
    self.params = params

    self.prevVol = 0.0
    self.slotInfo = slotInfo
    # Rolling input buffer filled by generate_input(); None until the first chunk arrives.
    self.audio_buffer: AudioInOut | None = None

    self.static = static

def initialize(self):
    """Create the Beatrice API object and load the slot's parameter file.

    Reads ``self.inputSampleRate``, so ``setSamplingRate`` must have stored
    it before this runs.
    """
    logger.info("[Voice Changer] [Beatrice] Initializing... ")

    self.beatrice_api = BeatriceAPI()
    # Static slots are read from MODEL_DIR_STATIC, all others from the configured model directory.
    baseDir = MODEL_DIR_STATIC if self.static else self.params.model_dir
    modelPath = os.path.join(baseDir, str(self.slotInfo.slotIndex), os.path.basename(self.slotInfo.modelFile))
    self.beatrice_api.read_parameters(modelPath)
    self.beatrice_api.set_sample_rate(self.inputSampleRate)

    # Other settings
    self.settings.dstId = self.slotInfo.dstId
    logger.info("[Voice Changer] [Beatrice] Initializing... done")

def setSamplingRate(self, inputSampleRate, outputSampleRate):
    """Record the sampling rates and (re)initialize the model.

    Only identical input and output rates are handled. For any other
    combination the two rates are printed and nothing else is changed.
    """
    if inputSampleRate != outputSampleRate:
        print("inputSampleRate, outputSampleRate", inputSampleRate, outputSampleRate)
        return

    self.inputSampleRate = inputSampleRate
    self.outputSampleRate = outputSampleRate
    self.initialize()

def update_settings(self, key: str, val: int | float | str):
    """Assign ``val`` to the setting named ``key``.

    The value is coerced with ``int``, ``float`` or ``str`` depending on
    whether ``key`` is listed in the settings object's ``intData``,
    ``floatData`` or ``strData``.

    Returns:
        True if the key was recognised and stored, False otherwise.
    """
    logger.info(f"[Voice Changer][Beatrice]: update_settings {key}:{val}")
    if key in self.settings.intData:
        setattr(self.settings, key, int(val))
    elif key in self.settings.floatData:
        setattr(self.settings, key, float(val))
    elif key in self.settings.strData:
        setattr(self.settings, key, str(val))
    else:
        return False
    return True

def get_info(self):
    """Return the current settings dataclass converted to a plain dict."""
    return asdict(self.settings)

def get_processing_sampling_rate(self):
    """Return the input sampling rate stored by ``setSamplingRate``."""
    return self.inputSampleRate

def generate_input(
    self,
    newData: AudioInOut,
    crossfadeSize: int,
    solaSearchFrame: int = 0,
):
    """Append a new chunk to the rolling buffer and return the window to convert.

    ``newData`` is cast to float32 and divided by 32768.0, then appended to
    ``self.audio_buffer``. The buffer is trimmed to its last
    ``len(newData) + crossfadeSize + solaSearchFrame`` samples, with leading
    zeros added when not enough audio has accumulated yet.

    Returns:
        A one-element tuple holding the float32 window (also kept in
        ``self.audio_buffer``).
    """
    newData = newData.astype(np.float32) / 32768.0
    # Append to the previously received audio.
    if self.audio_buffer is not None:
        self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
    else:
        self.audio_buffer = newData

    convertSize = newData.shape[0] + crossfadeSize + solaSearchFrame

    # Pad with zeros while the buffer is still too short. The padding is created
    # as float32 explicitly: np.zeros defaults to float64, which would silently
    # upcast the whole buffer.
    if self.audio_buffer.shape[0] < convertSize:
        padding = np.zeros([convertSize], dtype=np.float32)
        self.audio_buffer = np.concatenate([padding, self.audio_buffer])

    # Keep only the part that is going to be converted.
    convertOffset = -1 * convertSize
    self.audio_buffer = self.audio_buffer[convertOffset:]

    return (self.audio_buffer,)

def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
    """Convert one chunk of audio and return it as int16 samples.

    The chunk is merged into the rolling buffer by ``generate_input`` and the
    resulting window is passed through the Beatrice API in blocks of 500
    samples, using ``self.settings.dstId`` as the target speaker.

    Returns:
        An int16 array with the same length as the converted window.
    """
    data = self.generate_input(receivedData, crossfade_frame, sola_search_frame)
    audio = data[0].astype(np.float32)  # BeatriceAPI.convert rejects anything but float32

    self.beatrice_api.set_target_speaker_id(self.settings.dstId)

    if len(audio) == 0:
        # np.concatenate raises on an empty list; there is nothing to convert.
        return np.zeros(0, dtype=np.int16)

    block_size = 500
    out_wav_blocks = [self.beatrice_api.convert(audio[head : head + block_size]) for head in range(0, len(audio), block_size)]
    out_wav = np.concatenate(out_wav_blocks)
    assert audio.shape == out_wav.shape

    # Clip to [-1, 1] first: samples outside that range would otherwise
    # overflow the int16 cast instead of saturating.
    return (np.clip(out_wav, -1.0, 1.0) * 32767.0).astype(np.int16)

def __del__(self):
    """Release the Beatrice API object when the instance is finalized.

    The API object only exists after ``initialize`` has run, so its presence
    is checked first; an unconditional ``del`` of a missing attribute would
    raise ``AttributeError`` during finalization.
    """
    if hasattr(self, "beatrice_api"):
        del self.beatrice_api

# def export2onnx(self):
# modelSlot = self.slotInfo

# if modelSlot.isONNX:
# print("[Voice Changer] export2onnx, No pyTorch filepath.")
# return {"status": "ng", "path": ""}

# output_file_simple = export2onnx(self.settings.gpu, modelSlot)
# return {
# "status": "ok",
# "path": f"/tmp/{output_file_simple}",
# "filename": output_file_simple,
# }

def get_model_current(self):
return [
{
Expand Down
Loading

0 comments on commit 56e26d4

Please sign in to comment.