
Commit

Half of the first upload
SkyFlap authored May 13, 2023
1 parent 819996d commit 3ac7889
Showing 71 changed files with 7,608 additions and 0 deletions.
29 changes: 29 additions & 0 deletions cluster/__init__.py
@@ -0,0 +1,29 @@
import numpy as np
import torch
from sklearn.cluster import KMeans

def get_cluster_model(ckpt_path):
    # Rebuild one fitted KMeans object per speaker from the saved attribute dict.
    checkpoint = torch.load(ckpt_path)
    kmeans_dict = {}
    for spk, ckpt in checkpoint.items():
        km = KMeans(ckpt["n_features_in_"])
        km.__dict__["n_features_in_"] = ckpt["n_features_in_"]
        km.__dict__["_n_threads"] = ckpt["_n_threads"]
        km.__dict__["cluster_centers_"] = ckpt["cluster_centers_"]
        kmeans_dict[spk] = km
    return kmeans_dict


def get_cluster_result(model, x, speaker):
    """
    x: np.array [t, 256]
    return cluster class result
    """
    return model[speaker].predict(x)


def get_cluster_center_result(model, x, speaker):
    """x: np.array [t, 256]"""
    predict = model[speaker].predict(x)
    return model[speaker].cluster_centers_[predict]


def get_center(model, x, speaker):
    # Look up cluster centers directly by index.
    return model[speaker].cluster_centers_[x]
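
A minimal usage sketch for these helpers (assumptions: a checkpoint produced by cluster/train_cluster.py exists at the script's default output path, the speaker key "speaker0" matches the "spk" section of config.json, and the random array stands in for real [t, 256] content-encoder features):

    import numpy as np
    import cluster

    # Load one fitted KMeans per speaker from the saved checkpoint.
    model = cluster.get_cluster_model("./module/So-VITS/44k/kmeans_10000.pt")

    # Placeholder features: t = 100 frames of 256-dim vectors.
    x = np.random.randn(100, 256).astype(np.float32)

    labels = cluster.get_cluster_result(model, x, "speaker0")          # cluster index per frame
    centers = cluster.get_cluster_center_result(model, x, "speaker0")  # nearest centroid per frame, shape [100, 256]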
Binary file added cluster/__pycache__/__init__.cpython-38.pyc
72 changes: 72 additions & 0 deletions cluster/train_cluster.py
@@ -0,0 +1,72 @@
import os
import time
import random
import logging
import argparse
from glob import glob
from pathlib import Path

import torch
import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans
import tqdm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False):
    logger.info(f"Loading features from {in_dir}")
    features = []
    nums = 0
    for path in tqdm.tqdm(in_dir.glob("*.soft.pt")):
        features.append(torch.load(path).squeeze(0).numpy().T)
        # print(features[-1].shape)
    features = np.concatenate(features, axis=0)
    print(nums, features.nbytes / 1024**2, "MB , shape:", features.shape, features.dtype)
    features = features.astype(np.float32)
    logger.info(f"Clustering features of shape: {features.shape}")
    t = time.time()
    if use_minibatch:
        kmeans = MiniBatchKMeans(n_clusters=n_clusters, verbose=verbose, batch_size=4096, max_iter=80).fit(features)
    else:
        kmeans = KMeans(n_clusters=n_clusters, verbose=verbose).fit(features)
    print(time.time() - t, "s")

    x = {
        "n_features_in_": kmeans.n_features_in_,
        "_n_threads": kmeans._n_threads,
        "cluster_centers_": kmeans.cluster_centers_,
    }
    print("end")

    return x


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=Path, default="./dataset/44k",
                        help='path of training data directory')
    parser.add_argument('--output', type=Path, default="./module/So-VITS/44k",
                        help='path of model output directory')

    args = parser.parse_args()

    checkpoint_dir = args.output
    dataset = args.dataset
    n_clusters = 10000

    ckpt = {}
    for spk in os.listdir(dataset):
        if os.path.isdir(dataset / spk):
            print(f"train kmeans for {spk}...")
            in_dir = dataset / spk
            x = train_cluster(in_dir, n_clusters, verbose=False)
            ckpt[spk] = x

    checkpoint_path = checkpoint_dir / f"kmeans_{n_clusters}.pt"
    checkpoint_path.parent.mkdir(exist_ok=True, parents=True)
    torch.save(
        ckpt,
        checkpoint_path,
    )
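
With the defaults shown above, the script can be run from the repository root as, for example:

    python cluster/train_cluster.py --dataset ./dataset/44k --output ./module/So-VITS/44k

It fits one (MiniBatch)KMeans model with 10000 clusters per speaker subdirectory found under the dataset path, then saves all of them into a single kmeans_10000.pt checkpoint, which is the file cluster/__init__.py later loads.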
93 changes: 93 additions & 0 deletions configs/config.json
@@ -0,0 +1,93 @@
{
  "train": {
    "log_interval": 200,
    "eval_interval": 800,
    "seed": 1234,
    "epochs": 10000,
    "learning_rate": 0.0001,
    "betas": [0.8, 0.99],
    "eps": 1e-09,
    "batch_size": 6,
    "fp16_run": false,
    "lr_decay": 0.999875,
    "segment_size": 10240,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "c_kl": 1.0,
    "use_sr": true,
    "max_speclen": 512,
    "port": "8001",
    "keep_ckpts": 10
  },
  "data": {
    "training_files": "filelists/train.txt",
    "validation_files": "filelists/val.txt",
    "max_wav_value": 32768.0,
    "sampling_rate": 44100,
    "filter_length": 2048,
    "hop_length": 512,
    "win_length": 2048,
    "n_mel_channels": 80,
    "mel_fmin": 0.0,
    "mel_fmax": 22050
  },
  "model": {
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    "upsample_rates": [8, 8, 2, 2, 2],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [16, 16, 4, 4, 4],
    "n_layers_q": 3,
    "use_spectral_norm": false,
    "gin_channels": 256,
    "ssl_dim": 256,
    "n_speakers": 200
  },
  "spk": {
    "speaker0": 0
  }
}
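
A short sketch of reading values out of this config from Python (plain json.load is used here as an assumption; the hparams helper that so-vits-svc training code usually wraps around this file is not part of this commit):

    import json

    with open("configs/config.json") as f:
        hps = json.load(f)

    print(hps["data"]["sampling_rate"])  # 44100
    print(hps["data"]["hop_length"])     # 512
    print(hps["model"]["ssl_dim"])       # 256, matches the [t, 256] features clustered above
    print(hps["spk"])                    # {'speaker0': 0}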
Empty file added dataset/44k/dataset_here.txt
20 changes: 20 additions & 0 deletions dataset_raw/wav_structure.txt
@@ -0,0 +1,20 @@
Dataset preparation

raw
├───speaker0
│   ├───xxx1-xxx1.wav
│   ├───...
│   └───Lxx-0xx8.wav
└───speaker1
    ├───xx2-0xxx2.wav
    ├───...
    └───xxx7-xxx007.wav

In addition, config.json needs to be edited accordingly, e.g.:

"n_speakers": 10

"spk": {
    "speaker0": 0,
    "speaker1": 1
}
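
A hedged helper sketch (not part of this commit) for generating the "spk" mapping from the folder names under dataset_raw, so the config stays consistent with the directory layout:

    import os
    import json

    raw_dir = "dataset_raw"
    speakers = sorted(d for d in os.listdir(raw_dir) if os.path.isdir(os.path.join(raw_dir, d)))
    spk_map = {name: idx for idx, name in enumerate(speakers)}
    print(json.dumps({"n_speakers": len(speakers), "spk": spk_map}, indent=2))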
7 changes: 7 additions & 0 deletions face_detection/__init__.py
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-

__author__ = """Adrian Bulat"""
__email__ = '[email protected]'
__version__ = '1.0.1'

from .api import FaceAlignment, LandmarksType, NetworkSize
Binary file added face_detection/__pycache__/__init__.cpython-39.pyc
Binary file added face_detection/__pycache__/api.cpython-39.pyc
Binary file added face_detection/__pycache__/models.cpython-39.pyc
Binary file added face_detection/__pycache__/utils.cpython-39.pyc
79 changes: 79 additions & 0 deletions face_detection/api.py
@@ -0,0 +1,79 @@
from __future__ import print_function
import os
import torch
from torch.utils.model_zoo import load_url
from enum import Enum
import numpy as np
import cv2
try:
    import urllib.request as request_file
except BaseException:
    import urllib as request_file

from .models import FAN, ResNetDepth
from .utils import *


class LandmarksType(Enum):
    """Enum class defining the type of landmarks to detect.
    ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
    ``_2halfD`` - these points represent the projection of the 3D points into a 2D space
    ``_3D`` - detect the points ``(x,y,z)`` in a 3D space
    """
    _2D = 1
    _2halfD = 2
    _3D = 3


class NetworkSize(Enum):
    # TINY = 1
    # SMALL = 2
    # MEDIUM = 3
    LARGE = 4

    def __new__(cls, value):
        member = object.__new__(cls)
        member._value_ = value
        return member

    def __int__(self):
        return self.value


ROOT = os.path.dirname(os.path.abspath(__file__))


class FaceAlignment:
    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE,
                 device='cuda', flip_input=False, face_detector='sfd', verbose=False):
        self.device = device
        self.flip_input = flip_input
        self.landmarks_type = landmarks_type
        self.verbose = verbose

        network_size = int(network_size)

        if 'cuda' in device:
            torch.backends.cudnn.benchmark = True

        # Get the face detector
        face_detector_module = __import__('face_detection.detection.' + face_detector,
                                          globals(), locals(), [face_detector], 0)
        self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose)

    def get_detections_for_batch(self, images):
        # Reverse the channel order (BGR -> RGB) before running the detector.
        images = images[..., ::-1]
        detected_faces = self.face_detector.detect_from_batch(images.copy())
        results = []

        for i, d in enumerate(detected_faces):
            if len(d) == 0:
                results.append(None)
                continue
            # Keep only the first detection and clamp negative coordinates to zero.
            d = d[0]
            d = np.clip(d, 0, None)

            x1, y1, x2, y2 = map(int, d[:-1])
            results.append((x1, y1, x2, y2))

        return results
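
A minimal usage sketch for the batch detection API above (assumptions: OpenCV can read a local file face.jpg, and the 'sfd' detector sub-module that FaceAlignment imports is present with its weights, which this commit only shows in part):

    import cv2
    import numpy as np
    from face_detection import FaceAlignment, LandmarksType

    fa = FaceAlignment(LandmarksType._2D, device='cpu', flip_input=False)

    img = cv2.imread("face.jpg")                # BGR, shape (H, W, 3)
    batch = np.stack([img])                     # batch of images, shape (1, H, W, 3)
    boxes = fa.get_detections_for_batch(batch)  # one (x1, y1, x2, y2) tuple or None per image
    print(boxes)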
1 change: 1 addition & 0 deletions face_detection/detection/__init__.py
@@ -0,0 +1 @@
from .core import FaceDetector
