Skip to content

Commit

Permalink
Batch fix
Browse files: browse the repository at this point in the history
And support 20 kHz in the image-to-audio playground UI
  • Loading branch information
hmartiro committed Feb 8, 2023
1 parent 38cce7a commit 0e6d743
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
22 changes: 12 additions & 10 deletions riffusion/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,19 @@ def sample_clips_batch(
mono: bool = False,
extension: str = "mp3",
num_threads: T.Optional[int] = None,
glob: str = "*",
limit: int = -1,
seed: int = -1,
):
"""
Sample short clips from a directory of audio files, multi-threaded.
"""
audio_paths = list(Path(audio_dir).glob("*"))
audio_paths = list(Path(audio_dir).glob(glob))
audio_paths.sort()

# Exclude json
audio_paths = [p for p in audio_paths if p.suffix != ".json"]

if limit > 0:
audio_paths = audio_paths[:limit]

Expand All @@ -242,26 +246,24 @@ def process_one(audio_path: Path) -> None:

segment_duration_ms = int(segment.duration_seconds * 1000)
for i in range(num_clips_per_file):
clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
try:
clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
except ValueError:
continue

clip = segment[clip_start_ms : clip_start_ms + duration_ms]

clip_name = (
f"{audio_path.stem}_{i}"
"start_{clip_start_ms}_ms_duration_{duration_ms}_ms.{extension}"
f"{audio_path.stem}_{i}_"
f"start_{clip_start_ms}_ms_dur_{duration_ms}_ms.{extension}"
)
clip.export(output_path / clip_name, format=extension)

pool = ThreadPool(processes=num_threads)
with tqdm.tqdm(total=len(audio_paths)) as pbar:
for result in pool.imap_unordered(process_one, audio_paths):
# process_one(audio_path)
pbar.update()

# with tqdm.tqdm(total=len(audio_paths)) as pbar:
# for i, _ in enumerate(pool.imap_unordered(process_one, audio_paths)):
# pass
# pbar.update()


if __name__ == "__main__":
argh.dispatch_commands(
Expand Down
11 changes: 10 additions & 1 deletion riffusion/streamlit/pages/image_to_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def render_image_to_audio() -> None:
device = streamlit_util.select_device(st.sidebar)
extension = streamlit_util.select_audio_extension(st.sidebar)

use_20k = st.sidebar.checkbox("Use 20kHz", value=False)

image_file = st.file_uploader(
"Upload a file",
type=streamlit_util.IMAGE_EXTENSIONS,
Expand All @@ -52,7 +54,14 @@ def render_image_to_audio() -> None:
params = SpectrogramParams.from_exif(exif=image.getexif())
except KeyError:
st.info("Could not find spectrogram parameters in exif data. Using defaults.")
params = SpectrogramParams()
if use_20k:
params = SpectrogramParams(
min_frequency=10,
max_frequency=20000,
stereo=True,
)
else:
params = SpectrogramParams()

with st.expander("Spectrogram Parameters", expanded=False):
st.json(dataclasses.asdict(params))
Expand Down

0 comments on commit 0e6d743

Please sign in to comment.