Batch fix

Also support 20 kHz in the image-to-audio playground UI
qinfrank · Feb 8, 2023 · 0e6d743 · 0e6d743
1 parent 38cce7a
commit 0e6d743
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 11 deletions.
diff --git a/riffusion/cli.py b/riffusion/cli.py
@@ -213,15 +213,19 @@ def sample_clips_batch(
     mono: bool = False,
     extension: str = "mp3",
     num_threads: T.Optional[int] = None,
+    glob: str = "*",
     limit: int = -1,
     seed: int = -1,
 ):
     """
     Sample short clips from a directory of audio files, multi-threaded.
     """
-    audio_paths = list(Path(audio_dir).glob("*"))
+    audio_paths = list(Path(audio_dir).glob(glob))
     audio_paths.sort()
 
+    # Exclude json
+    audio_paths = [p for p in audio_paths if p.suffix != ".json"]
+
     if limit > 0:
         audio_paths = audio_paths[:limit]
 
@@ -242,26 +246,24 @@ def process_one(audio_path: Path) -> None:
 
         segment_duration_ms = int(segment.duration_seconds * 1000)
         for i in range(num_clips_per_file):
-            clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
+            try:
+                clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
+            except ValueError:
+                continue
+
             clip = segment[clip_start_ms : clip_start_ms + duration_ms]
 
             clip_name = (
-                f"{audio_path.stem}_{i}"
-                "start_{clip_start_ms}_ms_duration_{duration_ms}_ms.{extension}"
+                f"{audio_path.stem}_{i}_"
+                f"start_{clip_start_ms}_ms_dur_{duration_ms}_ms.{extension}"
             )
             clip.export(output_path / clip_name, format=extension)
 
     pool = ThreadPool(processes=num_threads)
     with tqdm.tqdm(total=len(audio_paths)) as pbar:
         for result in pool.imap_unordered(process_one, audio_paths):
-            # process_one(audio_path)
             pbar.update()
 
-    # with tqdm.tqdm(total=len(audio_paths)) as pbar:
-    #     for i, _ in enumerate(pool.imap_unordered(process_one, audio_paths)):
-    #         pass
-    # pbar.update()
-
 
 if __name__ == "__main__":
     argh.dispatch_commands(

diff --git a/riffusion/streamlit/pages/image_to_audio.py b/riffusion/streamlit/pages/image_to_audio.py
@@ -32,6 +32,8 @@ def render_image_to_audio() -> None:
     device = streamlit_util.select_device(st.sidebar)
     extension = streamlit_util.select_audio_extension(st.sidebar)
 
+    use_20k = st.sidebar.checkbox("Use 20kHz", value=False)
+
     image_file = st.file_uploader(
         "Upload a file",
         type=streamlit_util.IMAGE_EXTENSIONS,
@@ -52,7 +54,14 @@ def render_image_to_audio() -> None:
         params = SpectrogramParams.from_exif(exif=image.getexif())
     except KeyError:
         st.info("Could not find spectrogram parameters in exif data. Using defaults.")
-        params = SpectrogramParams()
+        if use_20k:
+            params = SpectrogramParams(
+                min_frequency=10,
+                max_frequency=20000,
+                stereo=True,
+            )
+        else:
+            params = SpectrogramParams()
 
     with st.expander("Spectrogram Parameters", expanded=False):
         st.json(dataclasses.asdict(params))