Add help text to streamlit app
Topic: audio_to_audio
hmartiro committed Jan 6, 2023
1 parent a880466 commit b58982d
Showing 6 changed files with 55 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,6 +9,9 @@ __pycache__/
# VSCode
.vscode

# Cog
.cog/

# Distribution / packaging
.Python
build/
14 changes: 12 additions & 2 deletions riffusion/streamlit/pages/audio_to_audio.py
@@ -79,6 +79,13 @@ def increment_counter():
f"with overlap {overlap_duration_s}s."
)

with st.expander("Clip Times"):
st.dataframe({
"Start Time [s]": clip_start_times,
"End Time [s]": clip_start_times + clip_duration_s,
"Duration [s]": clip_duration_s,
})

with st.form("Conversion Params"):

prompt = st.text_input("Text Prompt")
@@ -162,7 +169,7 @@ def increment_counter():
# TODO(hayk): Scale something when computing audio
closest_width = int(np.ceil(init_image.width / 32) * 32)
closest_height = int(np.ceil(init_image.height / 32) * 32)
init_image = init_image.resize((closest_width, closest_height), Image.BICUBIC)
init_image_resized = init_image.resize((closest_width, closest_height), Image.BICUBIC)

progress_callback = None
if show_clip_details:
@@ -181,7 +188,7 @@ def increment_counter():

image = streamlit_util.run_img2img(
prompt=prompt,
init_image=init_image,
init_image=init_image_resized,
denoising_strength=denoising_strength,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
@@ -191,6 +198,9 @@ def increment_counter():
device=device,
)

# Resize back to original size
image = image.resize(init_image.size, Image.BICUBIC)

result_images.append(image)

if show_clip_details:
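
As context for the resize change above (not part of the commit): the init spectrogram image is rounded up to the nearest multiple of 32 before img2img, presumably because the diffusion pipeline expects dimensions divisible by that factor, and the output is resized back to the original size afterward. A minimal sketch of that round-trip follows; the img2img_with_padding helper and the run_img2img callable are illustrative names, not functions from this repository:

from typing import Callable

import numpy as np
from PIL import Image


def img2img_with_padding(
    init_image: Image.Image,
    run_img2img: Callable[[Image.Image], Image.Image],
    multiple: int = 32,
) -> Image.Image:
    """Round dimensions up to a multiple, run img2img, then restore the original size."""
    closest_width = int(np.ceil(init_image.width / multiple) * multiple)
    closest_height = int(np.ceil(init_image.height / multiple) * multiple)
    resized = init_image.resize((closest_width, closest_height), Image.BICUBIC)
    result = run_img2img(resized)
    # Resize back so the output matches the original spectrogram dimensions.
    return result.resize(init_image.size, Image.BICUBIC)
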
28 changes: 26 additions & 2 deletions riffusion/streamlit/pages/interpolation.py
@@ -23,6 +23,22 @@ def render_interpolation() -> None:
"""
)

with st.expander("Help", False):
st.write(
"""
This tool allows specifying two endpoints and generating a long-form interpolation
between them that traverses the latent space. The interpolation is generated by
the method described at https://www.riffusion.com/about. A seed image is used to
set the beat and tempo of the generated audio, and can be set in the sidebar.
Usually either the seed or the prompt is changed between the endpoints, but not both
at once. You can browse infinite variations of the same prompt by changing the seed.
For example, try going from "church bells" to "jazz" with 10 steps and 0.75 denoising.
This generates a 50-second clip at 5 seconds per step. Then play with the seeds or
denoising to get different variations.
"""
)

# Sidebar params

device = streamlit_util.select_device(st.sidebar)
@@ -50,7 +66,7 @@ def render_interpolation() -> None:
# TODO(hayk): Read from directory
options=["og_beat", "agile", "marim", "motorway", "vibes", "custom"],
index=0,
help="Which seed image to use for img2img",
help="Which seed image to use for img2img. Custom allows uploading your own.",
)
assert init_image_name is not None
if init_image_name == "custom":
@@ -160,7 +176,15 @@ def get_prompt_inputs(key: str) -> PromptInput:
Compute prompt inputs from widgets.
"""
prompt = st.text_input("Prompt", label_visibility="collapsed", key=f"prompt_{key}")
seed = T.cast(int, st.number_input("Seed", value=42, key=f"seed_{key}"))
seed = T.cast(
int,
st.number_input(
"Seed",
value=42,
key=f"seed_{key}",
help="Integer used to generate a random result. Vary this to explore alternatives.",
),
)
denoising = st.number_input(
"Denoising", value=0.75, key=f"denoising_{key}", help="How much to modify the seed image"
)
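
The help text added above describes traversing the latent space between two endpoints. As an illustration only (not part of this commit), here is a minimal sketch of spherical interpolation between two latent vectors; the slerp helper and the placeholder latents are assumptions, not code from this repository:

import numpy as np


def slerp(t: float, v0: np.ndarray, v1: np.ndarray) -> np.ndarray:
    """Spherical interpolation between two latent vectors."""
    v0_unit = v0 / np.linalg.norm(v0)
    v1_unit = v1 / np.linalg.norm(v1)
    dot = float(np.clip(np.sum(v0_unit * v1_unit), -1.0, 1.0))
    theta = np.arccos(dot)
    if np.isclose(theta, 0.0):
        # Nearly parallel vectors: fall back to linear interpolation.
        return (1.0 - t) * v0 + t * v1
    return (np.sin((1.0 - t) * theta) * v0 + np.sin(t * theta) * v1) / np.sin(theta)


# Ten interpolation steps, e.g. from a "church bells" latent to a "jazz" latent.
rng = np.random.default_rng(42)
latent_a = rng.standard_normal(512)  # placeholder latents; the real ones come from the model
latent_b = rng.standard_normal(512)
steps = [slerp(t, latent_a, latent_b) for t in np.linspace(0.0, 1.0, 10)]

With 10 steps at roughly 5 seconds of audio per step, this corresponds to the 50-second example in the help text.
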
3 changes: 2 additions & 1 deletion riffusion/streamlit/pages/split_audio.py
@@ -16,14 +16,15 @@ def render_split_audio() -> None:
)

device = streamlit_util.select_device(st.sidebar)
splitter = streamlit_util.get_audio_splitter(device=device)

audio_file = st.file_uploader(
"Upload audio",
type=["mp3", "m4a", "ogg", "wav", "flac"],
label_visibility="collapsed",
)

splitter = streamlit_util.get_audio_splitter(device=device)

if not audio_file:
st.info("Upload audio to get started")
return
21 changes: 10 additions & 11 deletions riffusion/streamlit/playground.py
@@ -4,34 +4,33 @@
def render_main():
st.set_page_config(layout="wide", page_icon="🎸")

st.header(":guitar: Riffusion Playground")
st.write("Interactive app for common riffusion tasks.")
st.title(":guitar: Riffusion Playground")

left, right = st.columns(2)

with left:
create_link(":pencil2: Text to Audio", "/text_to_audio")
st.write("Generate audio clips from text prompts.")

create_link(":wave: Audio to Audio", "/audio_to_audio")
st.write("Upload audio and modify with text prompt.")

create_link(":performing_arts: Interpolation", "/interpolation")
st.write("Interpolate between prompts in the latent space.")

create_link(":pencil2: Text to Audio", "/text_to_audio")
st.write("Generate audio from text prompts.")
create_link(":scissors: Audio Splitter", "/split_audio")
st.write("Upload audio and split into vocals, bass, drums, and other.")

with right:
create_link(":scroll: Text to Audio Batch", "/text_to_audio_batch")
st.write("Generate audio in batch from a JSON file of text prompts.")

create_link(":scissors: Audio Splitter", "/split_audio")
st.write("Split an audio into stems of {vocals, drums, bass, other}.")

with right:
create_link(":paperclip: Sample Clips", "/sample_clips")
st.write("Export short clips from an audio file.")

create_link(":musical_keyboard: Image to Audio", "/image_to_audio")
st.write("Reconstruct audio from spectrogram images.")

create_link(":wave: Audio to Audio", "/audio_to_audio")
st.write("Modify audio with a text prompt")


def create_link(name: str, url: str) -> None:
st.markdown(
3 changes: 2 additions & 1 deletion riffusion/streamlit/util.py
@@ -167,7 +167,8 @@ def select_device(container: T.Any = st.sidebar) -> str:

device_options = ["cuda", "cpu", "mps"]
device = st.sidebar.selectbox(
"Device", options=device_options, index=device_options.index(default_device)
"Device", options=device_options, index=device_options.index(default_device),
help="Which compute device to use. CUDA is recommended."
)
assert device is not None

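
As a side note on the device selectbox above (illustrative, not from the repository): a common way to choose the default among the "cuda", "mps", and "cpu" options, assuming PyTorch is available, is:

import torch


def default_device() -> str:
    """Pick a sensible default compute device: CUDA if present, then Apple MPS, else CPU."""
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"
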
