Skip to content

Commit

Permalink
Text to audio supports multiple clips
Browse files: browse the repository at this point in the history
Topic: text_to_audio_multiple_riffs
  • Loading branch information
hmartiro committed Jan 9, 2023
1 parent a4784bb commit ca72f41
Showing 1 changed file with 51 additions and 24 deletions.
75 changes: 51 additions & 24 deletions riffusion/streamlit/pages/text_to_audio.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,11 +28,33 @@ def render_text_to_audio() -> None:

device = streamlit_util.select_device(st.sidebar)

prompt = st.text_input("Prompt")
negative_prompt = st.text_input("Negative prompt")
with st.form("Inputs"):
prompt = st.text_input("Prompt")
negative_prompt = st.text_input("Negative prompt")

row = st.columns(4)
num_clips = T.cast(
int,
row[0].number_input(
"Number of clips",
value=1,
min_value=1,
max_value=25,
help="How many outputs to generate (seed gets incremented)",
),
)
starting_seed = T.cast(
int,
row[1].number_input(
"Seed",
value=42,
help="Change this to generate different variations",
),
)

st.form_submit_button("Riff", type="primary")

with st.sidebar.expander("Text to Audio Params", expanded=True):
seed = T.cast(int, st.number_input("Seed", value=42))
with st.sidebar:
num_inference_steps = T.cast(int, st.number_input("Inference steps", value=50))
width = T.cast(int, st.number_input("Width", value=512))
guidance = st.number_input(
Expand All @@ -43,32 +65,37 @@ def render_text_to_audio() -> None:
st.info("Enter a prompt")
return

image = streamlit_util.run_txt2img(
prompt=prompt,
num_inference_steps=num_inference_steps,
guidance=guidance,
negative_prompt=negative_prompt,
seed=seed,
width=width,
height=512,
device=device,
)

st.image(image)

# TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained
params = SpectrogramParams(
min_frequency=0,
max_frequency=10000,
)

audio_bytes = streamlit_util.audio_bytes_from_spectrogram_image(
image=image,
params=params,
device=device,
output_format="mp3",
)
st.audio(audio_bytes)
seed = starting_seed
for i in range(1, num_clips + 1):
st.write(f"#### Riff {i} / {num_clips} - Seed {seed}")

image = streamlit_util.run_txt2img(
prompt=prompt,
num_inference_steps=num_inference_steps,
guidance=guidance,
negative_prompt=negative_prompt,
seed=seed,
width=width,
height=512,
device=device,
)
st.image(image)

audio_bytes = streamlit_util.audio_bytes_from_spectrogram_image(
image=image,
params=params,
device=device,
output_format="mp3",
)
st.audio(audio_bytes)

seed += 1


if __name__ == "__main__":
Expand Down

0 comments on commit ca72f41

Please sign in to comment.