Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔊 feat: download audio files #3349

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 110 additions & 52 deletions client/src/components/Chat/Messages/MessageAudio.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { useEffect } from 'react';
import { useEffect, useRef, useState } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessage } from 'librechat-data-provider';
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '~/components/ui';
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
import { useLocalize, useTextToSpeech } from '~/hooks';
import store from '~/store';
Expand All @@ -14,81 +15,138 @@ type THoverButtons = {
export default function MessageAudio({ index, message, isLast }: THoverButtons) {
const localize = useLocalize();
const playbackRate = useRecoilValue(store.playbackRate);
const [audioText, setAudioText] = useState<string>(localize('com_ui_info_read_aloud'));
const [tooltipOpen, setTooltipOpen] = useState(false);
const [wasLongPress, setWasLongPress] = useState(false);

const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech(message, isLast, index);

const isMouseDownRef = useRef(false);
const timerRef = useRef<NodeJS.Timeout | null>(null);
const counterRef = useRef(0);

/**
 * Chooses the glyph shown inside the read-aloud button.
 *
 * @param size - Icon size, forwarded unchanged to the chosen SVG component.
 * @returns A spinner when `isLoading` is set; otherwise the mute icon while
 *          `isSpeaking`, or the plain volume icon when idle.
 */
const renderIcon = (size: string) => {
  // The loading state wins over the speaking state.
  if (isLoading) {
    return <Spinner size={size} />;
  }
  if (isSpeaking) {
    return <VolumeMuteIcon size={size} />;
  }
  return <VolumeIcon size={size} />;
};

if (isSpeaking) {
return <VolumeMuteIcon size={size} />;
/**
 * Begins a press on the read-aloud button.
 *
 * Opens the tooltip, shows a countdown starting at 2 and, if the button is
 * still held when the countdown reaches 0, calls `toggleSpeech(true)` to
 * request the download. A `mouseup` listener is attached to `window` so the
 * release is observed even when the pointer has left the button.
 */
const handleMouseDown = () => {
  setWasLongPress(false);
  setTooltipOpen(true);

  // A press is already being tracked; do not start a second countdown.
  if (isMouseDownRef.current) {
    return;
  }

  isMouseDownRef.current = true;
  counterRef.current = 2;
  setAudioText(localize('com_ui_hold_mouse_download', counterRef.current.toString()));

  timerRef.current = setInterval(() => {
    counterRef.current--;
    setAudioText(localize('com_ui_hold_mouse_download', counterRef.current.toString()));

    if (counterRef.current > 0) {
      return;
    }

    // Countdown finished: stop ticking here so the counter rests at 0 and never
    // goes negative. The mouse-up handler compares the counter against 0 to
    // recognise a long press, however late the button is released.
    // timerRef is intentionally left non-null: the mouse-up handler uses it to
    // decide whether the tooltip text needs to be reset.
    if (timerRef.current) {
      clearInterval(timerRef.current);
    }

    if (isMouseDownRef.current) {
      setAudioText(localize('com_ui_downloading'));
      toggleSpeech(true);
    }
  }, 1000);

  window.addEventListener('mouseup', handleMouseUp);
};

/**
 * Ends a press on the read-aloud button.
 *
 * A release before the countdown has reached 0 is treated as a normal click
 * and toggles speech without downloading. A release at or after 0 is recorded
 * as a long press so the click handler can skip its own toggle. In both cases
 * the tooltip is closed, the countdown timer is stopped and the tooltip text
 * is restored.
 */
const handleMouseUp = () => {
  // Detach first so the window listener never outlives the press.
  window.removeEventListener('mouseup', handleMouseUp);

  // One physical release reaches this handler twice: via the button's
  // onMouseUpCapture and via the window listener. Only the first invocation
  // for a tracked press may act, otherwise speech is toggled twice.
  if (!isMouseDownRef.current) {
    return;
  }
  isMouseDownRef.current = false;

  if (counterRef.current > 0) {
    // NOTE(review): the button's onClickCapture also calls toggleSpeech(false)
    // for short presses — confirm which handler is meant to own the toggle.
    toggleSpeech(false);
  } else {
    // `<= 0` rather than `=== 0`: the countdown may have ticked below zero by
    // the time the button is released, and that is still a long press.
    setWasLongPress(true);
  }

  setTooltipOpen(false);

  if (timerRef.current) {
    clearInterval(timerRef.current);
    timerRef.current = null;
    setAudioText(localize('com_ui_info_read_aloud'));
  }
};

// Keeps the speed of this message's <audio> element in step with the
// playbackRate value read from the store.
useEffect(() => {
  const audioElement = document.getElementById(
    `audio-${message.messageId}`,
  ) as HTMLAudioElement | null;

  // Nothing to sync without an element or without a usable (positive) rate.
  if (!audioElement || !playbackRate || playbackRate <= 0) {
    return;
  }

  // Only write when the value actually differs.
  if (audioElement.playbackRate !== playbackRate) {
    audioElement.playbackRate = playbackRate;
  }
}, [audioRef, isSpeaking, playbackRate, message.messageId]);

return (
<>
<button
className="hover-button rounded-md p-1 pl-0 text-gray-400 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
// onMouseDownCapture={() => {
// if (audioRef.current) {
// audioRef.current.muted = false;
// }
// handleMouseDown();
// }}
// onMouseUpCapture={() => {
// if (audioRef.current) {
// audioRef.current.muted = false;
// }
// handleMouseUp();
// }}
onClickCapture={() => {
if (audioRef.current) {
audioRef.current.muted = false;
}
toggleSpeech();
}}
type="button"
title={isSpeaking ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
>
{renderIcon('19')}
</button>
<audio
ref={audioRef}
controls
controlsList="nodownload nofullscreen noremoteplayback"
style={{
position: 'absolute',
overflow: 'hidden',
display: 'none',
height: '0px',
width: '0px',
}}
src={audioRef.current?.src || undefined}
id={`audio-${message.messageId}`}
muted
autoPlay
/>
</>
<TooltipProvider>
<>
<Tooltip open={tooltipOpen}>
<TooltipTrigger asChild>
<button
className="hover-button rounded-md p-1 pl-0 text-gray-400 hover:text-gray-950 dark:text-gray-400/70 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
onMouseDownCapture={handleMouseDown}
onMouseUpCapture={handleMouseUp}
onMouseEnter={() => setTooltipOpen(true)}
onMouseLeave={() => setTooltipOpen(false)}
onClickCapture={() => {
if (!wasLongPress && audioRef.current) {
audioRef.current.muted = false;
toggleSpeech(false);
}
}}
type="button"
>
{renderIcon('19')}
</button>
</TooltipTrigger>
<TooltipContent side="bottom" sideOffset={0}>
<div className="space-y-2">
{isSpeaking ? (
<p className="text-center text-sm text-gray-600 dark:text-gray-300">
{localize('com_ui_stop')}
</p>
) : (
<p className="text-center text-sm text-gray-600 dark:text-gray-300">
{localize('com_ui_read_aloud')}
<br />
{audioText}
</p>
)}
</div>
</TooltipContent>
</Tooltip>
<audio
ref={audioRef}
controls
controlsList="nodownload nofullscreen noremoteplayback"
style={{
position: 'absolute',
overflow: 'hidden',
display: 'none',
height: '0px',
width: '0px',
}}
src={audioRef.current?.src || undefined}
id={`audio-${message.messageId}`}
muted
autoPlay
/>
</>
</TooltipProvider>
);
}
2 changes: 1 addition & 1 deletion client/src/components/ui/Tooltip.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const TooltipContent = React.forwardRef<
>
<span className="flex items-center whitespace-pre-wrap px-2 py-1 text-center text-sm font-medium normal-case text-white">
{children}
<TooltipArrow className="TooltipArrow" />
<TooltipArrow className="TooltipArrow border-gray-700" />
</span>
</TooltipPrimitive.Content>
</TooltipPortal>
Expand Down
3 changes: 0 additions & 3 deletions client/src/hooks/Audio/useAudioRef.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,17 @@ export default function useCustomAudioRef({
useEffect(() => {
const handleEnded = () => {
setIsPlaying(false);
console.log('message audio ended');
if (audioRef.current) {
URL.revokeObjectURL(audioRef.current.src);
}
};

const handleStart = () => {
setIsPlaying(true);
console.log('message audio started');
};

const handlePause = () => {
setIsPlaying(false);
console.log('message audio paused');
};

const audioElement = audioRef.current;
Expand Down
4 changes: 2 additions & 2 deletions client/src/hooks/Input/useTextToSpeech.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
}
};

const toggleSpeech = () => {
const toggleSpeech = (download: boolean) => {
if (isSpeaking) {
console.log('canceling message audio speech');
cancelSpeech();
Expand All @@ -59,7 +59,7 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
const messageContent = message?.content ?? message?.text ?? '';
const parsedMessage =
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
generateSpeech(parsedMessage, false);
generateSpeech(parsedMessage, download);
}
};

Expand Down
3 changes: 2 additions & 1 deletion client/src/hooks/Input/useTextToSpeechExternal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ function useTextToSpeechExternal(messageId: string, isLast: boolean, index = 0)
});

newAudio.onended = () => {
console.log('Cached message audio ended');
URL.revokeObjectURL(blobUrl);
setIsSpeaking(false);
};
Expand Down Expand Up @@ -100,7 +99,9 @@ function useTextToSpeechExternal(messageId: string, isLast: boolean, index = 0)
const blobUrl = URL.createObjectURL(audioBlob);
if (downloadFile) {
downloadAudio(blobUrl);
return;
}

autoPlayAudio(blobUrl);
} catch (error) {
showToast({
Expand Down
3 changes: 3 additions & 0 deletions client/src/localization/languages/Eng.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ export default {
com_ui_use_micrphone: 'Use microphone',
com_ui_min_tags: 'Cannot remove more values, a minimum of {0} are required.',
com_ui_max_tags: 'Maximum number allowed is {0}, using latest values.',
com_ui_hold_mouse_download: 'Hold for {0} more seconds to download the audio',
com_ui_info_read_aloud: 'Hold click 3 seconds to download',
com_ui_downloading: 'Downloading...',
com_ui_bookmarks: 'Bookmarks',
com_ui_bookmarks_rebuild: 'Rebuild',
com_ui_bookmarks_new: 'New Bookmark',
Expand Down