diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx index fffd762b9e2..a69cc08477d 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx @@ -106,6 +106,7 @@ const SEND_BUTTON_ACTIVE = const SEND_BUTTON_DISABLED = 'bg-[var(--c-808080)] dark:bg-[var(--c-808080)]' const MAX_CHAT_TEXTAREA_HEIGHT = 200 +const SPEECH_RECOGNITION_LANG = 'en-US' const DROP_OVERLAY_ICONS = [ PdfIcon, @@ -267,6 +268,7 @@ export function UserInput({ const [isListening, setIsListening] = useState(false) const recognitionRef = useRef(null) const prefixRef = useRef('') + const valueRef = useRef(value) useEffect(() => { return () => { @@ -274,6 +276,10 @@ export function UserInput({ } }, []) + useEffect(() => { + valueRef.current = value + }, [value]) + const textareaRef = mentionMenu.textareaRef const wasSendingRef = useRef(false) const atInsertPosRef = useRef(null) @@ -390,6 +396,84 @@ export function UserInput({ [textareaRef] ) + const startRecognition = useCallback((): boolean => { + const w = window as WindowWithSpeech + const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition + if (!SpeechRecognitionAPI) return false + + const recognition = new SpeechRecognitionAPI() + recognition.continuous = true + recognition.interimResults = true + recognition.lang = SPEECH_RECOGNITION_LANG + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let transcript = '' + for (let i = 0; i < event.results.length; i++) { + transcript += event.results[i][0].transcript + } + const prefix = prefixRef.current + const newVal = prefix ? `${prefix} ${transcript}` : transcript + setValue(newVal) + valueRef.current = newVal + } + + recognition.onend = () => { + if (recognitionRef.current === recognition) { + prefixRef.current = valueRef.current + try { + recognition.start() + } catch { + recognitionRef.current = null + setIsListening(false) + } + } + } + + recognition.onerror = (e: SpeechRecognitionErrorEvent) => { + if (recognitionRef.current !== recognition) return + if (e.error === 'aborted' || e.error === 'not-allowed') { + recognitionRef.current = null + setIsListening(false) + } + } + + recognitionRef.current = recognition + try { + recognition.start() + return true + } catch { + recognitionRef.current = null + return false + } + }, []) + + const restartRecognition = useCallback( + (newPrefix: string) => { + if (!recognitionRef.current) return + prefixRef.current = newPrefix + recognitionRef.current.abort() + recognitionRef.current = null + if (!startRecognition()) { + setIsListening(false) + } + }, + [startRecognition] + ) + + const toggleListening = useCallback(() => { + if (isListening) { + recognitionRef.current?.stop() + recognitionRef.current = null + setIsListening(false) + return + } + + prefixRef.current = value + if (startRecognition()) { + setIsListening(true) + } + }, [isListening, value, startRecognition]) + const handleSubmit = useCallback(() => { const fileAttachmentsForApi: FileAttachmentForApi[] = files.attachedFiles .filter((f) => !f.uploading && f.key) @@ -407,13 +491,14 @@ export function UserInput({ contextManagement.selectedContexts.length > 0 ? contextManagement.selectedContexts : undefined ) setValue('') + restartRecognition('') files.clearAttachedFiles() contextManagement.clearContexts() if (textareaRef.current) { textareaRef.current.style.height = 'auto' } - }, [onSubmit, files, value, contextManagement, textareaRef]) + }, [onSubmit, files, value, contextManagement, textareaRef, restartRecognition]) const handleKeyDown = useCallback( (e: React.KeyboardEvent) => { @@ -488,27 +573,33 @@ export function UserInput({ [handleSubmit, mentionTokensWithContext, value, textareaRef] ) - const handleInputChange = useCallback((e: React.ChangeEvent) => { - const newValue = e.target.value - const caret = e.target.selectionStart ?? newValue.length - - if ( - caret > 0 && - newValue.charAt(caret - 1) === '@' && - (caret === 1 || /\s/.test(newValue.charAt(caret - 2))) - ) { - const before = newValue.slice(0, caret - 1) - const after = newValue.slice(caret) - setValue(`${before}${after}`) - atInsertPosRef.current = caret - 1 - setPlusMenuOpen(true) - setPlusMenuSearch('') - setPlusMenuActiveIndex(0) - return - } + const handleInputChange = useCallback( + (e: React.ChangeEvent) => { + const newValue = e.target.value + const caret = e.target.selectionStart ?? newValue.length + + if ( + caret > 0 && + newValue.charAt(caret - 1) === '@' && + (caret === 1 || /\s/.test(newValue.charAt(caret - 2))) + ) { + const before = newValue.slice(0, caret - 1) + const after = newValue.slice(caret) + const adjusted = `${before}${after}` + setValue(adjusted) + atInsertPosRef.current = caret - 1 + setPlusMenuOpen(true) + setPlusMenuSearch('') + setPlusMenuActiveIndex(0) + restartRecognition(adjusted) + return + } - setValue(newValue) - }, []) + setValue(newValue) + restartRecognition(newValue) + }, + [restartRecognition] + ) const handleSelectAdjust = useCallback(() => { const textarea = textareaRef.current @@ -536,56 +627,6 @@ export function UserInput({ [isInitialView] ) - const toggleListening = useCallback(() => { - if (isListening) { - recognitionRef.current?.stop() - recognitionRef.current = null - setIsListening(false) - return - } - - const w = window as WindowWithSpeech - const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition - if (!SpeechRecognitionAPI) return - - prefixRef.current = value - - const recognition = new SpeechRecognitionAPI() - recognition.continuous = true - recognition.interimResults = true - recognition.lang = 'en-US' - - recognition.onresult = (event: SpeechRecognitionEvent) => { - let transcript = '' - for (let i = 0; i < event.results.length; i++) { - transcript += event.results[i][0].transcript - } - const prefix = prefixRef.current - setValue(prefix ? `${prefix} ${transcript}` : transcript) - } - - recognition.onend = () => { - if (recognitionRef.current === recognition) { - try { - recognition.start() - } catch { - recognitionRef.current = null - setIsListening(false) - } - } - } - recognition.onerror = (e: SpeechRecognitionErrorEvent) => { - if (e.error === 'aborted' || e.error === 'not-allowed') { - recognitionRef.current = null - setIsListening(false) - } - } - - recognitionRef.current = recognition - recognition.start() - setIsListening(true) - }, [isListening, value]) - const renderOverlayContent = useCallback(() => { const contexts = contextManagement.selectedContexts