From 604f2be92ee64b21fd8ddfffd8728c73dd1d3843 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 24 Mar 2026 06:36:02 -0700 Subject: [PATCH 1/4] fix(home): voice input text persistence bugs --- .../home/components/user-input/user-input.tsx | 137 +++++++++++------- 1 file changed, 85 insertions(+), 52 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx index fffd762b9e2..e09243f103c 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx @@ -106,6 +106,7 @@ const SEND_BUTTON_ACTIVE = const SEND_BUTTON_DISABLED = 'bg-[var(--c-808080)] dark:bg-[var(--c-808080)]' const MAX_CHAT_TEXTAREA_HEIGHT = 200 +const SPEECH_RECOGNITION_LANG = 'en-US' const DROP_OVERLAY_ICONS = [ PdfIcon, @@ -267,6 +268,7 @@ export function UserInput({ const [isListening, setIsListening] = useState(false) const recognitionRef = useRef(null) const prefixRef = useRef('') + const valueRef = useRef(value) useEffect(() => { return () => { @@ -274,6 +276,10 @@ export function UserInput({ } }, []) + useEffect(() => { + valueRef.current = value + }, [value]) + const textareaRef = mentionMenu.textareaRef const wasSendingRef = useRef(false) const atInsertPosRef = useRef(null) @@ -488,6 +494,80 @@ export function UserInput({ [handleSubmit, mentionTokensWithContext, value, textareaRef] ) + const startRecognition = useCallback(() => { + const w = window as WindowWithSpeech + const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition + if (!SpeechRecognitionAPI) return + + const recognition = new SpeechRecognitionAPI() + recognition.continuous = true + recognition.interimResults = true + recognition.lang = SPEECH_RECOGNITION_LANG + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let transcript = '' + for (let i = 0; i < event.results.length; i++) { + transcript += event.results[i][0].transcript + } + const prefix = prefixRef.current + const newVal = prefix ? `${prefix} ${transcript}` : transcript + setValue(newVal) + valueRef.current = newVal + } + + recognition.onend = () => { + if (recognitionRef.current === recognition) { + prefixRef.current = valueRef.current + try { + recognition.start() + } catch { + recognitionRef.current = null + setIsListening(false) + } + } + } + + recognition.onerror = (e: SpeechRecognitionErrorEvent) => { + if (recognitionRef.current !== recognition) return + if (e.error === 'aborted' || e.error === 'not-allowed') { + recognitionRef.current = null + setIsListening(false) + } + } + + recognitionRef.current = recognition + try { + recognition.start() + } catch { + recognitionRef.current = null + setIsListening(false) + } + }, []) + + const restartRecognition = useCallback( + (newPrefix: string) => { + if (!recognitionRef.current) return + prefixRef.current = newPrefix + recognitionRef.current.abort() + recognitionRef.current = null + startRecognition() + }, + [startRecognition] + ) + + const toggleListening = useCallback(() => { + if (isListening) { + recognitionRef.current?.stop() + recognitionRef.current = null + setIsListening(false) + return + } + + prefixRef.current = value + startRecognition() + setIsListening(true) + }, [isListening, value, startRecognition]) + const handleInputChange = useCallback((e: React.ChangeEvent) => { const newValue = e.target.value const caret = e.target.selectionStart ?? newValue.length @@ -499,16 +579,19 @@ export function UserInput({ ) { const before = newValue.slice(0, caret - 1) const after = newValue.slice(caret) - setValue(`${before}${after}`) + const adjusted = `${before}${after}` + setValue(adjusted) atInsertPosRef.current = caret - 1 setPlusMenuOpen(true) setPlusMenuSearch('') setPlusMenuActiveIndex(0) + restartRecognition(adjusted) return } setValue(newValue) - }, []) + restartRecognition(newValue) + }, [restartRecognition]) const handleSelectAdjust = useCallback(() => { const textarea = textareaRef.current @@ -536,56 +619,6 @@ export function UserInput({ [isInitialView] ) - const toggleListening = useCallback(() => { - if (isListening) { - recognitionRef.current?.stop() - recognitionRef.current = null - setIsListening(false) - return - } - - const w = window as WindowWithSpeech - const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition - if (!SpeechRecognitionAPI) return - - prefixRef.current = value - - const recognition = new SpeechRecognitionAPI() - recognition.continuous = true - recognition.interimResults = true - recognition.lang = 'en-US' - - recognition.onresult = (event: SpeechRecognitionEvent) => { - let transcript = '' - for (let i = 0; i < event.results.length; i++) { - transcript += event.results[i][0].transcript - } - const prefix = prefixRef.current - setValue(prefix ? `${prefix} ${transcript}` : transcript) - } - - recognition.onend = () => { - if (recognitionRef.current === recognition) { - try { - recognition.start() - } catch { - recognitionRef.current = null - setIsListening(false) - } - } - } - recognition.onerror = (e: SpeechRecognitionErrorEvent) => { - if (e.error === 'aborted' || e.error === 'not-allowed') { - recognitionRef.current = null - setIsListening(false) - } - } - - recognitionRef.current = recognition - recognition.start() - setIsListening(true) - }, [isListening, value]) - const renderOverlayContent = useCallback(() => { const contexts = contextManagement.selectedContexts From 7e79317a830267ef7942e78528c5fe510cf15fc9 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 24 Mar 2026 06:42:10 -0700 Subject: [PATCH 2/4] fix(home): gate setIsListening on startRecognition success --- .../home/components/user-input/user-input.tsx | 61 ++++++++++--------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx index e09243f103c..4d99241330a 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx @@ -494,10 +494,10 @@ export function UserInput({ [handleSubmit, mentionTokensWithContext, value, textareaRef] ) - const startRecognition = useCallback(() => { + const startRecognition = useCallback((): boolean => { const w = window as WindowWithSpeech const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition - if (!SpeechRecognitionAPI) return + if (!SpeechRecognitionAPI) return false const recognition = new SpeechRecognitionAPI() recognition.continuous = true @@ -538,9 +538,10 @@ export function UserInput({ recognitionRef.current = recognition try { recognition.start() + return true } catch { recognitionRef.current = null - setIsListening(false) + return false } }, []) @@ -564,34 +565,38 @@ export function UserInput({ } prefixRef.current = value - startRecognition() - setIsListening(true) + if (startRecognition()) { + setIsListening(true) + } }, [isListening, value, startRecognition]) - const handleInputChange = useCallback((e: React.ChangeEvent) => { - const newValue = e.target.value - const caret = e.target.selectionStart ?? newValue.length - - if ( - caret > 0 && - newValue.charAt(caret - 1) === '@' && - (caret === 1 || /\s/.test(newValue.charAt(caret - 2))) - ) { - const before = newValue.slice(0, caret - 1) - const after = newValue.slice(caret) - const adjusted = `${before}${after}` - setValue(adjusted) - atInsertPosRef.current = caret - 1 - setPlusMenuOpen(true) - setPlusMenuSearch('') - setPlusMenuActiveIndex(0) - restartRecognition(adjusted) - return - } + const handleInputChange = useCallback( + (e: React.ChangeEvent) => { + const newValue = e.target.value + const caret = e.target.selectionStart ?? newValue.length + + if ( + caret > 0 && + newValue.charAt(caret - 1) === '@' && + (caret === 1 || /\s/.test(newValue.charAt(caret - 2))) + ) { + const before = newValue.slice(0, caret - 1) + const after = newValue.slice(caret) + const adjusted = `${before}${after}` + setValue(adjusted) + atInsertPosRef.current = caret - 1 + setPlusMenuOpen(true) + setPlusMenuSearch('') + setPlusMenuActiveIndex(0) + restartRecognition(adjusted) + return + } - setValue(newValue) - restartRecognition(newValue) - }, [restartRecognition]) + setValue(newValue) + restartRecognition(newValue) + }, + [restartRecognition] + ) const handleSelectAdjust = useCallback(() => { const textarea = textareaRef.current From 228cef72d496372932159f0b07534307e5c0fb15 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 24 Mar 2026 06:50:39 -0700 Subject: [PATCH 3/4] fix(home): handle startRecognition failure in restartRecognition --- .../[workspaceId]/home/components/user-input/user-input.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx index 4d99241330a..cbd3d4553c1 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx @@ -551,7 +551,9 @@ export function UserInput({ prefixRef.current = newPrefix recognitionRef.current.abort() recognitionRef.current = null - startRecognition() + if (!startRecognition()) { + setIsListening(false) + } }, [startRecognition] ) From 8549d3c0c184b339f5131d3a61192d6439092664 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 24 Mar 2026 09:43:31 -0700 Subject: [PATCH 4/4] fix(home): reset speech prefix on submit while mic is active --- .../home/components/user-input/user-input.tsx | 159 +++++++++--------- 1 file changed, 80 insertions(+), 79 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx index cbd3d4553c1..a69cc08477d 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/user-input/user-input.tsx @@ -396,6 +396,84 @@ export function UserInput({ [textareaRef] ) + const startRecognition = useCallback((): boolean => { + const w = window as WindowWithSpeech + const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition + if (!SpeechRecognitionAPI) return false + + const recognition = new SpeechRecognitionAPI() + recognition.continuous = true + recognition.interimResults = true + recognition.lang = SPEECH_RECOGNITION_LANG + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let transcript = '' + for (let i = 0; i < event.results.length; i++) { + transcript += event.results[i][0].transcript + } + const prefix = prefixRef.current + const newVal = prefix ? `${prefix} ${transcript}` : transcript + setValue(newVal) + valueRef.current = newVal + } + + recognition.onend = () => { + if (recognitionRef.current === recognition) { + prefixRef.current = valueRef.current + try { + recognition.start() + } catch { + recognitionRef.current = null + setIsListening(false) + } + } + } + + recognition.onerror = (e: SpeechRecognitionErrorEvent) => { + if (recognitionRef.current !== recognition) return + if (e.error === 'aborted' || e.error === 'not-allowed') { + recognitionRef.current = null + setIsListening(false) + } + } + + recognitionRef.current = recognition + try { + recognition.start() + return true + } catch { + recognitionRef.current = null + return false + } + }, []) + + const restartRecognition = useCallback( + (newPrefix: string) => { + if (!recognitionRef.current) return + prefixRef.current = newPrefix + recognitionRef.current.abort() + recognitionRef.current = null + if (!startRecognition()) { + setIsListening(false) + } + }, + [startRecognition] + ) + + const toggleListening = useCallback(() => { + if (isListening) { + recognitionRef.current?.stop() + recognitionRef.current = null + setIsListening(false) + return + } + + prefixRef.current = value + if (startRecognition()) { + setIsListening(true) + } + }, [isListening, value, startRecognition]) + const handleSubmit = useCallback(() => { const fileAttachmentsForApi: FileAttachmentForApi[] = files.attachedFiles .filter((f) => !f.uploading && f.key) @@ -413,13 +491,14 @@ export function UserInput({ contextManagement.selectedContexts.length > 0 ? contextManagement.selectedContexts : undefined ) setValue('') + restartRecognition('') files.clearAttachedFiles() contextManagement.clearContexts() if (textareaRef.current) { textareaRef.current.style.height = 'auto' } - }, [onSubmit, files, value, contextManagement, textareaRef]) + }, [onSubmit, files, value, contextManagement, textareaRef, restartRecognition]) const handleKeyDown = useCallback( (e: React.KeyboardEvent) => { @@ -494,84 +573,6 @@ export function UserInput({ [handleSubmit, mentionTokensWithContext, value, textareaRef] ) - const startRecognition = useCallback((): boolean => { - const w = window as WindowWithSpeech - const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition - if (!SpeechRecognitionAPI) return false - - const recognition = new SpeechRecognitionAPI() - recognition.continuous = true - recognition.interimResults = true - recognition.lang = SPEECH_RECOGNITION_LANG - - recognition.onresult = (event: SpeechRecognitionEvent) => { - let transcript = '' - for (let i = 0; i < event.results.length; i++) { - transcript += event.results[i][0].transcript - } - const prefix = prefixRef.current - const newVal = prefix ? `${prefix} ${transcript}` : transcript - setValue(newVal) - valueRef.current = newVal - } - - recognition.onend = () => { - if (recognitionRef.current === recognition) { - prefixRef.current = valueRef.current - try { - recognition.start() - } catch { - recognitionRef.current = null - setIsListening(false) - } - } - } - - recognition.onerror = (e: SpeechRecognitionErrorEvent) => { - if (recognitionRef.current !== recognition) return - if (e.error === 'aborted' || e.error === 'not-allowed') { - recognitionRef.current = null - setIsListening(false) - } - } - - recognitionRef.current = recognition - try { - recognition.start() - return true - } catch { - recognitionRef.current = null - return false - } - }, []) - - const restartRecognition = useCallback( - (newPrefix: string) => { - if (!recognitionRef.current) return - prefixRef.current = newPrefix - recognitionRef.current.abort() - recognitionRef.current = null - if (!startRecognition()) { - setIsListening(false) - } - }, - [startRecognition] - ) - - const toggleListening = useCallback(() => { - if (isListening) { - recognitionRef.current?.stop() - recognitionRef.current = null - setIsListening(false) - return - } - - prefixRef.current = value - if (startRecognition()) { - setIsListening(true) - } - }, [isListening, value, startRecognition]) - const handleInputChange = useCallback( (e: React.ChangeEvent) => { const newValue = e.target.value