mirror of
https://github.com/open-webui/desktop.git
synced 2026-06-30 20:57:56 -04:00
feat: add global voice input with push-to-talk transcription (0.0.8)
This commit is contained in:
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.0.8] - 2026-04-11
|
||||
|
||||
### Added
|
||||
|
||||
- **Voice Input.** System-wide push-to-talk voice transcription. Press the shortcut from any app to record audio, which is automatically transcribed and sent to your active chat.
|
||||
- **Voice Input Settings.** Configurable global hotkey and enable/disable toggle in Settings, with a default of Shift+Cmd+Space (macOS) or Shift+Ctrl+Space (Windows/Linux).
|
||||
- **Audio Feedback.** Bundled start and stop chime sounds play when recording begins and ends.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Shortcut Recorder on macOS.** Shortcut inputs now use physical key codes instead of character values, fixing Alt key combinations producing unicode characters like √ instead of V.
|
||||
|
||||
## [0.0.7] - 2026-04-11
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -15,7 +15,8 @@ export default defineConfig({
|
||||
input: {
|
||||
index: resolve(__dirname, 'src/preload/index.ts'),
|
||||
'content-preload': resolve(__dirname, 'src/preload/content-preload.ts'),
|
||||
'spotlight-preload': resolve(__dirname, 'src/preload/spotlight-preload.ts')
|
||||
'spotlight-preload': resolve(__dirname, 'src/preload/spotlight-preload.ts'),
|
||||
'voice-input-preload': resolve(__dirname, 'src/preload/voice-input-preload.ts')
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -25,7 +26,8 @@ export default defineConfig({
|
||||
rollupOptions: {
|
||||
input: {
|
||||
index: resolve(__dirname, 'src/renderer/index.html'),
|
||||
spotlight: resolve(__dirname, 'src/renderer/spotlight.html')
|
||||
spotlight: resolve(__dirname, 'src/renderer/spotlight.html'),
|
||||
'voice-input': resolve(__dirname, 'src/renderer/voice-input.html')
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "open-webui",
|
||||
"version": "0.0.7",
|
||||
"version": "0.0.8",
|
||||
"license": "AGPL-3.0",
|
||||
"description": "Open WebUI Desktop",
|
||||
"main": "./out/main/index.js",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
+303
-3
@@ -98,6 +98,7 @@ if (process.platform === 'linux') {
|
||||
let mainWindow: BrowserWindow | null = null
|
||||
let contentWindow: BrowserWindow | null = null
|
||||
let spotlightWindow: BrowserWindow | null = null
|
||||
let voiceInputWindow: BrowserWindow | null = null
|
||||
let tray: Tray | null = null
|
||||
let isQuiting = false
|
||||
|
||||
@@ -106,10 +107,12 @@ let SERVER_URL: string | null = null
|
||||
let SERVER_STATUS: string | null = null
|
||||
let SERVER_REACHABLE = false
|
||||
let SERVER_PID: number | null = null
|
||||
let AUTH_TOKEN: string | null = null
|
||||
let voiceInputRecording = false
|
||||
|
||||
// ─── Global Shortcuts ───────────────────────────────────
|
||||
|
||||
const registerShortcuts = (globalAccel?: string, spotlightAccel?: string): void => {
|
||||
const registerShortcuts = (globalAccel?: string, spotlightAccel?: string, voiceInputAccel?: string): void => {
|
||||
globalShortcut.unregisterAll()
|
||||
|
||||
// Global shortcut – bring main window to foreground
|
||||
@@ -139,6 +142,20 @@ const registerShortcuts = (globalAccel?: string, spotlightAccel?: string): void
|
||||
log.warn('Failed to register spotlight shortcut:', spotlightAccel, error)
|
||||
}
|
||||
}
|
||||
|
||||
// Voice input shortcut – toggle microphone recording
|
||||
if (voiceInputAccel && CONFIG?.voiceInputEnabled !== false) {
|
||||
try {
|
||||
const ok = globalShortcut.register(voiceInputAccel, () => {
|
||||
toggleVoiceInput()
|
||||
})
|
||||
log.info(`Voice input shortcut "${voiceInputAccel}" registered: ${ok}`)
|
||||
} catch (error) {
|
||||
log.warn('Failed to register voice input shortcut:', voiceInputAccel, error)
|
||||
}
|
||||
} else {
|
||||
log.info(`Voice input shortcut skipped — accel="${voiceInputAccel}", enabled=${CONFIG?.voiceInputEnabled}`)
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Spotlight Window ───────────────────────────────────
|
||||
@@ -257,6 +274,122 @@ function toggleSpotlight(selectedText?: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Voice Input Window ─────────────────────────────────
|
||||
|
||||
/**
 * Creates the voice input overlay: a small frameless, transparent,
 * always-on-top window, horizontally centered on the display nearest the
 * cursor and placed 120px below the top of that display's bounds.
 *
 * The window is created hidden; the caller decides when to show it. The new
 * instance is stored in the module-level `voiceInputWindow` and also returned.
 *
 * @returns The newly created window.
 */
function createVoiceInputWindow(): BrowserWindow {
  const { screen } = require('electron')
  const cursorPoint = screen.getCursorScreenPoint()
  const activeDisplay = screen.getDisplayNearestPoint(cursorPoint)
  const { x: sx, y: sy, width: sw } = activeDisplay.bounds

  const winW = 340
  const winH = 72

  const win = new BrowserWindow({
    x: sx + Math.round((sw - winW) / 2),
    y: sy + 120,
    width: winW,
    height: winH,
    frame: false,
    transparent: true,
    alwaysOnTop: true,
    skipTaskbar: true,
    resizable: false,
    hasShadow: false,
    show: false,
    focusable: true,
    icon: path.join(__dirname, 'assets/icon.png'),
    webPreferences: {
      preload: join(__dirname, '../preload/voice-input-preload.js'),
      sandbox: false,
      webviewTag: false,
      autoplayPolicy: 'no-user-gesture-required',
      // Dedicated session: the permission handler below is installed per
      // session, so without its own partition it would replace the permission
      // policy of every other window sharing the default session.
      partition: 'voice-input'
    }
  })
  voiceInputWindow = win

  // Grant microphone permission for the voice input window (and nothing else)
  win.webContents.session.setPermissionRequestHandler((_webContents, permission, callback) => {
    callback(permission === 'media')
  })

  // Surface load failures instead of leaving an unhandled promise rejection
  const onLoadError = (error: unknown): void => {
    log.warn('Failed to load voice input window:', error)
  }
  if (is.dev && process.env['ELECTRON_RENDERER_URL']) {
    win.loadURL(`${process.env['ELECTRON_RENDERER_URL']}/voice-input.html`).catch(onLoadError)
  } else {
    win.loadFile(join(__dirname, '../renderer/voice-input.html')).catch(onLoadError)
  }

  win.on('closed', () => {
    voiceInputWindow = null
    voiceInputRecording = false
  })

  return win
}
|
||||
|
||||
/**
 * Plays the bundled start or stop chime through a platform audio player.
 *
 * Uses `afplay` on macOS, PowerShell's `Media.SoundPlayer` on Windows, and
 * `paplay` with an `aplay` fallback elsewhere. Playback is best-effort: the
 * returned promise always resolves (never rejects), including when the sound
 * file is missing or the player fails.
 *
 * @param ascending `true` plays `chime-start.wav`, `false` plays `chime-stop.wav`.
 * @returns A promise that resolves once the player process has exited.
 */
function playChime(ascending: boolean): Promise<void> {
  return new Promise((resolve) => {
    const { execFile } = require('child_process')
    const fs = require('fs')
    const file = ascending ? 'chime-start.wav' : 'chime-stop.wav'
    const soundPath = app.isPackaged
      ? join(process.resourcesPath, 'app.asar.unpacked', 'resources', 'sounds', file)
      : join(app.getAppPath(), 'resources', 'sounds', file)

    const exists = fs.existsSync(soundPath)
    log.info(`playChime: ${ascending ? 'start' : 'stop'}, path=${soundPath}, exists=${exists}`)

    if (!exists) {
      resolve()
      return
    }

    if (process.platform === 'darwin') {
      execFile('afplay', [soundPath], (err, stdout, stderr) => {
        if (err) log.warn('afplay error:', err.message, stderr)
        resolve()
      })
    } else if (process.platform === 'win32') {
      // The path is embedded in a single-quoted PowerShell string, where the
      // only escape is doubling the quote. Without this, an install path that
      // contains an apostrophe terminates the literal early.
      const quotedPath = soundPath.replace(/'/g, "''")
      execFile(
        'powershell',
        ['-NoProfile', '-Command', `(New-Object Media.SoundPlayer '${quotedPath}').PlaySync()`],
        (err, stdout, stderr) => {
          if (err) log.warn('powershell chime error:', err.message, stderr)
          resolve()
        }
      )
    } else {
      execFile('paplay', [soundPath], (err) => {
        if (!err) {
          resolve()
          return
        }
        // PulseAudio client unavailable or failed: fall back to ALSA
        execFile('aplay', [soundPath], (aplayErr) => {
          if (aplayErr) log.warn('aplay error:', aplayErr.message)
          resolve()
        })
      })
    }
  })
}
|
||||
|
||||
/**
 * Toggles push-to-talk recording; invoked by the global voice input shortcut.
 *
 * While recording, it tells the overlay to stop (the stop chime is played by
 * the done/close handlers once the renderer reports back). Otherwise it plays
 * the start chime, shows the overlay (creating it on first use) and tells the
 * renderer to start recording.
 */
async function toggleVoiceInput(): Promise<void> {
  if (voiceInputRecording) {
    // Stop recording — chime plays in done/close handler after mic is released
    voiceInputRecording = false
    if (voiceInputWindow && !voiceInputWindow.isDestroyed()) {
      voiceInputWindow.webContents.send('voiceInput:state', { recording: false })
    }
    return
  }

  // Start recording — chime plays concurrently (separate audio output path from mic input)
  voiceInputRecording = true
  void playChime(true)

  if (voiceInputWindow && !voiceInputWindow.isDestroyed()) {
    voiceInputWindow.show()
    voiceInputWindow.focus()
    voiceInputWindow.webContents.send('voiceInput:state', { recording: true })
    return
  }

  const win = createVoiceInputWindow()
  win.once('ready-to-show', () => {
    // The user may have toggled off, or the window may be gone, before the
    // page finished loading; do not pop up an idle overlay in that case.
    if (win.isDestroyed() || !voiceInputRecording) return
    win.show()
    win.focus()
    setTimeout(() => {
      // Re-check after the delay: sending to a destroyed window throws, and
      // sending "start" after a stop would leave the renderer recording while
      // the main process believes it is idle.
      if (win.isDestroyed() || !voiceInputRecording) return
      win.webContents.send('voiceInput:state', { recording: true })
    }, 100)
  })
}
|
||||
|
||||
// ─── Windows ────────────────────────────────────────────
|
||||
|
||||
function createMainWindow(show = true): void {
|
||||
@@ -790,7 +923,8 @@ if (!gotTheLock) {
|
||||
await setConfig(config)
|
||||
CONFIG = await getConfig()
|
||||
updateTray()
|
||||
registerShortcuts(CONFIG.globalShortcut, CONFIG.spotlightShortcut)
|
||||
voiceInputRecording = false
|
||||
registerShortcuts(CONFIG.globalShortcut, CONFIG.spotlightShortcut, CONFIG.voiceInputShortcut)
|
||||
})
|
||||
|
||||
// Python/uv
|
||||
@@ -941,6 +1075,12 @@ if (!gotTheLock) {
|
||||
}
|
||||
})
|
||||
|
||||
// Auth token relay from webview
|
||||
// Stores the auth token relayed from the webview in the module-level
// AUTH_TOKEN; an empty/falsy token clears it.
ipcMain.handle('app:setAuthToken', (_event, token: string) => {
  AUTH_TOKEN = token ? token : null
  log.info('Auth token updated from webview')
})
|
||||
|
||||
// Misc
|
||||
ipcMain.handle('app:reset', () => resetAppHandler())
|
||||
|
||||
@@ -1076,6 +1216,162 @@ if (!gotTheLock) {
|
||||
}
|
||||
)
|
||||
|
||||
// ── Voice Input ─────────────────────────────────────
|
||||
|
||||
// Check microphone permission (macOS)
|
||||
// Reports whether the app may use the microphone, as 'granted' or 'denied'.
// On macOS the user is prompted when access has not been granted yet.
ipcMain.handle('voiceInput:micPermission', async () => {
  if (process.platform === 'darwin') {
    const status = systemPreferences.getMediaAccessStatus('microphone')
    if (status === 'granted') return 'granted'
    const granted = await systemPreferences.askForMediaAccess('microphone')
    return granted ? 'granted' : 'denied'
  }

  if (process.platform === 'win32') {
    // Windows has a system-wide microphone privacy switch that Electron
    // reports through the same API. There is nothing to prompt for, so only
    // an explicit denial is treated as 'denied'.
    const status = systemPreferences.getMediaAccessStatus('microphone')
    return status === 'denied' ? 'denied' : 'granted'
  }

  return 'granted' // Linux has no OS-level permission to query here
})
|
||||
|
||||
// Transcribe audio via the connected server's STT endpoint
|
||||
// Uploads recorded audio to the default connection's transcription endpoint
// (`/api/v1/audio/transcriptions`) as multipart form data and returns the
// parsed JSON response. Throws (after logging) when no connection or auth
// token is available, or when the server answers with a non-2xx status.
ipcMain.handle(
  'voiceInput:transcribe',
  async (_event, audioBuffer: ArrayBuffer, rendererToken?: string) => {
    try {
      const config = await getConfig()
      if (!config.defaultConnectionId || config.connections.length === 0) {
        throw new Error('No connection configured')
      }
      const conn = config.connections.find((c) => c.id === config.defaultConnectionId)
      if (!conn) throw new Error('Default connection not found')

      let url = conn.url
      if (conn.type === 'local' && SERVER_URL) {
        url = SERVER_URL
      }
      if (url.startsWith('http://0.0.0.0')) {
        url = url.replace('http://0.0.0.0', 'http://localhost')
      }
      // A configured base URL may end in "/"; avoid producing "//api/..."
      url = url.replace(/\/+$/, '')

      // True when `pageUrl` is served by the server we are about to call.
      // Loopback aliases are treated as the same host because the local server
      // address is rewritten above.
      const loopbackHosts = new Set(['localhost', '127.0.0.1', '0.0.0.0', '[::1]'])
      const isTargetServer = (pageUrl: string): boolean => {
        try {
          const page = new URL(pageUrl)
          const target = new URL(url)
          if (page.protocol !== target.protocol || page.port !== target.port) return false
          return (
            page.hostname === target.hostname ||
            (loopbackHosts.has(page.hostname) && loopbackHosts.has(target.hostname))
          )
        } catch {
          return false
        }
      }

      // Use stored auth token (relayed from webview), fall back to renderer-provided or webview scan
      let token = AUTH_TOKEN || rendererToken || ''
      if (!token) {
        // Scan webviews for the Open WebUI page and read its token. Only pages
        // belonging to the target server are considered, so a token issued by
        // one server is never forwarded to a different one.
        try {
          const { webContents: wc } = require('electron')
          for (const contents of wc.getAllWebContents()) {
            try {
              if (
                !contents.isDestroyed() &&
                contents.getType() === 'webview' &&
                isTargetServer(contents.getURL())
              ) {
                const t = await contents.executeJavaScript(`localStorage.getItem('token') || ''`)
                if (t) {
                  token = t
                  break
                }
              }
            } catch {
              // Skip inaccessible webContents
            }
          }
        } catch {
          log.warn('voiceInput:transcribe — could not extract token from webviews')
        }
      }

      if (!token) {
        throw new Error('Not authenticated — open a connection first')
      }

      // Build multipart form data manually using Node.js
      const boundary = '----VoiceInput' + Date.now()
      const buffer = Buffer.from(audioBuffer)
      const filename = `recording-${Date.now()}.wav`

      const header = [
        `--${boundary}`,
        `Content-Disposition: form-data; name="file"; filename="${filename}"`,
        `Content-Type: audio/wav`,
        '',
        ''
      ].join('\r\n')
      const footer = `\r\n--${boundary}--\r\n`
      const body = Buffer.concat([
        Buffer.from(header, 'utf-8'),
        buffer,
        Buffer.from(footer, 'utf-8')
      ])

      const response = await fetch(`${url}/api/v1/audio/transcriptions`, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${token}`,
          'Content-Type': `multipart/form-data; boundary=${boundary}`
        },
        body
      })

      if (!response.ok) {
        const text = await response.text().catch(() => '')
        throw new Error(`Transcription failed (${response.status}): ${text}`)
      }

      return await response.json()
    } catch (error: unknown) {
      log.error('voiceInput:transcribe failed:', error)
      throw error
    }
  }
)
|
||||
|
||||
// Voice input completed — deliver text to chat
|
||||
// Voice input finished: play the stop chime, hide the overlay and, when there
// is non-empty text, forward it to the main renderer as a 'query' for the
// default connection, then bring the main window forward. Without a usable
// default connection the main window is only brought forward.
ipcMain.handle('voiceInput:done', async (_event, text: string) => {
  voiceInputRecording = false
  void playChime(false)
  if (voiceInputWindow && !voiceInputWindow.isDestroyed()) {
    voiceInputWindow.hide()
  }

  if (!text?.trim()) return

  // Calling show()/focus() on a destroyed BrowserWindow throws, so every path
  // that reveals the main window goes through the same guard.
  const revealMainWindow = (): void => {
    if (mainWindow && !mainWindow.isDestroyed()) {
      mainWindow.show()
      mainWindow.focus()
    }
  }

  // Deliver text through the same path as Spotlight
  const config = await getConfig()
  if (!config.defaultConnectionId || config.connections.length === 0) {
    revealMainWindow()
    return
  }
  const conn = config.connections.find((c) => c.id === config.defaultConnectionId)
  if (!conn) {
    revealMainWindow()
    return
  }

  let url = conn.url
  if (conn.type === 'local' && SERVER_URL) {
    url = SERVER_URL
  }
  if (url.startsWith('http://0.0.0.0')) {
    url = url.replace('http://0.0.0.0', 'http://localhost')
  }

  sendToRenderer('query', { query: text.trim(), connectionId: conn.id, url })
  revealMainWindow()
})
|
||||
|
||||
// Voice input window requests close
|
||||
// The overlay asked to be dismissed: mark recording as stopped, play the stop
// chime and hide the window if it still exists.
ipcMain.handle('voiceInput:close', () => {
  voiceInputRecording = false
  playChime(false)

  const overlay = voiceInputWindow
  if (!overlay || overlay.isDestroyed()) return
  overlay.hide()
})
|
||||
|
||||
// Voice input error
|
||||
// The overlay reported a failure: mark recording as stopped and log the message.
ipcMain.handle('voiceInput:error', (_event, message: string) => {
  voiceInputRecording = false
  log.warn('Voice input error:', message)
})
|
||||
|
||||
// Open Terminal
|
||||
ipcMain.handle('open-terminal:start', async () => {
|
||||
try {
|
||||
@@ -1331,7 +1627,7 @@ if (!gotTheLock) {
|
||||
|
||||
|
||||
// Global shortcut
|
||||
registerShortcuts(CONFIG.globalShortcut, CONFIG.spotlightShortcut)
|
||||
registerShortcuts(CONFIG.globalShortcut, CONFIG.spotlightShortcut, CONFIG.voiceInputShortcut)
|
||||
|
||||
// Enable screen capture
|
||||
session.defaultSession.setDisplayMediaRequestHandler(
|
||||
@@ -1423,6 +1719,10 @@ if (!gotTheLock) {
|
||||
spotlightWindow.destroy()
|
||||
}
|
||||
spotlightWindow = null
|
||||
if (voiceInputWindow && !voiceInputWindow.isDestroyed()) {
|
||||
voiceInputWindow.destroy()
|
||||
}
|
||||
voiceInputWindow = null
|
||||
tray?.destroy()
|
||||
tray = null
|
||||
})
|
||||
|
||||
@@ -828,6 +828,8 @@ export interface AppConfig {
|
||||
envVars: Record<string, string>
|
||||
showSidebar: boolean
|
||||
spotlightPosition: { x: number; y: number } | null
|
||||
voiceInputShortcut: string
|
||||
voiceInputEnabled: boolean
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: AppConfig = {
|
||||
@@ -856,7 +858,9 @@ const DEFAULT_CONFIG: AppConfig = {
|
||||
},
|
||||
envVars: {},
|
||||
showSidebar: false,
|
||||
spotlightPosition: null
|
||||
spotlightPosition: null,
|
||||
voiceInputShortcut: 'Shift+CommandOrControl+Space',
|
||||
voiceInputEnabled: true
|
||||
}
|
||||
|
||||
export const getConfig = async (): Promise<AppConfig> => {
|
||||
|
||||
@@ -181,7 +181,10 @@ const api = {
|
||||
installUpdate: () => ipcRenderer.invoke('updater:install'),
|
||||
|
||||
// Changelog
|
||||
getChangelog: () => ipcRenderer.invoke('app:changelog')
|
||||
getChangelog: () => ipcRenderer.invoke('app:changelog'),
|
||||
|
||||
// Auth token relay from webview
|
||||
setAuthToken: (token: string) => ipcRenderer.invoke('app:setAuthToken', token)
|
||||
}
|
||||
|
||||
if (process.contextIsolated) {
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
import { ipcRenderer, contextBridge } from 'electron'
|
||||
|
||||
// Logs a failed fire-and-forget IPC call so the rejection is not left unhandled.
const reportIpcFailure =
  (channel: string) =>
  (error: unknown): void => {
    console.error(`${channel} failed:`, error)
  }

// Bridge exposed to the voice input overlay page.
const api = {
  // Main process tells us to start/stop recording.
  // Returns a function that removes the listener again.
  onRecordingState: (callback: (data: { recording: boolean }) => void): (() => void) => {
    const listener = (_event: unknown, data: { recording: boolean }): void => {
      callback(data)
    }
    ipcRenderer.on('voiceInput:state', listener)
    return () => {
      ipcRenderer.removeListener('voiceInput:state', listener)
    }
  },

  // Send recorded audio to main process for transcription
  transcribe: (audioBuffer: ArrayBuffer, token?: string): Promise<any> => {
    return ipcRenderer.invoke('voiceInput:transcribe', audioBuffer, token)
  },

  // Notify main process that transcription completed
  done: (text: string): void => {
    ipcRenderer.invoke('voiceInput:done', text).catch(reportIpcFailure('voiceInput:done'))
  },

  // Close/hide the voice input window
  close: (): void => {
    ipcRenderer.invoke('voiceInput:close').catch(reportIpcFailure('voiceInput:close'))
  },

  // Report an error
  error: (message: string): void => {
    ipcRenderer.invoke('voiceInput:error', message).catch(reportIpcFailure('voiceInput:error'))
  }
}
|
||||
|
||||
// Publish the bridge as `window.voiceInputAPI`: assigned directly when context
// isolation is off, otherwise through the context bridge.
if (!process.contextIsolated) {
  // @ts-ignore
  window.voiceInputAPI = api
} else {
  try {
    contextBridge.exposeInMainWorld('voiceInputAPI', api)
  } catch (error) {
    console.error(error)
  }
}
|
||||
@@ -0,0 +1,320 @@
|
||||
<script lang="ts">
|
||||
import { onMount, onDestroy } from 'svelte'
|
||||
|
||||
const api = window.voiceInputAPI
|
||||
|
||||
let recording = $state(false)
|
||||
let transcribing = $state(false)
|
||||
let duration = $state(0)
|
||||
let errorMsg = $state('')
|
||||
|
||||
// Waveform
|
||||
let levels: number[] = $state(Array(5).fill(0.15))
|
||||
let animFrame: number | null = null
|
||||
|
||||
let timer: ReturnType<typeof setInterval> | null = null
|
||||
let errorTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
// Audio
|
||||
let mediaRecorder: MediaRecorder | null = null
|
||||
let audioChunks: Blob[] = []
|
||||
let mediaStream: MediaStream | null = null
|
||||
let analyser: AnalyserNode | null = null
|
||||
let audioCtx: AudioContext | null = null
|
||||
let dataArray: Uint8Array | null = null
|
||||
|
||||
// Dragging
|
||||
let dragging = false
|
||||
let dragStart = { mx: 0, my: 0, wx: 0, wy: 0 }
|
||||
|
||||
/**
 * Formats a duration in seconds as `m:ss`.
 *
 * Fractional seconds are truncated, and negative or non-finite input is
 * rendered as `0:00`, so the label never shows text such as `0:1.5`.
 *
 * @param s Elapsed time in seconds.
 * @returns Minutes and zero-padded seconds, e.g. `1:05`.
 */
const formatDuration = (s: number): string => {
  const total = Number.isFinite(s) ? Math.max(0, Math.floor(s)) : 0
  const minutes = Math.floor(total / 60)
  const seconds = (total % 60).toString().padStart(2, '0')
  return `${minutes}:${seconds}`
}
|
||||
|
||||
// Per-frame update of the five waveform bars. With a live analyser each bar
// takes one frequency-bin sample scaled to 0..1 (floored at 0.15); before the
// analyser exists the bars show random placeholder heights. Re-schedules
// itself on every animation frame.
const animateLevel = () => {
  const samples = dataArray
  if (analyser && samples) {
    analyser.getByteFrequencyData(samples)
    // Sample 5 frequency bands
    const bands = 5
    const step = Math.floor(samples.length / bands)
    const next: number[] = []
    for (let band = 0; band < bands; band++) {
      next.push(Math.max(0.15, samples[band * step] / 255))
    }
    levels = next
  } else {
    levels = levels.map(() => 0.15 + Math.random() * 0.6)
  }
  animFrame = requestAnimationFrame(animateLevel)
}
|
||||
|
||||
// Shows `msg` in the pill for three seconds, then clears it and asks the main
// process to close the overlay.
const showError = (msg: string) => {
  // The template renders the recording bars and the transcribing spinner in
  // preference to the error text, so both flags must be cleared for the
  // message to be visible at all.
  recording = false
  transcribing = false

  errorMsg = msg
  if (errorTimer) clearTimeout(errorTimer)
  errorTimer = setTimeout(() => {
    errorTimer = null
    errorMsg = ''
    api?.close()
  }, 3000)
}
|
||||
|
||||
// Bumped on every start so an earlier, still-pending start can tell that it
// has been superseded.
let startAttempt = 0

// Begins a recording session: resets state, shows placeholder bars, waits for
// the start chime, then opens the microphone, wires up the level analyser and
// starts the MediaRecorder plus the one-second duration timer.
const startRecording = async () => {
  // Reset all state from any previous session
  cleanup()
  const attempt = ++startAttempt
  errorMsg = ''
  transcribing = false
  recording = true
  duration = 0
  audioChunks = []
  animateLevel() // show placeholder bars immediately

  // True once this start was cancelled (stop/Escape) or replaced by a newer one
  const isStale = (): boolean => attempt !== startAttempt || !recording

  // Wait for the start chime (played from main process) to finish
  // before activating mic — macOS ducks audio when mic activates
  await new Promise((r) => setTimeout(r, 500))
  if (isStale()) return // cancelled during the wait: never touch the mic

  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    if (isStale()) {
      // Cancelled while the permission prompt / device open was pending:
      // release the microphone instead of recording in a hidden window.
      stream.getTracks().forEach((t) => t.stop())
      return
    }
    mediaStream = stream
    audioChunks = []

    // Set up analyser for real audio levels
    audioCtx = new AudioContext()
    analyser = audioCtx.createAnalyser()
    analyser.fftSize = 64
    dataArray = new Uint8Array(analyser.frequencyBinCount)
    const source = audioCtx.createMediaStreamSource(stream)
    source.connect(analyser)

    mediaRecorder = new MediaRecorder(stream, {
      mimeType: MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
        ? 'audio/webm;codecs=opus'
        : 'audio/webm'
    })

    mediaRecorder.ondataavailable = (e) => {
      if (e.data.size > 0) audioChunks.push(e.data)
    }

    mediaRecorder.start(250)
    timer = setInterval(() => { duration++ }, 1000)
  } catch (err: unknown) {
    if (attempt !== startAttempt) return // a newer session owns the state now
    // Stop the placeholder animation and release anything already opened;
    // otherwise the bars keep animating and hide the error message.
    cleanup()
    showError(err instanceof Error && err.message ? err.message : 'Mic access failed')
  }
}
|
||||
|
||||
// Tears down the current session: clears the UI flags, stops the duration
// timer and the bar animation, stops an active recorder, releases the
// microphone tracks and closes the audio context.
const cleanup = () => {
  recording = false
  transcribing = false

  if (timer) {
    clearInterval(timer)
    timer = null
  }
  if (animFrame) {
    cancelAnimationFrame(animFrame)
    animFrame = null
  }
  levels = Array(5).fill(0.15)

  const recorderActive = mediaRecorder !== null && mediaRecorder.state !== 'inactive'
  if (recorderActive) mediaRecorder!.stop()

  if (mediaStream) {
    for (const track of mediaStream.getTracks()) track.stop()
    mediaStream = null
  }

  if (audioCtx) {
    audioCtx.close()
    audioCtx = null
    analyser = null
  }

  mediaRecorder = null
}
|
||||
|
||||
// Aborts the session without transcribing: tear everything down, then ask the
// main process to close the overlay.
const cancelRecording = (): void => {
  cleanup()
  api?.close()
}
|
||||
|
||||
// Ends the session and submits the audio: stops the recorder, releases the
// microphone, sends the captured audio for transcription and hands the
// resulting text to the main process. Recordings that are too short or too
// small are treated as a cancel.
const stopRecording = async () => {
  // Work on a local reference: cleanup() may null the shared variable while
  // we are awaiting below (e.g. Escape pressed during the stop).
  const recorder = mediaRecorder
  if (!recorder || recorder.state === 'inactive') {
    cancelRecording()
    return
  }

  // Too short — treat as cancel. `duration` ticks once per second, so this
  // rejects anything that ended before the first tick (under about 1 second).
  if (duration < 1) {
    cancelRecording()
    return
  }

  recording = false
  if (timer) { clearInterval(timer); timer = null }
  if (animFrame) { cancelAnimationFrame(animFrame); animFrame = null }
  levels = Array(5).fill(0.15)

  const audioBlob = await new Promise<Blob>((resolve) => {
    recorder.onstop = () => {
      resolve(new Blob(audioChunks, { type: recorder.mimeType }))
    }
    recorder.stop()
  })

  if (mediaStream) {
    mediaStream.getTracks().forEach((t) => t.stop())
    mediaStream = null
  }
  if (audioCtx) {
    audioCtx.close()
    audioCtx = null
    analyser = null
  }

  if (audioBlob.size < 4096) {
    api?.close()
    return
  }

  transcribing = true
  try {
    const buffer = await audioBlob.arrayBuffer()
    const result = await api?.transcribe(buffer)

    // cleanup() (cancel, or a new recording starting) clears `transcribing`;
    // in that case the result is stale and must not be delivered.
    if (!transcribing) return
    transcribing = false

    const text = result?.text?.trim()
    if (text) {
      api?.done(text)
    } else {
      api?.close()
    }
  } catch (err: any) {
    if (!transcribing) return // cancelled meanwhile: nothing to report
    // Clear the spinner first; it is rendered in preference to the error text.
    transcribing = false
    showError(err?.message || 'Transcription failed')
  }
}
|
||||
|
||||
// Manual window dragging: remember where the pointer and the window were when
// the button went down, then move the window by the pointer's screen delta.
const onMouseDown = ({ screenX, screenY }: MouseEvent) => {
  dragStart = { mx: screenX, my: screenY, wx: window.screenX, wy: window.screenY }
  dragging = true
}

const onMouseMove = (e: MouseEvent) => {
  if (dragging) {
    const dx = e.screenX - dragStart.mx
    const dy = e.screenY - dragStart.my
    window.moveTo(dragStart.wx + dx, dragStart.wy + dy)
  }
}

const onMouseUp = () => {
  dragging = false
}
|
||||
|
||||
// Follow the recording state pushed by the main process: start when asked to
// record while idle, stop when asked to stop while recording.
onMount(() => {
  api?.onRecordingState((data) => {
    const requested = Boolean(data.recording)
    if (requested === recording) return
    if (requested) {
      startRecording()
    } else {
      stopRecording()
    }
  })
})

// Release the microphone/timers and any pending error timeout on teardown.
onDestroy(() => {
  cleanup()
  if (errorTimer) {
    clearTimeout(errorTimer)
  }
})
|
||||
</script>
|
||||
|
||||
<svelte:window
|
||||
onkeydown={(e) => { if (e.key === 'Escape') cancelRecording() }}
|
||||
onmousemove={onMouseMove}
|
||||
onmouseup={onMouseUp}
|
||||
/>
|
||||
|
||||
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||
<div class="pill" onmousedown={onMouseDown}>
|
||||
{#if recording}
|
||||
<div class="bars">
|
||||
{#each levels as level}
|
||||
<div class="bar" style="height: {6 + level * 22}px"></div>
|
||||
{/each}
|
||||
</div>
|
||||
<span class="time">{formatDuration(duration)}</span>
|
||||
{:else if transcribing}
|
||||
<div class="loader"></div>
|
||||
{:else if errorMsg}
|
||||
<span class="err">{errorMsg}</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<style>
|
||||
@font-face {
|
||||
font-family: 'Archivo';
|
||||
src: url('../lib/assets/fonts/Archivo-Variable.ttf');
|
||||
font-display: swap;
|
||||
}
|
||||
:global(*) { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
:global(html), :global(body), :global(#app) {
|
||||
height: 100%; width: 100%;
|
||||
background: transparent;
|
||||
overflow: hidden;
|
||||
user-select: none;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
|
||||
.pill {
|
||||
position: absolute;
|
||||
top: 50%; left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
padding: 0 20px;
|
||||
height: 44px;
|
||||
border-radius: 22px;
|
||||
cursor: grab;
|
||||
font-family: 'Archivo', -apple-system, BlinkMacSystemFont, system-ui, sans-serif;
|
||||
animation: appear 0.15s ease-out;
|
||||
|
||||
background: rgba(30, 30, 30, 0.78);
|
||||
backdrop-filter: blur(40px) saturate(1.8);
|
||||
-webkit-backdrop-filter: blur(40px) saturate(1.8);
|
||||
border: 0.5px solid rgba(255, 255, 255, 0.12);
|
||||
box-shadow:
|
||||
0 2px 12px rgba(0, 0, 0, 0.35),
|
||||
inset 0 0.5px 0 rgba(255, 255, 255, 0.06);
|
||||
}
|
||||
|
||||
.pill:active { cursor: grabbing; }
|
||||
|
||||
@keyframes appear {
|
||||
from { opacity: 0; transform: translate(-50%, -50%) scale(0.92); }
|
||||
to { opacity: 1; transform: translate(-50%, -50%) scale(1); }
|
||||
}
|
||||
|
||||
.bars {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 3px;
|
||||
height: 28px;
|
||||
}
|
||||
|
||||
.bar {
|
||||
width: 4px;
|
||||
border-radius: 99px;
|
||||
background: #fff;
|
||||
opacity: 0.9;
|
||||
transition: height 60ms ease-out;
|
||||
min-height: 6px;
|
||||
}
|
||||
|
||||
.time {
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: rgba(255, 255, 255, 0.85);
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.loader {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
border: 2px solid rgba(255, 255, 255, 0.15);
|
||||
border-top-color: rgba(255, 255, 255, 0.8);
|
||||
border-radius: 50%;
|
||||
animation: spin 0.7s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.err {
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: #ff6b6b;
|
||||
}
|
||||
</style>
|
||||
@@ -120,6 +120,13 @@
|
||||
if (event.channel === 'webview:send') {
|
||||
const requestData = event.args?.[0]
|
||||
if (!requestData) return
|
||||
|
||||
// Handle auth token relay from webview
|
||||
if (requestData.type === 'token:update' && requestData.token) {
|
||||
window.electronAPI.setAuthToken?.(requestData.token)
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await window.electronAPI[requestData.type]?.(requestData)
|
||||
if (requestData._requestId) {
|
||||
|
||||
@@ -99,6 +99,12 @@
|
||||
let spotlightRecording = $state(false)
|
||||
let spotlightShortcutInputEl = $state<HTMLButtonElement | null>(null)
|
||||
|
||||
// Voice input shortcut recorder
|
||||
let voiceInputShortcutValue = $state('')
|
||||
let voiceInputRecording = $state(false)
|
||||
let voiceInputShortcutInputEl = $state<HTMLButtonElement | null>(null)
|
||||
let voiceInputEnabled = $state(true)
|
||||
|
||||
// Keep shortcut value in sync with config store
|
||||
$effect(() => {
|
||||
if ($config?.globalShortcut !== undefined) {
|
||||
@@ -112,6 +118,15 @@
|
||||
}
|
||||
})
|
||||
|
||||
$effect(() => {
|
||||
if ($config?.voiceInputShortcut !== undefined) {
|
||||
voiceInputShortcutValue = $config.voiceInputShortcut ?? ''
|
||||
}
|
||||
if ($config?.voiceInputEnabled !== undefined) {
|
||||
voiceInputEnabled = $config.voiceInputEnabled ?? true
|
||||
}
|
||||
})
|
||||
|
||||
const keyToElectron = (e: KeyboardEvent): string | null => {
|
||||
const parts: string[] = []
|
||||
if (e.metaKey || e.ctrlKey) parts.push('CommandOrControl')
|
||||
@@ -122,16 +137,40 @@
|
||||
const ignore = ['Control', 'Meta', 'Alt', 'Shift']
|
||||
if (ignore.includes(e.key)) return null
|
||||
|
||||
// Map special keys
|
||||
const keyMap: Record<string, string> = {
|
||||
' ': 'Space',
|
||||
// Use e.code to get the physical key (avoids macOS Alt producing unicode like √ for V)
|
||||
const codeMap: Record<string, string> = {
|
||||
Space: 'Space',
|
||||
ArrowUp: 'Up',
|
||||
ArrowDown: 'Down',
|
||||
ArrowLeft: 'Left',
|
||||
ArrowRight: 'Right',
|
||||
Enter: 'Return'
|
||||
Enter: 'Return',
|
||||
Backquote: '`',
|
||||
Minus: '-',
|
||||
Equal: '=',
|
||||
BracketLeft: '[',
|
||||
BracketRight: ']',
|
||||
Backslash: '\\',
|
||||
Semicolon: ';',
|
||||
Quote: "'",
|
||||
Comma: ',',
|
||||
Period: '.',
|
||||
Slash: '/'
|
||||
}
|
||||
const key = keyMap[e.key] ?? (e.key.length === 1 ? e.key.toUpperCase() : e.key)
|
||||
|
||||
let key: string
|
||||
if (codeMap[e.code]) {
|
||||
key = codeMap[e.code]
|
||||
} else if (e.code.startsWith('Key')) {
|
||||
key = e.code.slice(3) // KeyA → A
|
||||
} else if (e.code.startsWith('Digit')) {
|
||||
key = e.code.slice(5) // Digit1 → 1
|
||||
} else if (e.code.startsWith('F') && /^F\d+$/.test(e.code)) {
|
||||
key = e.code // F1, F2, etc.
|
||||
} else {
|
||||
key = e.key.length === 1 ? e.key.toUpperCase() : e.key
|
||||
}
|
||||
|
||||
parts.push(key)
|
||||
return parts.join('+')
|
||||
}
|
||||
@@ -197,6 +236,32 @@
|
||||
config.set(await window.electronAPI.getConfig())
|
||||
}
|
||||
}
|
||||
|
||||
// Key handler for the voice input shortcut recorder. Escape leaves recording
// mode unchanged otherwise; Backspace/Delete clears the shortcut; any other
// key that maps to an accelerator is saved. Saved values are persisted and the
// config store is refreshed from the main process.
const handleVoiceInputShortcutKeydown = async (e: KeyboardEvent) => {
  e.preventDefault()
  e.stopPropagation()

  if (e.key === 'Escape') {
    voiceInputRecording = false
    return
  }

  const clearing = e.key === 'Backspace' || e.key === 'Delete'
  const next = clearing ? '' : (keyToElectron(e) ?? '')
  // Modifier-only presses yield no accelerator: keep waiting for a real key
  if (!clearing && !next) return

  voiceInputShortcutValue = next
  voiceInputRecording = false
  await window.electronAPI.setConfig({ voiceInputShortcut: next })
  config.set(await window.electronAPI.getConfig())
}
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col divide-y divide-white/[0.04]">
|
||||
@@ -412,6 +477,78 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="py-4 flex items-center justify-between">
|
||||
<div>
|
||||
<div class="text-[13px] opacity-70">Voice Input</div>
|
||||
<div class="text-[11px] opacity-25 mt-0.5">Enable global push-to-talk voice transcription</div>
|
||||
</div>
|
||||
<Switch
|
||||
checked={voiceInputEnabled}
|
||||
label="Toggle voice input"
|
||||
onchange={async (value) => {
|
||||
voiceInputEnabled = value
|
||||
await window.electronAPI.setConfig({ voiceInputEnabled: value })
|
||||
config.set(await window.electronAPI.getConfig())
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{#if voiceInputEnabled}
|
||||
<div class="py-4 flex items-center justify-between">
|
||||
<div>
|
||||
<div class="text-[13px] opacity-70">Voice Input Shortcut</div>
|
||||
<div class="text-[11px] opacity-25 mt-0.5">
|
||||
{#if voiceInputRecording}
|
||||
Press a key combination…
|
||||
{:else}
|
||||
Toggle microphone recording from anywhere
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center gap-1.5">
|
||||
<button
|
||||
bind:this={voiceInputShortcutInputEl}
|
||||
class="text-[12px] px-3 py-1.5 border-none outline-none rounded-xl transition min-w-[80px] text-center
|
||||
{voiceInputRecording
|
||||
? 'bg-black/[0.08] dark:bg-white/[0.10] text-[#1d1d1f] dark:text-[#fafafa] opacity-80 animate-pulse'
|
||||
: 'bg-black/[0.04] dark:bg-white/[0.06] text-[#1d1d1f] dark:text-[#fafafa] opacity-60 hover:opacity-80'}"
|
||||
onclick={() => {
|
||||
voiceInputRecording = true
|
||||
voiceInputShortcutInputEl?.focus()
|
||||
}}
|
||||
onkeydown={(e) => {
|
||||
if (voiceInputRecording) handleVoiceInputShortcutKeydown(e)
|
||||
}}
|
||||
onblur={() => {
|
||||
voiceInputRecording = false
|
||||
}}
|
||||
>
|
||||
{#if voiceInputRecording}
|
||||
<span class="text-[11px]">Press keys…</span>
|
||||
{:else if voiceInputShortcutValue}
|
||||
{displayShortcut(voiceInputShortcutValue)}
|
||||
{:else}
|
||||
<span class="opacity-40">Disabled</span>
|
||||
{/if}
|
||||
</button>
|
||||
{#if voiceInputShortcutValue && !voiceInputRecording}
|
||||
<button
|
||||
class="opacity-20 hover:opacity-50 transition bg-transparent border-none text-[#1d1d1f] dark:text-[#fafafa] p-0.5 shrink-0"
|
||||
onclick={async () => {
|
||||
voiceInputShortcutValue = ''
|
||||
await window.electronAPI.setConfig({ voiceInputShortcut: '' })
|
||||
config.set(await window.electronAPI.getConfig())
|
||||
}}
|
||||
>
|
||||
<svg class="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="1.5">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Advanced (collapsed by default) -->
|
||||
<div class="py-4">
|
||||
<button
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
import { mount } from 'svelte'
|
||||
import VoiceInput from './components/VoiceInput.svelte'
|
||||
|
||||
// Mount the voice input overlay into the #app element of voice-input.html.
// Fail with a clear message if the page does not provide that element,
// instead of handing `null` to Svelte through a non-null assertion.
const target = document.getElementById('app')
if (!target) {
  throw new Error('Voice input: mount target #app not found')
}

const app = mount(VoiceInput, { target })

export default app
|
||||
@@ -0,0 +1,15 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>Open WebUI – Voice Input</title>
|
||||
<meta
|
||||
http-equiv="Content-Security-Policy"
|
||||
content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'"
|
||||
/>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<script type="module" src="/src/voice-input-main.ts"></script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user