fix: Mikrofon-Fehlerbehandlung — UI friert auf NixOS nicht mehr ein [appimage]
All checks were successful
Build AppImage / build (push) Successful in 8m39s
All checks were successful
Build AppImage / build (push) Successful in 8m39s
Problem: Wenn auf NixOS der WebKit-Audio-Stack unvollständig ist
(fehlendes gst-plugin-pipewire, pipewire-pulse, Policy-Datei oder
whisper-cli-/piper-Binary), hängt die App ohne Fehlermeldung fest —
weder getUserMedia noch die Backend-Prozesse reagieren.
Frontend (VoicePanel.svelte):
- Preflight: prüft, ob navigator.mediaDevices überhaupt existiert
- getUserMedia via Promise.race gegen 8s-Timeout (sonst hängt es ewig)
- Kategorisierte Fehler: NotAllowedError → Berechtigung,
NotFoundError → keine Hardware, NotReadableError → PipeWire-Problem
- Neuer 'connecting'-State mit 🔌-Icon — User sieht, dass etwas passiert
- AudioContext-Konstruktor in try/catch
Backend (voice.rs):
- ffmpeg: 20s-Timeout + spezifische Fehlermeldung bei fehlendem Binary
- whisper-cli: 60s-Timeout, kein stilles Hängen mehr
- piper-tts: 30s-Timeout, Spawn-Fehler benennt NixOS-Wrapper
- Temp-Dateien werden bei Timeout auch aufgeräumt
This commit is contained in:
parent
384888ccd8
commit
61541098d7
2 changed files with 137 additions and 35 deletions
|
|
@ -5,9 +5,21 @@
|
||||||
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
|
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::process::Stdio;
|
use std::process::Stdio;
|
||||||
|
use std::time::Duration;
|
||||||
use tokio::process::Command as TokioCommand;
|
use tokio::process::Command as TokioCommand;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
|
/// Max-Laufzeit für whisper-cli Transkription — eine Minute reicht für ~10s Audio
|
||||||
|
/// auf ggml-base. Hängt der Prozess (z.B. weil whisper-cli fehlt und bash spawnt
|
||||||
|
/// stattdessen was anderes), killt der Timeout ihn statt die UI einfrieren zu lassen.
|
||||||
|
const WHISPER_TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
|
/// Piper ist schneller als Whisper — 30s ist bereits extrem großzügig für ~600 Zeichen.
|
||||||
|
const PIPER_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
|
/// ffmpeg-Konvertierung sollte in Sekunden fertig sein.
|
||||||
|
const FFMPEG_TIMEOUT: Duration = Duration::from_secs(20);
|
||||||
|
|
||||||
/// Pfade zu den lokalen Binaries (werden in shell.nix bereitgestellt)
|
/// Pfade zu den lokalen Binaries (werden in shell.nix bereitgestellt)
|
||||||
fn whisper_binary() -> String {
|
fn whisper_binary() -> String {
|
||||||
std::env::var("WHISPER_CPP_PATH")
|
std::env::var("WHISPER_CPP_PATH")
|
||||||
|
|
@ -183,22 +195,32 @@ pub async fn transcribe_audio(
|
||||||
// Falls nicht WAV: mit ffmpeg konvertieren (WebM → WAV 16kHz mono)
|
// Falls nicht WAV: mit ffmpeg konvertieren (WebM → WAV 16kHz mono)
|
||||||
let wav_path = if format != "wav" {
|
let wav_path = if format != "wav" {
|
||||||
let wav_path = tmp_dir.join("claude-voice-input.wav");
|
let wav_path = tmp_dir.join("claude-voice-input.wav");
|
||||||
let ffmpeg_result = TokioCommand::new("ffmpeg")
|
let ffmpeg_fut = TokioCommand::new("ffmpeg")
|
||||||
.args(["-y", "-i"])
|
.args(["-y", "-i"])
|
||||||
.arg(&input_path)
|
.arg(&input_path)
|
||||||
.args(["-ar", "16000", "-ac", "1", "-f", "wav"])
|
.args(["-ar", "16000", "-ac", "1", "-f", "wav"])
|
||||||
.arg(&wav_path)
|
.arg(&wav_path)
|
||||||
.stdout(Stdio::null())
|
.stdout(Stdio::null())
|
||||||
.stderr(Stdio::null())
|
.stderr(Stdio::null())
|
||||||
.status()
|
.status();
|
||||||
.await;
|
|
||||||
|
|
||||||
match ffmpeg_result {
|
match tokio::time::timeout(FFMPEG_TIMEOUT, ffmpeg_fut).await {
|
||||||
Ok(status) if status.success() => wav_path,
|
Ok(Ok(status)) if status.success() => wav_path,
|
||||||
_ => {
|
Ok(Ok(_)) => {
|
||||||
println!("⚠️ ffmpeg Konvertierung fehlgeschlagen, versuche direkt...");
|
println!("⚠️ ffmpeg Konvertierung fehlgeschlagen (Exit != 0), versuche direkt...");
|
||||||
input_path.clone()
|
input_path.clone()
|
||||||
}
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
let _ = tokio::fs::remove_file(&input_path).await;
|
||||||
|
return Err(format!(
|
||||||
|
"ffmpeg nicht ausführbar: {} — auf NixOS ffmpeg im Nix-Wrapper prüfen",
|
||||||
|
e
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
let _ = tokio::fs::remove_file(&input_path).await;
|
||||||
|
return Err(format!("ffmpeg Timeout nach {}s", FFMPEG_TIMEOUT.as_secs()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
input_path.clone()
|
input_path.clone()
|
||||||
|
|
@ -206,7 +228,7 @@ pub async fn transcribe_audio(
|
||||||
|
|
||||||
// whisper-cli ausführen (Datei als letztes Argument, kein --file Flag)
|
// whisper-cli ausführen (Datei als letztes Argument, kein --file Flag)
|
||||||
let model = whisper_model_path();
|
let model = whisper_model_path();
|
||||||
let output = TokioCommand::new(&whisper_binary())
|
let whisper_fut = TokioCommand::new(&whisper_binary())
|
||||||
.args([
|
.args([
|
||||||
"--model", &model,
|
"--model", &model,
|
||||||
"--language", "de",
|
"--language", "de",
|
||||||
|
|
@ -217,9 +239,27 @@ pub async fn transcribe_audio(
|
||||||
.arg(&wav_path)
|
.arg(&wav_path)
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.stderr(Stdio::piped())
|
.stderr(Stdio::piped())
|
||||||
.output()
|
.output();
|
||||||
.await
|
|
||||||
.map_err(|e| format!("whisper-cli ausführen fehlgeschlagen: {}", e))?;
|
let output = match tokio::time::timeout(WHISPER_TIMEOUT, whisper_fut).await {
|
||||||
|
Ok(Ok(out)) => out,
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
let _ = tokio::fs::remove_file(&input_path).await;
|
||||||
|
if format != "wav" { let _ = tokio::fs::remove_file(&wav_path).await; }
|
||||||
|
return Err(format!(
|
||||||
|
"whisper-cli nicht ausführbar: {} — Binary fehlt? Auf NixOS whisper-cpp im Nix-Wrapper prüfen",
|
||||||
|
e
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
let _ = tokio::fs::remove_file(&input_path).await;
|
||||||
|
if format != "wav" { let _ = tokio::fs::remove_file(&wav_path).await; }
|
||||||
|
return Err(format!(
|
||||||
|
"whisper-cli Timeout nach {}s — Prozess hängt, Audio zu lang oder Binary defekt",
|
||||||
|
WHISPER_TIMEOUT.as_secs()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Aufräumen
|
// Aufräumen
|
||||||
let _ = tokio::fs::remove_file(&input_path).await;
|
let _ = tokio::fs::remove_file(&input_path).await;
|
||||||
|
|
@ -267,7 +307,10 @@ pub async fn text_to_speech(
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.stderr(Stdio::piped())
|
.stderr(Stdio::piped())
|
||||||
.spawn()
|
.spawn()
|
||||||
.map_err(|e| format!("Piper starten fehlgeschlagen: {}", e))?;
|
.map_err(|e| format!(
|
||||||
|
"Piper starten fehlgeschlagen: {} — Binary fehlt? Auf NixOS piper-tts im Nix-Wrapper prüfen",
|
||||||
|
e
|
||||||
|
))?;
|
||||||
|
|
||||||
// Text über stdin senden
|
// Text über stdin senden
|
||||||
if let Some(mut stdin) = child.stdin.take() {
|
if let Some(mut stdin) = child.stdin.take() {
|
||||||
|
|
@ -276,8 +319,17 @@ pub async fn text_to_speech(
|
||||||
drop(stdin); // EOF senden
|
drop(stdin); // EOF senden
|
||||||
}
|
}
|
||||||
|
|
||||||
let output = child.wait_with_output().await
|
// Timeout: hängt Piper (z.B. Modell-Mismatch), killen statt UI einfrieren.
|
||||||
.map_err(|e| format!("Piper Fehler: {}", e))?;
|
let output = match tokio::time::timeout(PIPER_TIMEOUT, child.wait_with_output()).await {
|
||||||
|
Ok(Ok(out)) => out,
|
||||||
|
Ok(Err(e)) => return Err(format!("Piper Fehler: {}", e)),
|
||||||
|
Err(_) => {
|
||||||
|
return Err(format!(
|
||||||
|
"Piper Timeout nach {}s — Prozess hängt. Text zu lang oder Modell defekt?",
|
||||||
|
PIPER_TIMEOUT.as_secs()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if output.stdout.is_empty() {
|
if output.stdout.is_empty() {
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
// === Gesprächs-Zustand ===
|
// === Gesprächs-Zustand ===
|
||||||
type ConversationState = 'idle' | 'listening' | 'transcribing' | 'waiting' | 'speaking';
|
type ConversationState = 'idle' | 'connecting' | 'listening' | 'transcribing' | 'waiting' | 'speaking';
|
||||||
let state: ConversationState = 'idle';
|
let state: ConversationState = 'idle';
|
||||||
let conversationActive = false;
|
let conversationActive = false;
|
||||||
|
|
||||||
|
|
@ -87,14 +87,17 @@
|
||||||
conversationActive = true;
|
conversationActive = true;
|
||||||
errorMsg = '';
|
errorMsg = '';
|
||||||
conversationLog = [];
|
conversationLog = [];
|
||||||
|
state = 'connecting';
|
||||||
console.log('🎙️ Gespräch gestartet');
|
console.log('🎙️ Gespräch gestartet');
|
||||||
|
|
||||||
// Mikrofon-Zugriff holen
|
// Mikrofon-Zugriff holen (mit Timeout — siehe initMicrophone)
|
||||||
try {
|
try {
|
||||||
await initMicrophone();
|
await initMicrophone();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
errorMsg = `Mikrofon-Fehler: ${err instanceof Error ? err.message : err}`;
|
errorMsg = err instanceof Error ? err.message : String(err);
|
||||||
conversationActive = false;
|
conversationActive = false;
|
||||||
|
state = 'idle';
|
||||||
|
cleanupAudio();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,32 +115,77 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
async function initMicrophone() {
|
async function initMicrophone() {
|
||||||
// Mikrofon-Zugriff mit Fallback-Kette
|
// Preflight: API überhaupt vorhanden? Auf NixOS kann WebKit ohne korrekte
|
||||||
|
// gst-plugins/PipeWire-Einbindung `mediaDevices` nicht exposen.
|
||||||
|
if (!navigator.mediaDevices || typeof navigator.mediaDevices.getUserMedia !== 'function') {
|
||||||
|
throw new Error(
|
||||||
|
'Mikrofon-API nicht verfügbar. Auf NixOS: PipeWire + gst-plugins-pipewire im Nix-Wrapper prüfen. ' +
|
||||||
|
'Siehe nix/default.nix → runtimeDeps.'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mikrofon-Zugriff MIT Timeout — getUserMedia kann auf NixOS hängen statt zu
|
||||||
|
// rejecten wenn der Audio-Backend-Stack kaputt ist. Race gegen 8s-Timeout.
|
||||||
|
const getMic = async (constraints: MediaStreamConstraints): Promise<MediaStream> => {
|
||||||
|
const timeout = new Promise<MediaStream>((_, reject) =>
|
||||||
|
setTimeout(() => reject(new Error('__MIC_TIMEOUT__')), 8000)
|
||||||
|
);
|
||||||
|
return Promise.race([navigator.mediaDevices.getUserMedia(constraints), timeout]);
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
mediaStream = await navigator.mediaDevices.getUserMedia({
|
mediaStream = await getMic({
|
||||||
audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 16000 }
|
audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 16000 }
|
||||||
});
|
});
|
||||||
} catch {
|
} catch (err1: any) {
|
||||||
|
if (err1?.message === '__MIC_TIMEOUT__') {
|
||||||
|
throw new Error(
|
||||||
|
'Mikrofon antwortet nicht (Timeout 8s). Typisch für NixOS wenn gst-plugin-pipewire, ' +
|
||||||
|
'pipewire-pulse oder eine Policy-Datei fehlt. Prüfen: `pactl info` im Terminal.'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Fallback: einfachere Constraints
|
||||||
try {
|
try {
|
||||||
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
mediaStream = await getMic({ audio: true });
|
||||||
} catch {
|
} catch (err2: any) {
|
||||||
const devices = await navigator.mediaDevices.enumerateDevices();
|
if (err2?.message === '__MIC_TIMEOUT__') {
|
||||||
const mic = devices.find(d => d.kind === 'audioinput');
|
throw new Error('Mikrofon antwortet nicht (Timeout 8s). Audio-Backend hängt.');
|
||||||
if (mic) {
|
}
|
||||||
mediaStream = await navigator.mediaDevices.getUserMedia({
|
// Kategorisierte Diagnose
|
||||||
audio: { deviceId: { exact: mic.deviceId } }
|
const name = err2?.name || err1?.name || '';
|
||||||
});
|
switch (name) {
|
||||||
} else {
|
case 'NotAllowedError':
|
||||||
throw new Error('Kein Mikrofon gefunden. Unter WebKitGTK (Tauri/Linux) wird PipeWire oder PulseAudio mit gst-plugin-pipewire benötigt.');
|
case 'SecurityError':
|
||||||
|
throw new Error('Mikrofon-Zugriff verweigert. Berechtigung im System-Setting prüfen.');
|
||||||
|
case 'NotFoundError':
|
||||||
|
case 'OverconstrainedError':
|
||||||
|
throw new Error('Kein Mikrofon gefunden. Hardware angeschlossen? `arecord -l` prüft Geräte.');
|
||||||
|
case 'NotReadableError':
|
||||||
|
case 'AbortError':
|
||||||
|
throw new Error(
|
||||||
|
'Mikrofon blockiert (evtl. durch anderes Programm). Oder PipeWire/Pulse ' +
|
||||||
|
'nicht erreichbar. `systemctl --user status pipewire` prüfen.'
|
||||||
|
);
|
||||||
|
default:
|
||||||
|
throw new Error(
|
||||||
|
`Mikrofon-Fehler: ${err2?.message || err1?.message || name || 'unbekannt'}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AudioContext ebenfalls mit Defensive — Konstruktor kann bei kaputtem
|
||||||
|
// WebAudio-Backend werfen.
|
||||||
|
try {
|
||||||
audioContext = new AudioContext();
|
audioContext = new AudioContext();
|
||||||
const source = audioContext.createMediaStreamSource(mediaStream);
|
const source = audioContext.createMediaStreamSource(mediaStream);
|
||||||
analyser = audioContext.createAnalyser();
|
analyser = audioContext.createAnalyser();
|
||||||
analyser.fftSize = 2048;
|
analyser.fftSize = 2048;
|
||||||
source.connect(analyser);
|
source.connect(analyser);
|
||||||
|
} catch (err: any) {
|
||||||
|
cleanupAudio();
|
||||||
|
throw new Error(`AudioContext-Fehler: ${err?.message || err}. WebAudio-Support in WebKitGTK prüfen.`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function startListening() {
|
function startListening() {
|
||||||
|
|
@ -438,6 +486,7 @@
|
||||||
|
|
||||||
function getStateLabel(): string {
|
function getStateLabel(): string {
|
||||||
switch (state) {
|
switch (state) {
|
||||||
|
case 'connecting': return 'Mikrofon verbinden…';
|
||||||
case 'listening': return 'Höre zu…';
|
case 'listening': return 'Höre zu…';
|
||||||
case 'transcribing': return 'Transkribiere…';
|
case 'transcribing': return 'Transkribiere…';
|
||||||
case 'waiting': return 'Claude denkt nach…';
|
case 'waiting': return 'Claude denkt nach…';
|
||||||
|
|
@ -448,6 +497,7 @@
|
||||||
|
|
||||||
function getStateIcon(): string {
|
function getStateIcon(): string {
|
||||||
switch (state) {
|
switch (state) {
|
||||||
|
case 'connecting': return '🔌';
|
||||||
case 'listening': return '🎤';
|
case 'listening': return '🎤';
|
||||||
case 'transcribing': return '⏳';
|
case 'transcribing': return '⏳';
|
||||||
case 'waiting': return '🤔';
|
case 'waiting': return '🤔';
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue