From f287514af59a73b57faab3974a8b67422c627a0e Mon Sep 17 00:00:00 2001
From: Eddy
Date: Tue, 21 Apr 2026 08:48:03 +0200
Subject: [PATCH] Phase 2.2: Local voice - whisper-cli + piper-tts,
 conversation mode with VAD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenAI removed entirely. Voice now runs offline with local binaries:
- whisper-cli (whisper-cpp 1.8.3) for speech-to-text
- piper-tts with the Thorsten voice (German) for text-to-speech
- GStreamer + PipeWire in shell.nix for WebKitGTK microphone access
- VoicePanel: real conversation with VAD silence detection, interrupts, and a listen loop
- Models in .gitignore (~250 MB)

[appimage]

Co-Authored-By: Claude Opus 4.6
---
 .gitignore                           |    4 +
 shell.nix                            |   28 +-
 src-tauri/Cargo.lock                 |    1 +
 src-tauri/src/voice.rs               |  368 ++++++---
 src/lib/components/VoicePanel.svelte | 1137 +++++++++++++-------------
 5 files changed, 856 insertions(+), 682 deletions(-)

diff --git a/.gitignore b/.gitignore
index 44b12bc..e2c45d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,10 @@ result-*
 .env
 .env.local
 
+# Voice models (too large for Git, ~250 MB)
+models/*.bin
+models/*.onnx
+
 # OS
 .DS_Store
 Thumbs.db

diff --git a/shell.nix b/shell.nix
index 13960cf..6037934 100644
--- a/shell.nix
+++ b/shell.nix
@@ -30,10 +30,20 @@ pkgs.mkShell {
     # Node.js (falls nicht global)
     nodejs_22
 
-    # Für Audio (Whisper/TTS später)
+    # Phase 2.2: local voice (free, offline)
+    whisper-cpp  # speech-to-text (provides the whisper-cli binary)
+    piper-tts    # text-to-speech (German voice: Thorsten)
     alsa-lib
     ffmpeg
 
+    # GStreamer for WebKitGTK microphone access (getUserMedia)
+    gst_all_1.gstreamer
+    gst_all_1.gst-plugins-base
+    gst_all_1.gst-plugins-good
+    gst_all_1.gst-plugins-bad
+    gst_all_1.gst-plugins-ugly
+    pipewire  # PipeWire support for audio capture
+
     # Zusätzliche Bibliotheken für Tauri CLI
     bzip2
     zlib
@@ -58,11 +68,27 @@ pkgs.mkShell {
     pkgs.xz
     pkgs.zstd
     pkgs.openssl
+    pkgs.gst_all_1.gstreamer
+    pkgs.gst_all_1.gst-plugins-base
+    pkgs.gst_all_1.gst-plugins-good
+    pkgs.gst_all_1.gst-plugins-bad
+    pkgs.pipewire
+    pkgs.alsa-lib
   ]}:$LD_LIBRARY_PATH"
 
+  # GStreamer plugin paths for WebKitGTK microphone access
+  export GST_PLUGIN_PATH="${pkgs.lib.makeSearchPathOutput "lib" "lib/gstreamer-1.0" [
+    pkgs.gst_all_1.gstreamer
+    pkgs.gst_all_1.gst-plugins-base
+    pkgs.gst_all_1.gst-plugins-good
+    pkgs.gst_all_1.gst-plugins-bad
+    pkgs.gst_all_1.gst-plugins-ugly
+  ]}''${GST_PLUGIN_PATH:+:$GST_PLUGIN_PATH}"
+
   echo "🦀 Claude Desktop Entwicklungsumgebung geladen"
   echo "   Rust:  $(rustc --version 2>/dev/null || echo 'nicht gefunden')"
   echo "   Cargo: $(cargo --version 2>/dev/null || echo 'nicht gefunden')"
   echo "   Node:  $(node --version 2>/dev/null || echo 'nicht gefunden')"
+  echo "   GStreamer: $(gst-inspect-1.0 --version 2>/dev/null | head -1 || echo 'nicht gefunden')"
 '';
}

diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index 15a077c..f8419cd 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -494,6 +494,7 @@ dependencies = [
  "tokio",
  "tokio-tungstenite",
  "uuid",
+ "webkit2gtk",
 ]
 
 [[package]]

diff --git a/src-tauri/src/voice.rs b/src-tauri/src/voice.rs
index e16af6a..2c072e4 100644
--- a/src-tauri/src/voice.rs
+++ b/src-tauri/src/voice.rs
@@ -1,173 +1,295 @@
 // Claude Desktop — Voice Interface
-// Speech-to-Text mit Whisper API, Text-to-Speech mit OpenAI TTS
+// Phase 2.2: local Whisper (whisper-cli) + Piper-TTS, fully offline and free
+// Conversation mode: listen continuously, allow interruptions, respond
 
 use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
 use serde::{Deserialize, Serialize};
+use std::process::Stdio;
+use tokio::process::Command as TokioCommand;
+use tokio::io::AsyncWriteExt;
 
-/// Whisper API Konfiguration
-const OPENAI_API_URL: &str = "https://api.openai.com/v1/audio/transcriptions";
-const TTS_API_URL: &str = "https://api.openai.com/v1/audio/speech";
+/// Paths to the local binaries (provided by shell.nix)
+fn whisper_binary() -> String {
+    std::env::var("WHISPER_CPP_PATH")
+        .unwrap_or_else(|_| "whisper-cli".to_string())
+}
 
-/// Transkriptions-Ergebnis
+fn piper_binary() -> String {
+    std::env::var("PIPER_TTS_PATH")
+        .unwrap_or_else(|_| "piper".to_string())
+}
+
+/// Model paths: relative to the executable, or absolute
+fn whisper_model_path() -> String {
+    std::env::var("WHISPER_MODEL")
+        .unwrap_or_else(|_| {
+            let exe_dir = std::env::current_exe()
+                .ok()
+                .and_then(|p| p.parent().map(|p| p.to_path_buf()));
+
+            let candidates = vec![
+                exe_dir.as_ref().map(|d| d.join("../models/ggml-base.bin")),
+                exe_dir.as_ref().map(|d| d.join("models/ggml-base.bin")),
+                Some(std::path::PathBuf::from("models/ggml-base.bin")),
+            ];
+
+            candidates.into_iter()
+                .flatten()
+                .find(|p| p.exists())
+                .map(|p| p.to_string_lossy().to_string())
+                .unwrap_or_else(|| "models/ggml-base.bin".to_string())
+        })
+}
+
+fn piper_model_path() -> String {
+    std::env::var("PIPER_MODEL")
+        .unwrap_or_else(|_| {
+            let exe_dir = std::env::current_exe()
+                .ok()
+                .and_then(|p| p.parent().map(|p| p.to_path_buf()));
+
+            let candidates = vec![
+                exe_dir.as_ref().map(|d| d.join("../models/de_DE-thorsten-high.onnx")),
+                exe_dir.as_ref().map(|d| d.join("models/de_DE-thorsten-high.onnx")),
+                Some(std::path::PathBuf::from("models/de_DE-thorsten-high.onnx")),
+            ];
+
+            candidates.into_iter()
+                .flatten()
+                .find(|p| p.exists())
+                .map(|p| p.to_string_lossy().to_string())
+                .unwrap_or_else(|| "models/de_DE-thorsten-high.onnx".to_string())
+        })
+}
+
+/// Voice system status
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct TranscriptionResult {
-    pub text: String,
-    pub language: Option<String>,
-    pub duration: Option<f64>,
+pub struct VoiceStatus {
+    pub whisper_available: bool,
+    pub piper_available: bool,
+    pub whisper_model: String,
+    pub piper_model: String,
+    pub openai_available: bool,
 }
 
-/// TTS-Stimmen
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum TtsVoice {
-    Alloy,
-    Echo,
-    Fable,
-    Onyx,
-    Nova,
-    Shimmer,
+/// Checks whether the local voice tools are available
+#[tauri::command]
+pub async fn check_voice_availability() -> Result<VoiceStatus, String> {
+    // Check whisper-cli
+    let whisper_ok = TokioCommand::new(&whisper_binary())
+        .arg("--help")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .await
+        .map(|s| s.success())
+        .unwrap_or(false);
+
+    // Check that the model file exists
+    let whisper_model = whisper_model_path();
+    let whisper_model_ok = std::path::Path::new(&whisper_model).exists();
+
+    // Check piper
+    let piper_ok = TokioCommand::new(&piper_binary())
+        .arg("--help")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .await
+        .map(|_| true) // piper --help may exit 0 or 1; either is fine
+        .unwrap_or(false);
+
+    let piper_model = piper_model_path();
+    let piper_model_ok = std::path::Path::new(&piper_model).exists();
+
+    // OpenAI API key (fallback only)
+    let openai_available = std::env::var("OPENAI_API_KEY")
+        .map(|k| !k.is_empty())
+        .unwrap_or(false);
+
+    let status = VoiceStatus {
+        whisper_available: whisper_ok && whisper_model_ok,
+        piper_available: piper_ok && piper_model_ok,
+        whisper_model: if whisper_model_ok { whisper_model } else { "Nicht gefunden".to_string() },
+        piper_model: if piper_model_ok { piper_model } else { "Nicht gefunden".to_string() },
+        openai_available,
+    };
+
+    println!("🎤 Voice-Status: Whisper={}, Piper={}, OpenAI={}",
+        status.whisper_available, status.piper_available, status.openai_available);
+
+    Ok(status)
 }
 
-impl TtsVoice {
-    fn as_str(&self) -> &str {
-        match self {
-            TtsVoice::Alloy => "alloy",
-            TtsVoice::Echo => "echo",
-            TtsVoice::Fable => "fable",
-            TtsVoice::Onyx => "onyx",
-            TtsVoice::Nova => "nova",
-            TtsVoice::Shimmer => "shimmer",
-        }
-    }
-}
-
-/// Holt den OpenAI API Key aus Umgebungsvariable oder Settings
-fn get_openai_key() -> Result<String, String> {
-    // Erst Umgebungsvariable prüfen
-    if let Ok(key) = std::env::var("OPENAI_API_KEY") {
-        if !key.is_empty() {
-            return Ok(key);
-        }
-    }
-
-    // Alternativ: Aus Settings laden (TODO)
-    Err("OpenAI API Key nicht gefunden. Setze OPENAI_API_KEY Umgebungsvariable.".to_string())
-}
-
-/// Transkribiert Audio mit OpenAI Whisper API
+/// Transcribes audio with the local whisper-cli
+/// Audio arrives from the frontend as Base64-encoded WAV
 #[tauri::command]
 pub async fn transcribe_audio(
     audio_base64: String,
     format: String,
 ) -> Result<String, String> {
-    let api_key = get_openai_key()?;
-
-    // Base64 dekodieren
     let audio_bytes = BASE64.decode(&audio_base64)
         .map_err(|e| format!("Base64-Dekodierung fehlgeschlagen: {}", e))?;
 
-    // Temporäre Datei erstellen (Whisper API braucht Datei-Upload)
-    // Multipart-Request an Whisper API — direkt aus dem Byte-Buffer
-    let client = reqwest::Client::new();
+    // Write a temporary file (whisper-cli needs a file on disk)
+    let tmp_dir = std::env::temp_dir();
+    let input_path = tmp_dir.join(format!("claude-voice-input.{}", format));
 
-    let file_part = reqwest::multipart::Part::bytes(audio_bytes)
-        .file_name(format!("audio.{}", format))
-        .mime_str(&format!("audio/{}", format))
-        .map_err(|e| format!("MIME-Type fehlgeschlagen: {}", e))?;
-
-    let form = reqwest::multipart::Form::new()
-        .part("file", file_part)
-        .text("model", "whisper-1")
-        .text("language", "de") // Deutsch priorisieren
-        .text("response_format", "json");
-
-    let response = client
-        .post(OPENAI_API_URL)
-        .bearer_auth(&api_key)
-        .multipart(form)
-        .send()
+    tokio::fs::write(&input_path, &audio_bytes)
         .await
-        .map_err(|e| format!("API-Request fehlgeschlagen: {}", e))?;
+        .map_err(|e| format!("Temp-Datei schreiben fehlgeschlagen: {}", e))?;
 
-    if !response.status().is_success() {
-        let error_text = response.text().await.unwrap_or_default();
-        return Err(format!("Whisper API Fehler: {}", error_text));
+    // If the input is not WAV: convert with ffmpeg (WebM → WAV, 16 kHz mono)
+    let wav_path = if format != "wav" {
+        let wav_path = tmp_dir.join("claude-voice-input.wav");
+        let ffmpeg_result = TokioCommand::new("ffmpeg")
+            .args(["-y", "-i"])
+            .arg(&input_path)
+            .args(["-ar", "16000", "-ac", "1", "-f", "wav"])
+            .arg(&wav_path)
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status()
+            .await;
+
+        match ffmpeg_result {
+            Ok(status) if status.success() => wav_path,
+            _ => {
+                println!("⚠️ ffmpeg Konvertierung fehlgeschlagen, versuche direkt...");
+                input_path.clone()
+            }
+        }
+    } else {
+        input_path.clone()
+    };
+
+    // Run whisper-cli (the file is the last positional argument; there is no --file flag)
+    let model = whisper_model_path();
+    let output = TokioCommand::new(&whisper_binary())
+        .args([
+            "--model", &model,
+            "--language", "de",
+            "--no-timestamps",
+            "--no-prints",
+            "--threads", "4",
+        ])
+        .arg(&wav_path)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .output()
+        .await
+        .map_err(|e| format!("whisper-cli ausführen fehlgeschlagen: {}", e))?;
+
+    // Clean up temp files
+    let _ = tokio::fs::remove_file(&input_path).await;
+    if format != "wav" {
+        let _ = tokio::fs::remove_file(&wav_path).await;
     }
 
-    // Response parsen
-    #[derive(Deserialize)]
-    struct WhisperResponse {
-        text: String,
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(format!("whisper-cli Fehler: {}", stderr));
     }
 
-    let result: WhisperResponse = response.json().await
-        .map_err(|e| format!("Response parsen fehlgeschlagen: {}", e))?;
+    // Parse the transcription: whisper-cli prints the text on stdout
+    let text = String::from_utf8_lossy(&output.stdout)
+        .lines()
+        .filter(|l| !l.trim().is_empty())
+        .map(|l| l.trim().to_string())
+        .collect::<Vec<_>>()
+        .join(" ")
+        .trim()
+        .to_string();
 
-    println!("🎤 Transkription: \"{}\"", result.text);
+    println!("🎤 Transkription (lokal): \"{}\"", text);
 
-    Ok(result.text)
+    Ok(text)
 }
 
-/// Text-to-Speech mit OpenAI TTS API
+/// Text-to-speech with the local Piper TTS
+/// Returns Base64-encoded WAV audio
 #[tauri::command]
 pub async fn text_to_speech(
     text: String,
-    voice: Option<String>,
+    _voice: Option<String>, // Ignored for Piper (the model determines the voice)
 ) -> Result<String, String> {
-    let api_key = get_openai_key()?;
+    let model = piper_model_path();
 
-    let voice_name = voice.unwrap_or_else(|| "nova".to_string());
-
-    let client = reqwest::Client::new();
-
-    let body = serde_json::json!({
-        "model": "tts-1",
-        "input": text,
-        "voice": voice_name,
-        "response_format": "mp3"
-    });
-
-    let response = client
-        .post(TTS_API_URL)
-        .bearer_auth(&api_key)
-        .json(&body)
-        .send()
-        .await
-        .map_err(|e| format!("TTS API-Request fehlgeschlagen: {}", e))?;
-
-    if !response.status().is_success() {
-        let error_text = response.text().await.unwrap_or_default();
-        return Err(format!("TTS API Fehler: {}", error_text));
+    if !std::path::Path::new(&model).exists() {
+        return Err(format!("Piper-Modell nicht gefunden: {}", model));
     }
 
-    // Audio-Bytes als Base64 zurückgeben
-    let audio_bytes = response.bytes().await
-        .map_err(|e| format!("Audio lesen fehlgeschlagen: {}", e))?;
+    // Feed Piper via stdin; raw PCM comes out on stdout
+    let mut child = TokioCommand::new(&piper_binary())
+        .args(["--model", &model, "--output-raw"])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .map_err(|e| format!("Piper starten fehlgeschlagen: {}", e))?;
 
-    let audio_base64 = BASE64.encode(&audio_bytes);
+    // Send the text via stdin
+    if let Some(mut stdin) = child.stdin.take() {
+        stdin.write_all(text.as_bytes()).await
+            .map_err(|e| format!("Piper stdin schreiben fehlgeschlagen: {}", e))?;
+        drop(stdin); // send EOF
+    }
 
-    println!("🔊 TTS generiert: {} Zeichen → {} Bytes Audio", text.len(), audio_bytes.len());
+    let output = child.wait_with_output().await
+        .map_err(|e| format!("Piper Fehler: {}", e))?;
+
+    if output.stdout.is_empty() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(format!("Piper hat kein Audio erzeugt: {}", stderr));
+    }
+
+    // Raw PCM: prepend a WAV header (16-bit, 22050 Hz, mono; Piper's default)
+    let pcm_data = &output.stdout;
+    let wav_data = pcm_to_wav(pcm_data, 22050, 1, 16);
+
+    let audio_base64 = BASE64.encode(&wav_data);
+
+    println!("🔊 TTS (Piper lokal): {} Zeichen → {} Bytes WAV", text.len(), wav_data.len());
 
     Ok(audio_base64)
 }
 
-/// Prüft ob Voice-Features verfügbar sind (API Key vorhanden)
-#[tauri::command]
-pub async fn check_voice_availability() -> Result<bool, String> {
-    match get_openai_key() {
-        Ok(_) => Ok(true),
-        Err(_) => Ok(false),
-    }
+/// Converts raw PCM to WAV (adds the 44-byte header)
+fn pcm_to_wav(pcm: &[u8], sample_rate: u32, channels: u16, bits_per_sample: u16) -> Vec<u8> {
+    let byte_rate = sample_rate * channels as u32 * bits_per_sample as u32 / 8;
+    let block_align = channels * bits_per_sample / 8;
+    let data_size = pcm.len() as u32;
+    let file_size = 36 + data_size;
+
+    let mut wav = Vec::with_capacity(44 + pcm.len());
+
+    // RIFF header
+    wav.extend_from_slice(b"RIFF");
+    wav.extend_from_slice(&file_size.to_le_bytes());
+    wav.extend_from_slice(b"WAVE");
+
+    // fmt chunk
+    wav.extend_from_slice(b"fmt ");
+    wav.extend_from_slice(&16u32.to_le_bytes()); // chunk size
+    wav.extend_from_slice(&1u16.to_le_bytes());  // PCM format
+    wav.extend_from_slice(&channels.to_le_bytes());
+    wav.extend_from_slice(&sample_rate.to_le_bytes());
+    wav.extend_from_slice(&byte_rate.to_le_bytes());
+    wav.extend_from_slice(&block_align.to_le_bytes());
+    wav.extend_from_slice(&bits_per_sample.to_le_bytes());
+
+    // data chunk
+    wav.extend_from_slice(b"data");
+    wav.extend_from_slice(&data_size.to_le_bytes());
+    wav.extend_from_slice(pcm);
+
+    wav
 }
 
-/// Verfügbare TTS-Stimmen
+/// Available TTS voices; with Piper the voice is model-based
 #[tauri::command]
 pub async fn get_tts_voices() -> Result<Vec<serde_json::Value>, String> {
     Ok(vec![
-        serde_json::json!({ "id": "alloy", "name": "Alloy", "description": "Neutral, ausgewogen" }),
-        serde_json::json!({ "id": "echo", "name": "Echo", "description": "Männlich, warm" }),
-        serde_json::json!({ "id": "fable", "name": "Fable", "description": "Expressiv, britisch" }),
-        serde_json::json!({ "id": "onyx", "name": "Onyx", "description": "Tief, autoritär" }),
-        serde_json::json!({ "id": "nova", "name": "Nova", "description": "Weiblich, freundlich" }),
-        serde_json::json!({ "id": "shimmer", "name": "Shimmer", "description": "Weiblich, sanft" }),
+        serde_json::json!({ "id": "thorsten-high", "name": "Thorsten (Deutsch)", "description": "Lokal, hohe Qualität, männlich" }),
     ])
 }
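[Note, not part of the patch: the hand-rolled 44-byte RIFF header in pcm_to_wav
is easy to get subtly wrong (field order, little-endian sizes), so a small unit
test is a cheap safeguard. A minimal sketch, assuming it lives in the same
module as pcm_to_wav:

    #[cfg(test)]
    mod wav_header_tests {
        use super::pcm_to_wav;

        #[test]
        fn header_matches_piper_defaults() {
            // One second of silence at Piper's defaults: 22050 Hz, mono, 16-bit
            let pcm = vec![0u8; 22050 * 2];
            let wav = pcm_to_wav(&pcm, 22050, 1, 16);

            assert_eq!(&wav[0..4], b"RIFF");
            assert_eq!(&wav[8..12], b"WAVE");
            // byte rate = sample_rate * channels * bits / 8 = 44100
            assert_eq!(u32::from_le_bytes(wav[28..32].try_into().unwrap()), 44_100);
            // the data chunk size must equal the raw PCM payload length
            assert_eq!(u32::from_le_bytes(wav[40..44].try_into().unwrap()), pcm.len() as u32);
            assert_eq!(wav.len(), 44 + pcm.len());
        }
    }
]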
diff --git a/src/lib/components/VoicePanel.svelte b/src/lib/components/VoicePanel.svelte
index 4516ec1..32f904d 100644
--- a/src/lib/components/VoicePanel.svelte
+++ b/src/lib/components/VoicePanel.svelte
@@ -5,15 +5,25 @@
   import { get } from 'svelte/store';
   import { isProcessing, messages, addMessage } from '$lib/stores/app';
 
-  // Voice-Zustand
-  let isListening = false;
-  let isSpeaking = false;
-  let isVoiceAvailable = false;
-  let voiceMode: 'push-to-talk' | 'continuous' = 'push-to-talk';
-  let selectedVoice = 'nova';
-  let availableVoices: { id: string; name: string; description: string }[] = [];
+  // === Types ===
+  interface VoiceStatus {
+    whisper_available: boolean;
+    piper_available: boolean;
+    whisper_model: string;
+    piper_model: string;
+    openai_available: boolean;
+  }
 
-  // Audio-Kontext
+  // === Conversation state ===
+  type ConversationState = 'idle' | 'listening' | 'transcribing' | 'waiting' | 'speaking';
+  let state: ConversationState = 'idle';
+  let conversationActive = false;
+
+  // === Voice status ===
+  let voiceStatus: VoiceStatus | null = null;
+  let isReady = false;
+
+  // === Audio ===
   let audioContext: AudioContext | null = null;
   let mediaStream: MediaStream | null = null;
   let mediaRecorder: MediaRecorder | null = null;
@@ -21,278 +31,365 @@
   let analyser: AnalyserNode | null = null;
   let animationFrame: number | null = null;
 
-  // Visualisierung
-  let canvasEl: HTMLCanvasElement;
-  let volumeLevel = 0;
+  // === VAD (voice activity detection) ===
+  const SILENCE_THRESHOLD = 0.03; // RMS threshold for silence
+  const SILENCE_DURATION = 1800;  // ms of silence before the recording ends
+  const MIN_RECORDING = 500;      // minimum recording duration in ms
+  let silenceStart = 0;
+  let recordingStart = 0;
+  let currentVolume = 0;
 
-  // Transkription (live)
-  let currentTranscript = '';
+  // === Conversation log ===
+  interface LogEntry {
+    role: 'user' | 'assistant';
+    text: string;
+    time: string;
+  }
+  let conversationLog: LogEntry[] = [];
+  const MAX_LOG_ENTRIES = 6;
 
-  // Fehler-Anzeige
-  let micError = '';
-
-  // TTS Audio-Element
+  // === TTS ===
   let ttsAudio: HTMLAudioElement | null = null;
 
-  // Event-Listener
+  // === Errors ===
+  let errorMsg = '';
+
+  // === Event listeners ===
   let ttsListener: UnlistenFn | null = null;
 
   onMount(async () => {
-    // Voice-Verfügbarkeit prüfen
     try {
-      isVoiceAvailable = await invoke('check_voice_availability');
-      if (isVoiceAvailable) {
-        availableVoices = await invoke('get_tts_voices');
+      voiceStatus = await invoke('check_voice_availability');
+      isReady = voiceStatus.whisper_available && voiceStatus.piper_available;
+
+      if (!isReady && voiceStatus) {
+        if (!voiceStatus.whisper_available) {
+          errorMsg = 'Whisper nicht verfügbar. Modell: ' + voiceStatus.whisper_model;
+        } else if (!voiceStatus.piper_available) {
+          errorMsg = 'Piper-TTS nicht verfügbar. Modell: ' + voiceStatus.piper_model;
+        }
       }
     } catch (err) {
-      console.warn('Voice nicht verfügbar:', err);
+      console.warn('Voice-Status Fehler:', err);
+      errorMsg = `Voice-System nicht verfügbar: ${err}`;
     }
-
-    // TTS-Event listener
-    ttsListener = await listen('tts-audio', (event) => {
-      playTtsAudio(event.payload);
-    });
   });
 
   onDestroy(() => {
-    stopListening();
+    stopConversation();
     ttsListener?.();
   });
 
-  async function startListening() {
-    if (isListening) return;
-    micError = '';
+  // === Conversation loop ===
+  async function startConversation() {
+    if (conversationActive) return;
+    conversationActive = true;
+    errorMsg = '';
+    conversationLog = [];
 
+    console.log('🎙️ Gespräch gestartet');
+
+    // Acquire microphone access
     try {
-      // Mikrofon-Zugriff — zuerst mit optimalen Constraints versuchen,
-      // bei OverconstrainedError (z.B. WebKitGTK/Tauri) auf Fallback ausweichen
-      let usedFallback = false;
+      await initMicrophone();
+    } catch (err) {
+      errorMsg = `Mikrofon-Fehler: ${err instanceof Error ? err.message : err}`;
+      conversationActive = false;
+      return;
+    }
+
+    // Loop: listen -> transcribe -> Claude -> speak -> listen
+    startListening();
+  }
+
+  function stopConversation() {
+    conversationActive = false;
+    stopSpeaking();
+    stopRecording();
+    cleanupAudio();
+    state = 'idle';
+    console.log('🎙️ Gespräch beendet');
+  }
+
+  async function initMicrophone() {
+    // Microphone access with a fallback chain
+    try {
+      mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 16000 }
+      });
+    } catch {
       try {
-        mediaStream = await navigator.mediaDevices.getUserMedia({
-          audio: {
-            echoCancellation: true,
-            noiseSuppression: true,
-            sampleRate: 16000,
-          },
-        });
-        console.log('🎤 Mikrofon mit optimalen Constraints geöffnet');
-      } catch (constraintErr) {
-        // WebKitGTK wirft diverse Fehler (OverconstrainedError, TypeError "Invalid constraint", etc.)
-        console.warn('Mikrofon-Constraints fehlgeschlagen, versuche Fallbacks:', constraintErr);
-        try {
-          mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
-        } catch (_) {
-          // Auch { audio: true } fehlgeschlagen — versuche explizites Device
-          const devices = await navigator.mediaDevices.enumerateDevices();
-          const audioInput = devices.find(d => d.kind === 'audioinput');
-          if (audioInput) {
-            mediaStream = await navigator.mediaDevices.getUserMedia({
-              audio: { deviceId: { exact: audioInput.deviceId } }
-            });
-          } else {
-            mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {} });
-          }
+        mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      } catch {
+        const devices = await navigator.mediaDevices.enumerateDevices();
+        const mic = devices.find(d => d.kind === 'audioinput');
+        if (mic) {
+          mediaStream = await navigator.mediaDevices.getUserMedia({
+            audio: { deviceId: { exact: mic.deviceId } }
+          });
+        } else {
+          throw new Error('Kein Mikrofon gefunden. Unter WebKitGTK (Tauri/Linux) wird PipeWire oder PulseAudio mit gst-plugin-pipewire benötigt.');
         }
-        usedFallback = true;
-        console.log('🎤 Mikrofon mit Fallback geöffnet');
       }
+    }
 
-      // Audio-Kontext für Visualisierung
-      audioContext = new AudioContext();
-      const source = audioContext.createMediaStreamSource(mediaStream);
-      analyser = audioContext.createAnalyser();
-      analyser.fftSize = 256;
-      source.connect(analyser);
+    audioContext = new AudioContext();
+    const source = audioContext.createMediaStreamSource(mediaStream);
+    analyser = audioContext.createAnalyser();
+    analyser.fftSize = 2048;
+    source.connect(analyser);
+  }
 
-      // MediaRecorder für Aufnahme
+  function startListening() {
+    if (!conversationActive || !mediaStream) return;
+    state = 'listening';
+    audioChunks = [];
+    silenceStart = 0;
+    recordingStart = Date.now();
 
+    // Start the MediaRecorder
+    try {
       mediaRecorder = new MediaRecorder(mediaStream, {
         mimeType: 'audio/webm;codecs=opus',
       });
-
-      audioChunks = [];
-
-      mediaRecorder.ondataavailable = (event) => {
-        if (event.data.size > 0) {
-          audioChunks.push(event.data);
-        }
-      };
-
-      mediaRecorder.onstop = async () => {
-        if (audioChunks.length > 0) {
-          await processAudio();
-        }
-      };
-
-      mediaRecorder.start(100); // Chunks alle 100ms
-      isListening = true;
-
-      // Visualisierung starten
-      visualize();
-
-      console.log('🎤 Aufnahme gestartet' + (usedFallback ? ' (Fallback-Modus)' : ''));
-    } catch (err) {
-      console.error('Mikrofon-Fehler:', err);
-      micError = `Mikrofon-Zugriff fehlgeschlagen: ${err instanceof Error ? err.message : err}`;
-      // Fehler nach 8 Sekunden ausblenden
-      setTimeout(() => { micError = ''; }, 8000);
+    } catch {
+      // Fallback without an explicit codec
+      mediaRecorder = new MediaRecorder(mediaStream);
     }
+
+    mediaRecorder.ondataavailable = (e) => {
+      if (e.data.size > 0) audioChunks.push(e.data);
+    };
+
+    mediaRecorder.onstop = () => {
+      if (audioChunks.length > 0 && conversationActive) {
+        processRecording();
+      }
+    };
+
+    mediaRecorder.start(100);
+
+    // Start the VAD loop
+    monitorVAD();
   }
 
-  function stopListening() {
-    if (!isListening) return;
+  function monitorVAD() {
+    if (!analyser || state !== 'listening') return;
 
-    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
-      mediaRecorder.stop();
+    const buffer = new Float32Array(analyser.fftSize);
+    analyser.getFloatTimeDomainData(buffer);
+
+    // Compute the RMS level
+    let sum = 0;
+    for (let i = 0; i < buffer.length; i++) {
+      sum += buffer[i] * buffer[i];
+    }
+    const rms = Math.sqrt(sum / buffer.length);
+    currentVolume = rms;
+
+    const now = Date.now();
+    const recordingDuration = now - recordingStart;
+
+    if (rms < SILENCE_THRESHOLD) {
+      // Silence
+      if (silenceStart === 0) silenceStart = now;
+
+      if (now - silenceStart > SILENCE_DURATION && recordingDuration > MIN_RECORDING) {
+        // Enough silence after enough speech: stop the recording
+        stopRecording();
+        return;
+      }
+    } else {
+      // Speech detected
+      silenceStart = 0;
     }
 
-    if (mediaStream) {
-      mediaStream.getTracks().forEach((track) => track.stop());
-      mediaStream = null;
-    }
+    animationFrame = requestAnimationFrame(() => monitorVAD());
+  }
 
+  function stopRecording() {
     if (animationFrame) {
       cancelAnimationFrame(animationFrame);
       animationFrame = null;
     }
-
-    if (audioContext) {
-      audioContext.close();
-      audioContext = null;
+    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+      mediaRecorder.stop();
     }
-
-    isListening = false;
-    volumeLevel = 0;
-
-    console.log('🎤 Aufnahme gestoppt');
   }
 
-  async function processAudio() {
-    if (audioChunks.length === 0) return;
+  async function processRecording() {
+    if (!conversationActive) return;
+    state = 'transcribing';
 
-    const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+    const blob = new Blob(audioChunks, { type: 'audio/webm' });
     audioChunks = [];
 
-    // Blob zu Base64 konvertieren
-    const reader = new FileReader();
-    reader.onloadend = async () => {
-      const base64 = (reader.result as string).split(',')[1];
+    // Too small? Skip it
+    if (blob.size < 1000) {
+      console.log('⏭️ Aufnahme zu kurz, übersprungen');
+      if (conversationActive) startListening();
+      return;
+    }
 
-      try {
-        currentTranscript = 'Transkribiere...';
-        const text: string = await invoke('transcribe_audio', {
-          audioBase64: base64,
-          format: 'webm',
-        });
+    // Blob -> Base64
+    const base64 = await blobToBase64(blob);
 
-        currentTranscript = text;
+    try {
+      const text: string = await invoke('transcribe_audio', {
+        audioBase64: base64,
+        format: 'webm',
+      });
 
-        if (text.trim()) {
-          // Nachricht an Chat senden
-          addMessage('user', text);
+      const cleaned = text.trim();
+      if (!cleaned || cleaned.length < 2) {
+        console.log('⏭️ Leere Transkription, weiter zuhören');
+        if (conversationActive) startListening();
+        return;
+      }
 
-          // An Claude senden (triggert TTS-Response)
-          await sendToClaudeWithTts(text);
-        }
-      } catch (err) {
-        console.error('Transkription fehlgeschlagen:', err);
-        currentTranscript = `Fehler: ${err}`;
-      }
-    };
-    reader.readAsDataURL(audioBlob);
-  }
+      // Log the user message
+      addLogEntry('user', cleaned);
+      addMessage('user', cleaned);
 
-  async function sendToClaudeWithTts(text: string) {
-    // Nachricht an Claude senden und Antwort per TTS vorlesen
-    try {
-      $isProcessing = true;
+      // Ask Claude
+      state = 'waiting';
+      await invoke('send_message', { message: cleaned });
 
-      // Claude-Request abfeuern
-      await invoke('send_message', { message: text });
+      // Wait for the response
+      const response = await waitForResponse();
 
-      // Auf Ende der Verarbeitung warten (all-stopped Event)
-      await new Promise<void>((resolve) => {
-        // Timeout nach 120 Sekunden als Sicherheitsnetz
-        const timeout = setTimeout(() => {
-          console.warn('TTS-Timeout: Claude hat nach 120s nicht geantwortet');
-          unlisten();
-          resolve();
-        }, 120_000);
+      if (response && conversationActive) {
+        addLogEntry('assistant', response);
 
-        let unlisten: UnlistenFn;
+        // Prepare the text for TTS (strip code blocks and Markdown)
+        let ttsText = prepareTtsText(response);
 
-        listen('all-stopped', () => {
-          clearTimeout(timeout);
-          unlisten();
-          resolve();
-        }).then((fn) => {
-          unlisten = fn;
-        });
-      });
+        // Play the TTS audio
+        state = 'speaking';
+        await speakAndWait(ttsText);
+      }
 
-      // Letzte Assistant-Nachricht aus dem Store holen
-      const allMessages = get(messages);
-      const lastAssistant = [...allMessages]
-        .reverse()
-        .find((m) => m.role === 'assistant' && m.content.trim());
-
-      if (lastAssistant) {
-        // TTS auf max 500 Zeichen begrenzen (lange Antworten abschneiden)
-        let ttsText = lastAssistant.content.trim();
-        if (ttsText.length > 500) {
-          // Am letzten Satzende vor 500 Zeichen abschneiden
-          const cutoff = ttsText.lastIndexOf('.', 500);
-          ttsText = cutoff > 200
-            ? ttsText.substring(0, cutoff + 1)
-            : ttsText.substring(0, 500) + '…';
-        }
-
-        await speakText(ttsText);
+      // Keep listening
+      if (conversationActive) {
+        startListening();
       }
     } catch (err) {
-      console.error('sendToClaudeWithTts fehlgeschlagen:', err);
+      console.error('Verarbeitung fehlgeschlagen:', err);
+      errorMsg = `Fehler: ${err}`;
+      setTimeout(() => { errorMsg = ''; }, 5000);
+      if (conversationActive) startListening();
     }
   }
 
-  async function speakText(text: string) {
-    if (isSpeaking) {
-      stopSpeaking();
-    }
+  async function waitForResponse(): Promise<string | null> {
+    return new Promise((resolve) => {
+      const timeout = setTimeout(() => {
+        if (unlisten) unlisten();
+        resolve(null);
+      }, 120_000);
 
-    try {
-      isSpeaking = true;
-      const audioBase64: string = await invoke('text_to_speech', {
-        text,
-        voice: selectedVoice,
+      let unlisten: UnlistenFn;
+      listen('all-stopped', () => {
+        clearTimeout(timeout);
+        if (unlisten) unlisten();
+
+        // Fetch the last assistant message from the store
+        const allMessages = get(messages);
+        const last = [...allMessages].reverse().find(m => m.role === 'assistant' && m.content.trim());
+        resolve(last ? last.content.trim() : null);
+      }).then((fn: UnlistenFn) => { unlisten = fn; });
+    });
+  }
+
+  function prepareTtsText(text: string): string {
+    // Strip code blocks
+    let clean = text.replace(/```[\s\S]*?```/g, '');
+    // Strip inline code
+    clean = clean.replace(/`[^`]+`/g, '');
+    // Strip Markdown formatting
+    clean = clean.replace(/[*_~#]+/g, '');
+    // Strip URLs
+    clean = clean.replace(/https?:\/\/\S+/g, '');
+    // Collapse repeated whitespace and newlines
+    clean = clean.replace(/\s+/g, ' ').trim();
+
+    // Cap at 600 characters (cut at a sentence boundary)
+    if (clean.length > 600) {
+      const cutoff = clean.lastIndexOf('.', 600);
+      clean = cutoff > 200 ? clean.substring(0, cutoff + 1) : clean.substring(0, 600) + '…';
+    }
+
+    return clean;
+  }
+
+  async function speakAndWait(text: string): Promise<void> {
+    if (!text || !conversationActive) return;
+
+    try {
+      const audioBase64: string = await invoke('text_to_speech', {
+        text,
+        voice: null,
       });
 
-      playTtsAudio(audioBase64);
-    } catch (err) {
-      console.error('TTS fehlgeschlagen:', err);
-      isSpeaking = false;
-    }
-  }
+      return new Promise((resolve) => {
+        if (ttsAudio) {
+          ttsAudio.pause();
+          ttsAudio = null;
+        }
 
-  function playTtsAudio(base64: string) {
-    if (ttsAudio) {
-      ttsAudio.pause();
-      ttsAudio = null;
-    }
+        // Piper returns WAV
+        ttsAudio = new Audio(`data:audio/wav;base64,${audioBase64}`);
 
-    ttsAudio = new Audio(`data:audio/mp3;base64,${base64}`);
+        ttsAudio.onended = () => {
+          ttsAudio = null;
+          resolve();
+        };
 
-    ttsAudio.onended = () => {
-      isSpeaking = false;
-      // Bei Continuous-Modus: Wieder zuhören
-      if (voiceMode === 'continuous' && !isListening) {
-        startListening();
-      }
-    };
+        ttsAudio.onerror = (e) => {
+          console.error('TTS-Wiedergabe fehlgeschlagen:', e);
+          ttsAudio = null;
+          resolve();
+        };
 
-    ttsAudio.onerror = () => {
-      isSpeaking = false;
-    };
+        ttsAudio.play().catch(err => {
+          console.error('Audio-Play fehlgeschlagen:', err);
+          resolve();
+        });
 
-    ttsAudio.play();
+        // Interrupt detection: while TTS is playing, check whether the user speaks
+        monitorInterrupt(resolve);
+      });
+    } catch (err) {
+      console.error('TTS fehlgeschlagen:', err);
+    }
+  }
+
+  function monitorInterrupt(onInterrupt: () => void) {
+    if (!analyser || state !== 'speaking') return;
+
+    const buffer = new Float32Array(analyser.fftSize);
+    analyser.getFloatTimeDomainData(buffer);
+
+    let sum = 0;
+    for (let i = 0; i < buffer.length; i++) {
+      sum += buffer[i] * buffer[i];
+    }
+    const rms = Math.sqrt(sum / buffer.length);
+    currentVolume = rms;
+
+    // The user is speaking loudly enough: interrupt Claude
+    if (rms > SILENCE_THRESHOLD * 3) {
+      console.log('⚡ Unterbrochen! (RMS:', rms.toFixed(3), ')');
+      stopSpeaking();
+      onInterrupt();
+      return;
+    }
+
+    // Keep checking
+    if (state === 'speaking' && ttsAudio && !ttsAudio.paused) {
+      requestAnimationFrame(() => monitorInterrupt(onInterrupt));
+    }
   }
 
   function stopSpeaking() {
@@ -301,213 +398,152 @@
     if (ttsAudio) {
       ttsAudio.pause();
       ttsAudio.currentTime = 0;
       ttsAudio = null;
     }
-    isSpeaking = false;
   }
 
-  // Unterbrechung: User spricht während Claude spricht
-  function handleInterrupt() {
-    if (isSpeaking) {
-      stopSpeaking();
-      console.log('⚡ Claude unterbrochen');
+  function cleanupAudio() {
+    if (mediaStream) {
+      mediaStream.getTracks().forEach(t => t.stop());
+      mediaStream = null;
+    }
+    if (audioContext) {
+      audioContext.close();
+      audioContext = null;
+    }
+    analyser = null;
+    if (animationFrame) {
+      cancelAnimationFrame(animationFrame);
+      animationFrame = null;
     }
   }
 
-  function visualize() {
-    if (!analyser || !canvasEl) return;
+  // === Helpers ===
 
-    const ctx = canvasEl.getContext('2d');
-    if (!ctx) return;
-
-    const bufferLength = analyser.frequencyBinCount;
-    const dataArray = new Uint8Array(bufferLength);
-
-    function draw() {
-      if (!analyser) return;
-
-      animationFrame = requestAnimationFrame(draw);
-      analyser.getByteFrequencyData(dataArray);
-
-      // Durchschnittliche Lautstärke berechnen
-      const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
-      volumeLevel = average / 255;
-
-      // VAD: Bei hoher Lautstärke während TTS → Unterbrechen
-      if (volumeLevel > 0.3 && isSpeaking) {
-        handleInterrupt();
-      }
-
-      // Canvas zeichnen
-      ctx.fillStyle = 'var(--bg-secondary)';
-      ctx.fillRect(0, 0, canvasEl.width, canvasEl.height);
-
-      const barWidth = (canvasEl.width / bufferLength) * 2.5;
-      let x = 0;
-
-      for (let i = 0; i < bufferLength; i++) {
-        const barHeight = (dataArray[i] / 255) * canvasEl.height;
-
-        // Farbverlauf basierend auf Höhe
-        const hue = (i / bufferLength) * 60 + 200; // Blau-Violett
-        ctx.fillStyle = `hsl(${hue}, 70%, ${50 + volumeLevel * 30}%)`;
-
-        ctx.fillRect(x, canvasEl.height - barHeight, barWidth, barHeight);
-        x += barWidth + 1;
-      }
-    }
-
-    draw();
+  function blobToBase64(blob: Blob): Promise<string> {
+    return new Promise((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onloadend = () => {
+        const result = reader.result as string;
+        resolve(result.split(',')[1]);
+      };
+      reader.onerror = reject;
+      reader.readAsDataURL(blob);
+    });
   }
 
-  function toggleVoiceMode() {
-    voiceMode = voiceMode === 'push-to-talk' ? 'continuous' : 'push-to-talk';
-    if (voiceMode === 'push-to-talk' && isListening) {
-      stopListening();
+  function addLogEntry(role: 'user' | 'assistant', text: string) {
+    const now = new Date();
+    const time = now.toLocaleTimeString('de-DE', { hour: '2-digit', minute: '2-digit' });
+    conversationLog = [...conversationLog, { role, text, time }].slice(-MAX_LOG_ENTRIES);
+  }
+
+  function getStateLabel(): string {
+    switch (state) {
+      case 'listening': return 'Höre zu…';
+      case 'transcribing': return 'Transkribiere…';
+      case 'waiting': return 'Claude denkt nach…';
+      case 'speaking': return 'Claude spricht…';
+      default: return 'Bereit';
     }
   }
 
-  // Push-to-Talk Handling
-  function handlePttDown() {
-    if (voiceMode === 'push-to-talk') {
-      startListening();
-    }
-  }
-
-  function handlePttUp() {
-    if (voiceMode === 'push-to-talk') {
-      stopListening();
-    }
-  }
-
-  // Keyboard-Shortcut (Leertaste für PTT)
-  function handleKeydown(e: KeyboardEvent) {
-    if (e.code === 'Space' && !e.repeat && voiceMode === 'push-to-talk' && e.target === document.body) {
-      e.preventDefault();
-      handlePttDown();
-    }
-  }
-
-  function handleKeyup(e: KeyboardEvent) {
-    if (e.code === 'Space' && voiceMode === 'push-to-talk') {
-      handlePttUp();
+  function getStateIcon(): string {
+    switch (state) {
+      case 'listening': return '🎤';
+      case 'transcribing': return '⏳';
+      case 'waiting': return '🤔';
+      case 'speaking': return '🔊';
+      default: return '💬';
     }
   }
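[Note, not part of the patch: monitorVAD and monitorInterrupt both reduce to an
RMS gate with a trailing-silence hang-over. For documentation, the same decision
logic as a self-contained Rust sketch; struct and field names are illustrative,
the thresholds mirror SILENCE_THRESHOLD, SILENCE_DURATION, and MIN_RECORDING:

    /// Illustrative port of the frontend VAD gate; not used by the app.
    struct VadGate {
        silence_threshold: f32, // RMS floor, mirrors SILENCE_THRESHOLD = 0.03
        silence_ms: u64,        // trailing-silence hang-over, mirrors SILENCE_DURATION = 1800
        min_recording_ms: u64,  // mirrors MIN_RECORDING = 500
        silence_started_at: Option<u64>,
    }

    impl VadGate {
        /// Returns true once the recording should stop: enough trailing
        /// silence after a minimum amount of recorded audio.
        fn update(&mut self, samples: &[f32], now_ms: u64, recording_start_ms: u64) -> bool {
            if samples.is_empty() {
                return false;
            }
            let rms = (samples.iter().map(|s| s * s).sum::<f32>() / samples.len() as f32).sqrt();
            if rms < self.silence_threshold {
                let started = *self.silence_started_at.get_or_insert(now_ms);
                now_ms - started > self.silence_ms
                    && now_ms - recording_start_ms > self.min_recording_ms
            } else {
                self.silence_started_at = None; // speech resets the hang-over timer
                false
            }
        }
    }

The same gate, scaled by a factor of three, is what monitorInterrupt uses to
decide that the user is talking over the TTS playback.]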
 
-
-  🎤 Sprachsteuerung
-
-  {#if !isVoiceAvailable}
-    API Key fehlt
-  {/if}
+
+  🎙️ Gespräch
+
+  {#if voiceStatus}
+    Whisper {voiceStatus.whisper_available ? '✓' : '✗'}
+    Piper {voiceStatus.piper_available ? '✓' : '✗'}
+  {/if}
 
-  {#if isVoiceAvailable}
-
-    {#if isListening}
-      Höre zu...
-    {/if}
-
-    {#if voiceMode === 'push-to-talk'}
-      Oder Leertaste gedrückt halten
-    {/if}
-
-    {#if currentTranscript}
-      Du: {currentTranscript}
-    {/if}
-
-    {#if isSpeaking}
-      Claude spricht...
-    {/if}
-
-    {#if micError}
-      ⚠️ {micError}
-    {/if}
-
+  {#if isReady}
+
+    🎙️ Gespräch starten
+
+    {#if conversationActive}
+      {getStateIcon()}
+      {getStateLabel()}
+
+      {#if state === 'speaking'}
+      {/if}
+    {/if}
+
+    {#if conversationLog.length > 0}
+      {#each conversationLog as entry}
+        {entry.time}
+        {entry.role === 'user' ? 'Du' : 'Claude'}:
+        {entry.text.length > 120 ? entry.text.substring(0, 120) + '…' : entry.text}
+      {/each}
+    {/if}
   {:else}
-
-    Setze OPENAI_API_KEY Umgebungsvariable für Sprachsteuerung.
-
-    Oder warte auf lokale Whisper/Piper Integration.
-
+    {#if voiceStatus}
+      {#if !voiceStatus.whisper_available}
+        ⚠️ Whisper-Modell nicht gefunden
+        Lade ggml-base.bin in den models/ Ordner
+      {/if}
+      {#if !voiceStatus.piper_available}
+        ⚠️ Piper-TTS-Modell nicht gefunden
+        Lade de_DE-thorsten-high.onnx in den models/ Ordner
+      {/if}
+    {:else}
+      Voice-System wird geladen...
+    {/if}
   {/if}
 
+  {#if errorMsg}
+    ⚠️ {errorMsg}
+  {/if}
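[Note, not part of the patch: both "Modell nicht gefunden" branches in the panel
trace back to the lookup order in whisper_model_path and piper_model_path. A
tiny standalone check that mirrors that order can save a debugging round-trip;
the file names are the ones this patch expects in models/:

    // Sketch of a standalone check; run it from the project root.
    fn main() {
        let models = [
            "models/ggml-base.bin",            // Whisper speech-to-text model
            "models/de_DE-thorsten-high.onnx", // Piper voice (Thorsten, German)
        ];
        for rel in models {
            let found = std::path::Path::new(rel).exists();
            println!("{rel} -> {}", if found { "found" } else { "missing" });
        }
    }
]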