Voice (Phase 10): - voice.rs: OpenAI Whisper (STT) + TTS Backend - ChatPanel: Mikrofon-Button, VAD (Pause 1.5s), Live-Pegel - SettingsPanel: OpenAI-Key Konfiguration Phase 9 Nacharbeiten: - Auto-Extract vor Compacting (Entscheidungen/TODOs/Insights) - get_tool_hints() - relevante KB-Eintraege bei Tool-Start - activeKnowledgeHints Store, Anzeige im KnowledgePanel Tech-Schulden: - Dead-Code in memory.rs entfernt (MemorySystem struct) - cargo-check Warnings behoben Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
188 lines
5.8 KiB
Rust
188 lines
5.8 KiB
Rust
// Claude Desktop — Voice Interface
|
|
// Speech-to-Text mit Whisper API, Text-to-Speech mit OpenAI TTS
|
|
|
|
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::io::Write;
|
|
|
|
/// OpenAI endpoint that `transcribe_audio` posts recordings to (Whisper speech-to-text).
const OPENAI_API_URL: &str = "https://api.openai.com/v1/audio/transcriptions";

/// OpenAI endpoint that `text_to_speech` posts synthesis requests to.
const TTS_API_URL: &str = "https://api.openai.com/v1/audio/speech";
|
|
|
|
/// Transkriptions-Ergebnis
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct TranscriptionResult {
|
|
pub text: String,
|
|
pub language: Option<String>,
|
|
pub duration: Option<f64>,
|
|
}
|
|
|
|
/// TTS-Stimmen
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub enum TtsVoice {
|
|
Alloy,
|
|
Echo,
|
|
Fable,
|
|
Onyx,
|
|
Nova,
|
|
Shimmer,
|
|
}
|
|
|
|
impl TtsVoice {
|
|
fn as_str(&self) -> &str {
|
|
match self {
|
|
TtsVoice::Alloy => "alloy",
|
|
TtsVoice::Echo => "echo",
|
|
TtsVoice::Fable => "fable",
|
|
TtsVoice::Onyx => "onyx",
|
|
TtsVoice::Nova => "nova",
|
|
TtsVoice::Shimmer => "shimmer",
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Looks up the OpenAI API key.
///
/// Only the `OPENAI_API_KEY` environment variable is consulted; loading the
/// key from the application settings is not implemented yet.
///
/// Leading and trailing whitespace is stripped from the value, because a
/// stray newline or space (easy to pick up when copy-pasting a key) would
/// otherwise end up in the `Authorization` header. A value that is empty or
/// consists only of whitespace counts as "no key".
///
/// # Errors
///
/// Returns a user-facing message when the variable is unset, is not valid
/// Unicode, or is blank.
fn get_openai_key() -> Result<String, String> {
    // TODO: fall back to the key stored in the application settings.
    std::env::var("OPENAI_API_KEY")
        .ok()
        .map(|key| key.trim().to_owned())
        .filter(|key| !key.is_empty())
        .ok_or_else(|| {
            "OpenAI API Key nicht gefunden. Setze OPENAI_API_KEY Umgebungsvariable.".to_string()
        })
}
|
|
|
|
/// Transkribiert Audio mit OpenAI Whisper API
|
|
#[tauri::command]
|
|
pub async fn transcribe_audio(
|
|
audio_base64: String,
|
|
format: String,
|
|
) -> Result<String, String> {
|
|
let api_key = get_openai_key()?;
|
|
|
|
// Base64 dekodieren
|
|
let audio_bytes = BASE64.decode(&audio_base64)
|
|
.map_err(|e| format!("Base64-Dekodierung fehlgeschlagen: {}", e))?;
|
|
|
|
// Temporäre Datei erstellen (Whisper API braucht Datei-Upload)
|
|
let temp_dir = std::env::temp_dir();
|
|
let temp_file = temp_dir.join(format!("whisper_audio_{}.{}", uuid::Uuid::new_v4(), format));
|
|
|
|
let mut file = std::fs::File::create(&temp_file)
|
|
.map_err(|e| format!("Temp-Datei erstellen fehlgeschlagen: {}", e))?;
|
|
file.write_all(&audio_bytes)
|
|
.map_err(|e| format!("Audio schreiben fehlgeschlagen: {}", e))?;
|
|
drop(file);
|
|
|
|
// Multipart-Request an Whisper API
|
|
let client = reqwest::Client::new();
|
|
|
|
let file_part = reqwest::multipart::Part::file(&temp_file)
|
|
.await
|
|
.map_err(|e| format!("Datei lesen fehlgeschlagen: {}", e))?
|
|
.file_name(format!("audio.{}", format))
|
|
.mime_str(&format!("audio/{}", format))
|
|
.map_err(|e| format!("MIME-Type fehlgeschlagen: {}", e))?;
|
|
|
|
let form = reqwest::multipart::Form::new()
|
|
.part("file", file_part)
|
|
.text("model", "whisper-1")
|
|
.text("language", "de") // Deutsch priorisieren
|
|
.text("response_format", "json");
|
|
|
|
let response = client
|
|
.post(OPENAI_API_URL)
|
|
.bearer_auth(&api_key)
|
|
.multipart(form)
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("API-Request fehlgeschlagen: {}", e))?;
|
|
|
|
// Temp-Datei löschen
|
|
let _ = std::fs::remove_file(&temp_file);
|
|
|
|
if !response.status().is_success() {
|
|
let error_text = response.text().await.unwrap_or_default();
|
|
return Err(format!("Whisper API Fehler: {}", error_text));
|
|
}
|
|
|
|
// Response parsen
|
|
#[derive(Deserialize)]
|
|
struct WhisperResponse {
|
|
text: String,
|
|
}
|
|
|
|
let result: WhisperResponse = response.json().await
|
|
.map_err(|e| format!("Response parsen fehlgeschlagen: {}", e))?;
|
|
|
|
println!("🎤 Transkription: \"{}\"", result.text);
|
|
|
|
Ok(result.text)
|
|
}
|
|
|
|
/// Text-to-Speech mit OpenAI TTS API
|
|
#[tauri::command]
|
|
pub async fn text_to_speech(
|
|
text: String,
|
|
voice: Option<String>,
|
|
) -> Result<String, String> {
|
|
let api_key = get_openai_key()?;
|
|
|
|
let voice_name = voice.unwrap_or_else(|| "nova".to_string());
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
let body = serde_json::json!({
|
|
"model": "tts-1",
|
|
"input": text,
|
|
"voice": voice_name,
|
|
"response_format": "mp3"
|
|
});
|
|
|
|
let response = client
|
|
.post(TTS_API_URL)
|
|
.bearer_auth(&api_key)
|
|
.json(&body)
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("TTS API-Request fehlgeschlagen: {}", e))?;
|
|
|
|
if !response.status().is_success() {
|
|
let error_text = response.text().await.unwrap_or_default();
|
|
return Err(format!("TTS API Fehler: {}", error_text));
|
|
}
|
|
|
|
// Audio-Bytes als Base64 zurückgeben
|
|
let audio_bytes = response.bytes().await
|
|
.map_err(|e| format!("Audio lesen fehlgeschlagen: {}", e))?;
|
|
|
|
let audio_base64 = BASE64.encode(&audio_bytes);
|
|
|
|
println!("🔊 TTS generiert: {} Zeichen → {} Bytes Audio", text.len(), audio_bytes.len());
|
|
|
|
Ok(audio_base64)
|
|
}
|
|
|
|
/// Prüft ob Voice-Features verfügbar sind (API Key vorhanden)
|
|
#[tauri::command]
|
|
pub async fn check_voice_availability() -> Result<bool, String> {
|
|
match get_openai_key() {
|
|
Ok(_) => Ok(true),
|
|
Err(_) => Ok(false),
|
|
}
|
|
}
|
|
|
|
/// Verfügbare TTS-Stimmen
|
|
#[tauri::command]
|
|
pub async fn get_tts_voices() -> Result<Vec<serde_json::Value>, String> {
|
|
Ok(vec![
|
|
serde_json::json!({ "id": "alloy", "name": "Alloy", "description": "Neutral, ausgewogen" }),
|
|
serde_json::json!({ "id": "echo", "name": "Echo", "description": "Männlich, warm" }),
|
|
serde_json::json!({ "id": "fable", "name": "Fable", "description": "Expressiv, britisch" }),
|
|
serde_json::json!({ "id": "onyx", "name": "Onyx", "description": "Tief, autoritär" }),
|
|
serde_json::json!({ "id": "nova", "name": "Nova", "description": "Weiblich, freundlich" }),
|
|
serde_json::json!({ "id": "shimmer", "name": "Shimmer", "description": "Weiblich, sanft" }),
|
|
])
|
|
}
|