Compare commits

...

2 commits

Author SHA1 Message Date
Eddy
51b5a58c63 [appimage] Fix: DNS-Konfiguration für DinD-Container
Some checks failed
Build AppImage / build (push) Failing after 1m49s
- Container-Options: --dns 8.8.8.8 --dns 1.1.1.1
- Fallback-Step: /etc/resolv.conf ergänzen
- Löst "Could not resolve host" im Forgejo Runner

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-15 14:47:42 +02:00
Eddy
2cd721dd97 Feature: VoicePanel mit Push-to-Talk und Sprachsteuerung
- Neues VoicePanel.svelte mit Mikrofon-Zugriff via Web Audio API
- Push-to-Talk und Continuous-Mode (VAD-Ready)
- Audio-Visualisierung mit Canvas-Waveform
- OpenAI Whisper STT + TTS Integration via voice.rs
- Stimmenauswahl (Alloy, Echo, Fable, Onyx, Nova, Shimmer)
- Tab "Sprache" im rechten Panel integriert

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-15 14:46:24 +02:00
3 changed files with 675 additions and 0 deletions

View file

@ -16,8 +16,15 @@ jobs:
if: contains(github.event.head_commit.message, '[appimage]') || startsWith(github.ref, 'refs/tags/v')
container:
image: rust:1.83-bookworm
options: --dns 8.8.8.8 --dns 1.1.1.1
steps:
- name: Configure DNS Fallback
run: |
# DNS-Fix für DinD-Umgebung
echo "nameserver 8.8.8.8" >> /etc/resolv.conf
echo "nameserver 1.1.1.1" >> /etc/resolv.conf
- name: Checkout
run: |
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \

View file

@ -0,0 +1,664 @@
<script lang="ts">
import { onMount, onDestroy } from 'svelte';
import { invoke } from '@tauri-apps/api/core';
import { listen, type UnlistenFn } from '@tauri-apps/api/event';
import { isProcessing, addMessage } from '$lib/stores/app';
// Voice state
// isListening: set to true once recording has started, false again after stopListening().
let isListening = false;
// isSpeaking: true while a TTS response is requested or playing.
let isSpeaking = false;
// isVoiceAvailable: result of the backend 'check_voice_availability' command.
let isVoiceAvailable = false;
let voiceMode: 'push-to-talk' | 'continuous' = 'push-to-talk';
// Voice id passed to the 'text_to_speech' command.
let selectedVoice = 'nova';
// Filled from the 'get_tts_voices' command when voice is available.
let availableVoices: { id: string; name: string; description: string }[] = [];
// Audio context
let audioContext: AudioContext | null = null;
let mediaStream: MediaStream | null = null;
let mediaRecorder: MediaRecorder | null = null;
// Recorded chunks collected by the MediaRecorder 'dataavailable' handler.
let audioChunks: Blob[] = [];
let analyser: AnalyserNode | null = null;
// Handle of the pending requestAnimationFrame call of the visualizer.
let animationFrame: number | null = null;
// Visualization
let canvasEl: HTMLCanvasElement;
// Average analyser magnitude, normalized to the range 0..1.
let volumeLevel = 0;
// Transcription (live)
let currentTranscript = '';
// TTS audio element
let ttsAudio: HTMLAudioElement | null = null;
// Event listener
let ttsListener: UnlistenFn | null = null;
// On mount: query the backend for voice support, then subscribe to TTS audio events.
onMount(async () => {
  try {
    const available: boolean = await invoke('check_voice_availability');
    isVoiceAvailable = available;
    // The voice list is only requested when voice support is reported.
    if (available) {
      availableVoices = await invoke('get_tts_voices');
    }
  } catch (err) {
    console.warn('Voice nicht verfügbar:', err);
  }

  // Every 'tts-audio' event carries base64 audio that is played immediately.
  ttsListener = await listen<string>('tts-audio', ({ payload }) => {
    playTtsAudio(payload);
  });
});
// On destroy: release the microphone, stop TTS playback and unsubscribe.
onDestroy(() => {
  stopListening();
  // Stop running playback as well. Otherwise its 'ended' handler can call
  // startListening() in continuous mode and reopen the microphone for a
  // component that no longer exists.
  stopSpeaking();
  ttsListener?.();
  ttsListener = null;
});
// True while startListening() is still waiting for microphone access.
let isStartPending = false;
// Set by stopListening() when a stop arrives before the pending start has finished.
let isStartCancelled = false;

/**
 * Stops the microphone tracks, cancels the visualizer frame, closes the
 * audio context and resets the listening state. Safe to call when only
 * some of the resources have been acquired.
 */
function releaseAudioResources() {
  if (mediaStream) {
    mediaStream.getTracks().forEach((track) => track.stop());
    mediaStream = null;
  }
  if (animationFrame) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }
  if (audioContext) {
    // close() returns a promise; a failure here is not actionable.
    void audioContext.close().catch(() => {});
    audioContext = null;
  }
  // Clearing the analyser also ends the visualizer's draw loop.
  analyser = null;
  isListening = false;
  volumeLevel = 0;
}

/**
 * Requests microphone access, wires up the analyser used by the visualizer
 * and starts recording in 100 ms chunks.
 *
 * Does nothing when a recording is already running or a start is pending.
 * If stopListening() is called while the permission/device request is still
 * pending, the start is abandoned and the microphone is released again.
 */
async function startListening() {
  if (isListening || isStartPending) return;
  isStartPending = true;
  isStartCancelled = false;
  try {
    // Microphone access
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        sampleRate: 16000,
      },
    });
    // The user released push-to-talk (or the component was destroyed)
    // before the microphone became available.
    if (isStartCancelled) {
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    mediaStream = stream;

    // Audio context for the visualization
    audioContext = new AudioContext();
    const source = audioContext.createMediaStreamSource(stream);
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 256;
    source.connect(analyser);

    // MediaRecorder for the actual recording
    mediaRecorder = new MediaRecorder(stream, {
      mimeType: 'audio/webm;codecs=opus',
    });
    audioChunks = [];
    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data);
      }
    };
    mediaRecorder.onstop = async () => {
      if (audioChunks.length > 0) {
        await processAudio();
      }
    };
    mediaRecorder.start(100); // emit a chunk every 100 ms
    isListening = true;

    // Start the visualization
    visualize();
    console.log('🎤 Aufnahme gestartet');
  } catch (err) {
    console.error('Mikrofon-Fehler:', err);
    // Do not keep the microphone or audio context open after a failed start.
    releaseAudioResources();
  } finally {
    isStartPending = false;
  }
}

/**
 * Stops the current recording and releases all audio resources.
 *
 * Stopping the recorder fires its 'stop' handler, which sends the recorded
 * audio to transcription. A stop requested while startListening() is still
 * waiting for the microphone cancels that start.
 */
function stopListening() {
  if (isStartPending) {
    isStartCancelled = true;
  }
  if (!isListening) return;
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    mediaRecorder.stop();
  }
  releaseAudioResources();
  console.log('🎤 Aufnahme gestoppt');
}
/**
 * Reads a blob and resolves with the base64 payload of its data URL.
 * Rejects when the read fails or does not produce a data URL.
 */
function readBlobAsBase64(blob: Blob): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = () => reject(reader.error ?? new Error('FileReader failed'));
    reader.onload = () => {
      const result = reader.result;
      const commaIndex = typeof result === 'string' ? result.indexOf(',') : -1;
      if (typeof result !== 'string' || commaIndex === -1) {
        reject(new Error('Unexpected FileReader result'));
        return;
      }
      // Strip the "data:<mime>;base64," prefix.
      resolve(result.slice(commaIndex + 1));
    };
    reader.readAsDataURL(blob);
  });
}

/**
 * Sends the recorded chunks to the backend for transcription. A non-empty
 * transcript is added to the chat as a user message and forwarded via
 * sendToClaudeWithTts().
 *
 * The returned promise settles only after the whole pipeline has finished.
 * Read and transcription failures are logged and shown in the transcript
 * area; the promise does not reject for them.
 */
async function processAudio() {
  if (audioChunks.length === 0) return;
  const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
  audioChunks = [];
  try {
    // Convert the blob to base64 for the Tauri command
    const base64 = await readBlobAsBase64(audioBlob);
    currentTranscript = 'Transkribiere...';
    const text = await invoke<string>('transcribe_audio', {
      audioBase64: base64,
      format: 'webm',
    });
    currentTranscript = text;
    if (text.trim()) {
      // Add the message to the chat
      addMessage('user', text);
      // Forward to Claude (triggers the TTS response)
      await sendToClaudeWithTts(text);
    }
  } catch (err) {
    console.error('Transkription fehlgeschlagen:', err);
    currentTranscript = `Fehler: ${err}`;
  }
}
// Placeholder for forwarding a transcript to Claude with a spoken reply.
// The body is currently empty, so `text` is not used and nothing is sent.
async function sendToClaudeWithTts(text: string) {
// TODO: Claude request with TTS flag
// For now (planned): normal send + TTS of the response
}
/**
 * Requests speech audio for `text` in the selected voice and plays it.
 * Playback that is already running is stopped first. On failure the error
 * is logged and the speaking flag is cleared.
 */
async function speakText(text: string) {
  if (isSpeaking) stopSpeaking();

  isSpeaking = true;
  try {
    const request = { text, voice: selectedVoice };
    const audioBase64 = await invoke<string>('text_to_speech', request);
    playTtsAudio(audioBase64);
  } catch (err) {
    console.error('TTS fehlgeschlagen:', err);
    isSpeaking = false;
  }
}
/**
 * Plays base64-encoded MP3 audio, replacing any playback in progress.
 *
 * Marks the component as speaking for the duration of the playback, so the
 * speaking indicator and voice interruption also work when the audio arrives
 * through the 'tts-audio' event rather than speakText(). In continuous mode,
 * listening is resumed once playback has ended.
 */
function playTtsAudio(base64: string) {
  if (ttsAudio) {
    ttsAudio.pause();
    ttsAudio = null;
  }
  const audio = new Audio(`data:audio/mp3;base64,${base64}`);
  ttsAudio = audio;
  isSpeaking = true;

  audio.onended = () => {
    // Ignore callbacks of an element that has been replaced or stopped.
    if (ttsAudio !== audio) return;
    ttsAudio = null;
    isSpeaking = false;
    // In continuous mode: listen again
    if (voiceMode === 'continuous' && !isListening) {
      startListening();
    }
  };
  audio.onerror = () => {
    if (ttsAudio !== audio) return;
    ttsAudio = null;
    isSpeaking = false;
  };
  // play() returns a promise that rejects when playback cannot start
  // (blocked autoplay, undecodable data) or is aborted by pause().
  audio.play().catch((err: unknown) => {
    // An aborted play() of a replaced/stopped element is expected.
    if (ttsAudio !== audio) return;
    console.error('TTS-Wiedergabe fehlgeschlagen:', err);
    ttsAudio = null;
    isSpeaking = false;
  });
}
/** Halts TTS playback, rewinds and drops the audio element, clears the speaking flag. */
function stopSpeaking() {
  const activeAudio = ttsAudio;
  if (activeAudio !== null) {
    activeAudio.pause();
    activeAudio.currentTime = 0;
    ttsAudio = null;
  }
  isSpeaking = false;
}
// Interruption: the user talks while Claude is speaking.
function handleInterrupt() {
  if (!isSpeaking) return;

  stopSpeaking();
  console.log('⚡ Claude unterbrochen');
}
/**
 * Starts the animation loop that draws the frequency spectrum of the
 * microphone signal onto the canvas and updates `volumeLevel`.
 *
 * Does nothing when the analyser, the canvas or its 2D context is missing.
 * The loop ends when its frame is cancelled or the analyser is cleared.
 */
function visualize() {
  if (!analyser || !canvasEl) return;
  const ctx = canvasEl.getContext('2d');
  if (!ctx) return;
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  // Canvas colors cannot reference CSS custom properties: an assignment such
  // as fillStyle = 'var(--x)' is ignored. Resolve the themed color once here.
  const background = getComputedStyle(canvasEl).getPropertyValue('--bg-secondary').trim();

  const draw = () => {
    if (!analyser) return;
    animationFrame = requestAnimationFrame(draw);
    analyser.getByteFrequencyData(dataArray);

    // Average volume across all frequency bins, normalized to 0..1
    const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
    volumeLevel = average / 255;

    // VAD: loud input while TTS is playing interrupts the playback
    if (volumeLevel > 0.3 && isSpeaking) {
      handleInterrupt();
    }

    // Paint the background; fall back to the CSS background of the canvas
    // when the custom property is not defined.
    if (background) {
      ctx.fillStyle = background;
      ctx.fillRect(0, 0, canvasEl.width, canvasEl.height);
    } else {
      ctx.clearRect(0, 0, canvasEl.width, canvasEl.height);
    }

    const barWidth = (canvasEl.width / bufferLength) * 2.5;
    let x = 0;
    dataArray.forEach((value, i) => {
      const barHeight = (value / 255) * canvasEl.height;
      // Hue runs from blue to violet across the spectrum
      const hue = (i / bufferLength) * 60 + 200;
      ctx.fillStyle = `hsl(${hue}, 70%, ${50 + volumeLevel * 30}%)`;
      ctx.fillRect(x, canvasEl.height - barHeight, barWidth, barHeight);
      x += barWidth + 1;
    });
  };
  draw();
}
/** Flips between push-to-talk and continuous mode; entering push-to-talk ends a running recording. */
function toggleVoiceMode() {
  const enteringPushToTalk = voiceMode !== 'push-to-talk';
  voiceMode = enteringPushToTalk ? 'push-to-talk' : 'continuous';
  if (enteringPushToTalk && isListening) {
    stopListening();
  }
}
// Push-to-talk handling: pressing begins a recording (push-to-talk mode only).
function handlePttDown() {
  if (voiceMode !== 'push-to-talk') return;
  startListening();
}
// Releasing ends the recording (push-to-talk mode only).
function handlePttUp() {
  if (voiceMode !== 'push-to-talk') return;
  stopListening();
}
// Keyboard shortcut (space bar for push-to-talk).
// Only reacts to a fresh key press on the document body, and only under the
// conditions in which the push-to-talk button is usable: voice is available
// and no request is being processed.
function handleKeydown(e: KeyboardEvent) {
  if (e.code !== 'Space' || e.repeat) return;
  if (voiceMode !== 'push-to-talk' || e.target !== document.body) return;
  if (!isVoiceAvailable || $isProcessing) return;
  e.preventDefault();
  handlePttDown();
}
// Releasing the space bar ends a push-to-talk recording.
function handleKeyup(e: KeyboardEvent) {
  const spaceReleased = e.code === 'Space';
  if (!spaceReleased || voiceMode !== 'push-to-talk') return;
  handlePttUp();
}
</script>
<svelte:window on:keydown={handleKeydown} on:keyup={handleKeyup} />
<div class="voice-panel">
<div class="voice-header">
<h3>🎤 Sprachsteuerung</h3>
{#if !isVoiceAvailable}
<span class="badge warning">API Key fehlt</span>
{/if}
</div>
{#if isVoiceAvailable}
<div class="voice-controls">
<!-- Modus-Umschalter -->
<div class="mode-switch">
<button
class="mode-btn"
class:active={voiceMode === 'push-to-talk'}
on:click={() => (voiceMode = 'push-to-talk')}
>
🎯 Push-to-Talk
</button>
<button
class="mode-btn"
class:active={voiceMode === 'continuous'}
on:click={() => (voiceMode = 'continuous')}
>
🔄 Kontinuierlich
</button>
</div>
<!-- Visualisierung -->
<div class="visualizer-container">
<canvas bind:this={canvasEl} width="300" height="60" class="visualizer"></canvas>
{#if isListening}
<div class="listening-indicator">
<span class="pulse"></span>
Höre zu...
</div>
{/if}
</div>
<!-- Haupt-Button -->
<div class="main-control">
{#if voiceMode === 'push-to-talk'}
<button
class="voice-btn ptt"
class:active={isListening}
on:mousedown={handlePttDown}
on:mouseup={handlePttUp}
on:mouseleave={handlePttUp}
disabled={$isProcessing}
>
{#if isListening}
🎤 Loslassen zum Senden
{:else}
🎤 Gedrückt halten zum Sprechen
{/if}
</button>
<p class="hint">Oder Leertaste gedrückt halten</p>
{:else}
<button
class="voice-btn continuous"
class:active={isListening}
on:click={() => (isListening ? stopListening() : startListening())}
disabled={$isProcessing}
>
{#if isListening}
⏹️ Stoppen
{:else}
▶️ Gespräch starten
{/if}
</button>
{/if}
</div>
<!-- Transkription -->
{#if currentTranscript}
<div class="transcript">
<span class="label">Du:</span>
<span class="text">{currentTranscript}</span>
</div>
{/if}
<!-- TTS-Status -->
{#if isSpeaking}
<div class="speaking-indicator">
<span class="speaking-pulse"></span>
Claude spricht...
<button class="interrupt-btn" on:click={stopSpeaking}>⏹️</button>
</div>
{/if}
<!-- Stimmen-Auswahl -->
<div class="voice-select">
<label for="voice">Claudes Stimme:</label>
<select id="voice" bind:value={selectedVoice}>
{#each availableVoices as voice}
<option value={voice.id}>{voice.name} - {voice.description}</option>
{/each}
</select>
</div>
</div>
{:else}
<div class="setup-hint">
<p>Setze <code>OPENAI_API_KEY</code> Umgebungsvariable für Sprachsteuerung.</p>
<p class="alt">Oder warte auf lokale Whisper/Piper Integration.</p>
</div>
{/if}
</div>
<style>
.voice-panel {
padding: var(--spacing-md);
background: var(--bg-secondary);
border-radius: var(--radius-lg);
border: 1px solid var(--border);
}
.voice-header {
display: flex;
align-items: center;
gap: var(--spacing-sm);
margin-bottom: var(--spacing-md);
}
.voice-header h3 {
margin: 0;
font-size: 1rem;
}
.badge.warning {
background: rgba(245, 158, 11, 0.2);
color: #f59e0b;
padding: 2px 8px;
border-radius: var(--radius-sm);
font-size: 0.7rem;
}
.mode-switch {
display: flex;
gap: var(--spacing-xs);
margin-bottom: var(--spacing-md);
}
.mode-btn {
flex: 1;
padding: var(--spacing-sm);
border: 1px solid var(--border);
background: transparent;
color: var(--text-secondary);
border-radius: var(--radius-md);
cursor: pointer;
font-size: 0.8rem;
transition: all 0.15s ease;
}
.mode-btn.active {
background: var(--accent);
color: white;
border-color: var(--accent);
}
.visualizer-container {
position: relative;
margin-bottom: var(--spacing-md);
}
.visualizer {
width: 100%;
height: 60px;
border-radius: var(--radius-md);
background: var(--bg-tertiary);
}
.listening-indicator {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
display: flex;
align-items: center;
gap: var(--spacing-xs);
color: var(--success);
font-size: 0.85rem;
font-weight: 500;
}
.pulse {
width: 10px;
height: 10px;
background: var(--success);
border-radius: 50%;
animation: pulse 1s ease-in-out infinite;
}
@keyframes pulse {
0%,
100% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.5;
transform: scale(1.2);
}
}
.main-control {
text-align: center;
margin-bottom: var(--spacing-md);
}
.voice-btn {
width: 100%;
padding: var(--spacing-md);
border: none;
border-radius: var(--radius-lg);
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.15s ease;
}
.voice-btn.ptt {
background: var(--bg-tertiary);
color: var(--text-primary);
}
.voice-btn.ptt:hover {
background: var(--bg-hover);
}
.voice-btn.ptt.active {
background: var(--success);
color: white;
transform: scale(0.98);
}
.voice-btn.continuous {
background: var(--accent);
color: white;
}
.voice-btn.continuous.active {
background: var(--error);
}
.voice-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.hint {
font-size: 0.75rem;
color: var(--text-secondary);
margin-top: var(--spacing-xs);
}
.transcript {
background: var(--bg-tertiary);
padding: var(--spacing-sm);
border-radius: var(--radius-md);
margin-bottom: var(--spacing-md);
font-size: 0.85rem;
}
.transcript .label {
color: var(--text-secondary);
margin-right: var(--spacing-xs);
}
.transcript .text {
color: var(--text-primary);
}
.speaking-indicator {
display: flex;
align-items: center;
justify-content: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm);
background: rgba(96, 165, 250, 0.1);
border-radius: var(--radius-md);
margin-bottom: var(--spacing-md);
color: var(--accent);
font-size: 0.85rem;
}
.speaking-pulse {
width: 10px;
height: 10px;
background: var(--accent);
border-radius: 50%;
animation: pulse 0.8s ease-in-out infinite;
}
.interrupt-btn {
background: none;
border: none;
cursor: pointer;
font-size: 1rem;
opacity: 0.7;
}
.interrupt-btn:hover {
opacity: 1;
}
.voice-select {
display: flex;
align-items: center;
gap: var(--spacing-sm);
font-size: 0.8rem;
}
.voice-select label {
color: var(--text-secondary);
}
.voice-select select {
flex: 1;
padding: var(--spacing-xs) var(--spacing-sm);
background: var(--bg-tertiary);
border: 1px solid var(--border);
border-radius: var(--radius-md);
color: var(--text-primary);
font-size: 0.8rem;
}
.setup-hint {
text-align: center;
color: var(--text-secondary);
font-size: 0.85rem;
}
.setup-hint code {
background: var(--bg-tertiary);
padding: 2px 6px;
border-radius: 3px;
font-family: var(--font-mono);
}
.setup-hint .alt {
font-size: 0.75rem;
margin-top: var(--spacing-sm);
opacity: 0.7;
}
</style>

View file

@ -14,6 +14,7 @@
import PerformancePanel from '$lib/components/PerformancePanel.svelte';
import HooksPanel from '$lib/components/HooksPanel.svelte';
import ProgramsPanel from '$lib/components/ProgramsPanel.svelte';
import VoicePanel from '$lib/components/VoicePanel.svelte';
let activeMiddleTab = 'activity';
let activeRightTab = 'agents';
@ -30,6 +31,7 @@
const rightTabs = [
{ id: 'agents', label: 'Agents', icon: '🤖' },
{ id: 'voice', label: 'Sprache', icon: '🎤' },
{ id: 'context', label: 'Context', icon: '📌' },
{ id: 'hooks', label: 'Hooks', icon: '🪝' },
{ id: 'guards', label: 'Guard-Rails', icon: '🛡️' },
@ -108,6 +110,8 @@
<div class="panel-content">
{#if activeRightTab === 'agents'}
<AgentView />
{:else if activeRightTab === 'voice'}
<VoicePanel />
{:else if activeRightTab === 'context'}
<ContextPanel />
{:else if activeRightTab === 'hooks'}