feat: Textanalyse fuer KB-Hints — Konzept-Erkennung + Bigrams + Chat-Kontext [appimage]
All checks were successful
Build AppImage / build (push) Successful in 8m32s
All checks were successful
Build AppImage / build (push) Successful in 8m32s
- Konzept-Map: ~60 deutsche Phrasen → technische Suchbegriffe
("Nachrichten falsch rum" → "message sort chronological")
- Bigram-Extraktion: benachbarte Content-Woerter als Phrase
- Chat-Kontext: letzte 3 User-Nachrichten fliessen in die Suche ein
- Erweiterte Tech-Terms (~40) und Stoppwort-Liste
- Keywords max 12 statt 8, Konzepte haben Vorrang
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
09a9513983
commit
dcc42ee903
3 changed files with 234 additions and 41 deletions
|
|
@ -681,7 +681,26 @@ pub async fn send_message(app: AppHandle, message: String) -> Result<String, Str
|
|||
}
|
||||
|
||||
// Schicht 2: KB-Hints aus Wissensbasis laden (fehlertolerant)
|
||||
match knowledge::search_knowledge_internal(&message, 5).await {
|
||||
// Chat-Kontext: letzte 3 User-Nachrichten für bessere Themen-Erkennung
|
||||
let chat_context = {
|
||||
let mut ctx = message.clone();
|
||||
if let Some(db_state) = app.try_state::<Arc<Mutex<db::Database>>>() {
|
||||
if let Ok(db) = db_state.lock() {
|
||||
if let Ok(Some(session)) = db.get_active_session() {
|
||||
if let Ok(recent) = db.load_recent_user_messages(&session.id, 3) {
|
||||
// Ältere Nachrichten anhängen (die neueste ist die aktuelle)
|
||||
for older_msg in recent.iter().skip(1) {
|
||||
ctx.push(' ');
|
||||
// Nur die ersten 200 Zeichen jeder älteren Nachricht
|
||||
ctx.push_str(&safe_truncate(older_msg, 200));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ctx
|
||||
};
|
||||
match knowledge::search_knowledge_internal(&chat_context, 5).await {
|
||||
Ok(hints) if !hints.is_empty() => {
|
||||
// Hints an bestehenden Context anhängen oder neuen erstellen
|
||||
let ctx = context.get_or_insert_with(String::new);
|
||||
|
|
|
|||
|
|
@ -743,6 +743,19 @@ impl Database {
|
|||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Letzte N User-Nachrichten einer Session laden (für KB-Kontext-Analyse)
|
||||
pub fn load_recent_user_messages(&self, session_id: &str, limit: usize) -> SqlResult<Vec<String>> {
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT content FROM messages
|
||||
WHERE session_id = ?1 AND role = 'user'
|
||||
ORDER BY timestamp DESC LIMIT ?2"
|
||||
)?;
|
||||
let messages = stmt.query_map(params![session_id, limit as i64], |row| {
|
||||
row.get::<_, String>(0)
|
||||
})?.collect::<SqlResult<Vec<_>>>()?;
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Löscht alle Nachrichten einer Session
|
||||
pub fn clear_messages(&self, session_id: &str) -> SqlResult<()> {
|
||||
self.conn.execute("DELETE FROM messages WHERE session_id = ?1", params![session_id])?;
|
||||
|
|
|
|||
|
|
@ -184,58 +184,219 @@ const STOP_WORDS: &[&str] = &[
|
|||
"bitte", "danke", "okay", "alles", "nächste", "mach", "zeig", "gib",
|
||||
"mir", "dir", "uns", "hier", "dort", "jetzt", "gerade", "einfach",
|
||||
"phase", "feature", "erstelle", "implementiere", "baue",
|
||||
"hast", "hatte", "hätte", "wäre", "gibt", "geht", "ging", "gehen",
|
||||
"könntest", "könnten", "sollten", "müssten", "dürfen",
|
||||
"immer", "wieder", "eigentlich", "wirklich", "halt", "eben",
|
||||
"also", "dabei", "dafür", "dagegen", "daran", "darauf", "daraus",
|
||||
"davon", "dazu", "deshalb", "deswegen", "trotzdem", "außerdem",
|
||||
"irgendwie", "irgendwas", "irgendwann", "vielleicht", "ziemlich",
|
||||
"schau", "guck", "check", "prüf", "teste", "versuch",
|
||||
"können", "müssen", "sollen", "wollen", "dürfen",
|
||||
"noch", "schon", "gerade", "gleich", "erstmal", "nochmal",
|
||||
];
|
||||
|
||||
/// Extrahiert relevante Keywords aus einer User-Nachricht
|
||||
/// Filtert Stoppwörter und kurze Wörter raus, gibt die besten Suchbegriffe zurück
|
||||
/// Phase 3.1: Erkennt auch Projektnamen und technische Terme aus Dateipfaden
|
||||
pub fn extract_keywords(message: &str) -> Vec<String> {
|
||||
let mut unique: Vec<String> = Vec::new();
|
||||
// ============ Konzept-Erkennung (wie Google/Facebook Textanalyse) ============
|
||||
//
|
||||
// Statt nur Einzelwörter zu extrahieren, erkennen wir KONZEPTE:
|
||||
// "Nachrichten kommen falsch rum" → Konzept: "message sort chronological"
|
||||
// "Seite lädt langsam" → Konzept: "performance loading speed"
|
||||
// "Button tut nichts wenn ich drücke" → Konzept: "click event handler"
|
||||
//
|
||||
// Drei Schichten:
|
||||
// 1. Konzept-Map: Deutsche Phrasen/Wörter → technische Suchbegriffe
|
||||
// 2. Bigrams: Aufeinanderfolgende Content-Wörter als Phrase
|
||||
// 3. Einzelwörter: Fallback für alles was nicht als Konzept erkannt wird
|
||||
|
||||
// Projekt-Name aus Pfad extrahieren (z.B. /mnt/.../Projekte/Leckerbuch/... → leckerbuch)
|
||||
if let Some(proj) = detect_project(message) {
|
||||
if !unique.contains(&proj) {
|
||||
unique.push(proj);
|
||||
}
|
||||
}
|
||||
/// Konzept-Map: Erkennt Themen aus natürlicher Sprache (DE + EN gemischt)
|
||||
/// Jeder Eintrag: (Trigger-Wörter die ALLE vorkommen müssen, Suchbegriffe für KB)
|
||||
/// Die Trigger werden case-insensitive gegen den Gesamttext geprüft.
|
||||
const CONCEPT_MAP: &[(&[&str], &[&str])] = &[
|
||||
// === UI / Frontend ===
|
||||
(&["scroll"], &["scroll auto-scroll sticky"]),
|
||||
(&["nachrichten", "reihenfolge"], &["message sort chronological timestamp"]),
|
||||
(&["nachrichten", "falsch"], &["message order sort"]),
|
||||
(&["nachrichten", "sortier"], &["message sort chronological"]),
|
||||
(&["chronolog"], &["chronological sort timestamp order"]),
|
||||
(&["button", "klick"], &["click event handler button"]),
|
||||
(&["button", "tut"], &["click event handler disabled"]),
|
||||
(&["lädt", "langsam"], &["performance loading speed"]),
|
||||
(&["seite", "langsam"], &["performance loading speed"]),
|
||||
(&["anzeig", "falsch"], &["display render bug"]),
|
||||
(&["layout", "kaputt"], &["layout css broken"]),
|
||||
(&["responsiv"], &["responsive mobile layout breakpoint"]),
|
||||
(&["dark", "mode"], &["theme dark mode css"]),
|
||||
(&["theme", "farb"], &["theme css color variable"]),
|
||||
(&["css", "variab"], &["css variable custom property"]),
|
||||
(&["modal", "dialog"], &["modal dialog popup"]),
|
||||
(&["tastatur", "shortcut"], &["keyboard shortcut hotkey"]),
|
||||
(&["font", "schrift"], &["font size typography"]),
|
||||
|
||||
// Technische Terme die als Ganzes erhalten bleiben sollen
|
||||
let lower = message.to_lowercase();
|
||||
let tech_terms = [
|
||||
"mysql", "docker", "tauri", "svelte", "rust", "cargo", "nixos",
|
||||
"forgejo", "portainer", "claude-bridge", "wissensbasis", "knowledge",
|
||||
// === Chat / Messages ===
|
||||
(&["chat", "scroll"], &["chat scroll auto-scroll message"]),
|
||||
(&["streaming"], &["streaming token event bridge"]),
|
||||
(&["session", "wechsel"], &["session switch load reset"]),
|
||||
(&["session", "laden"], &["session load messages restore"]),
|
||||
(&["hints"], &["knowledge hints kb search"]),
|
||||
(&["wissens"], &["knowledge kb search hint"]),
|
||||
(&["context", "window"], &["context window token limit compact"]),
|
||||
(&["token", "limit"], &["context window token limit"]),
|
||||
|
||||
// === Bridge / Backend ===
|
||||
(&["bridge", "connect"], &["bridge socket connection"]),
|
||||
(&["bridge", "crash"], &["bridge error crash restart"]),
|
||||
(&["socket"], &["unix socket bridge connection"]),
|
||||
(&["epipe"], &["epipe broken pipe bridge"]),
|
||||
(&["timeout"], &["timeout connection async"]),
|
||||
|
||||
// === Datenbank ===
|
||||
(&["datenbank", "fehler"], &["database error query sql"]),
|
||||
(&["sql", "fehler"], &["sql error query syntax"]),
|
||||
(&["migration"], &["database migration schema alter"]),
|
||||
(&["tabelle", "anleg"], &["database table create schema"]),
|
||||
(&["spalte", "hinzufüg"], &["database column alter add"]),
|
||||
|
||||
// === Build / Deploy / CI ===
|
||||
(&["build", "fehler"], &["build error compile"]),
|
||||
(&["appimage"], &["appimage build linux package"]),
|
||||
(&["pipeline"], &["ci cd pipeline forgejo workflow"]),
|
||||
(&["deploy"], &["deploy production release"]),
|
||||
(&["container"], &["docker container"]),
|
||||
|
||||
// === Dolibarr ===
|
||||
(&["rechnung"], &["invoice facture dolibarr"]),
|
||||
(&["angebot"], &["proposal propal dolibarr"]),
|
||||
(&["lieferant"], &["supplier fournisseur dolibarr"]),
|
||||
(&["artikel", "produkt"], &["product article dolibarr"]),
|
||||
(&["lager", "bestand"], &["stock warehouse inventory"]),
|
||||
(&["modul", "aktivier"], &["module activate enable dolibarr"]),
|
||||
(&["berecht"], &["permission rights dolibarr"]),
|
||||
(&["hook", "trigger"], &["hook trigger dolibarr event"]),
|
||||
(&["pdf", "generat"], &["pdf generation template dolibarr"]),
|
||||
(&["extrafield"], &["extrafield custom field dolibarr"]),
|
||||
|
||||
// === Netzwerk / Server ===
|
||||
(&["cors"], &["cors origin header access-control"]),
|
||||
(&["ssl", "zertifikat"], &["ssl tls certificate https"]),
|
||||
(&["proxy"], &["proxy reverse nginx forward"]),
|
||||
(&["port", "blockiert"], &["port blocked firewall"]),
|
||||
(&["dns"], &["dns domain nameserver"]),
|
||||
|
||||
// === Fehlersuche ===
|
||||
(&["fehler", "log"], &["error log debug trace"]),
|
||||
(&["crash"], &["crash error panic segfault"]),
|
||||
(&["speicher", "voll"], &["memory leak oom"]),
|
||||
(&["hängt"], &["hang freeze deadlock"]),
|
||||
(&["endlos"], &["infinite loop recursion"]),
|
||||
(&["leer", "seite"], &["blank page empty render"]),
|
||||
(&["weiß", "seite"], &["blank page white screen error"]),
|
||||
(&["404"], &["not found 404 route missing"]),
|
||||
(&["500"], &["server error 500 internal"]),
|
||||
(&["permiss"], &["permission denied access rights"]),
|
||||
];
|
||||
|
||||
/// Technische Terme die als Ganzes erkannt werden (case-insensitive)
|
||||
const TECH_TERMS: &[&str] = &[
|
||||
"mysql", "mariadb", "sqlite", "postgres",
|
||||
"docker", "podman", "portainer",
|
||||
"tauri", "svelte", "sveltekit", "vite", "typescript",
|
||||
"rust", "cargo", "tokio",
|
||||
"nixos", "nix-shell",
|
||||
"forgejo", "gitea", "git",
|
||||
"claude-bridge", "claude-desktop",
|
||||
"wissensbasis", "knowledge",
|
||||
"webhook", "api", "cors", "jwt", "sse", "websocket",
|
||||
];
|
||||
for term in &tech_terms {
|
||||
if lower.contains(term) && !unique.contains(&term.to_string()) {
|
||||
unique.push(term.to_string());
|
||||
"appimage", "dpkg", "rpm",
|
||||
"pwa", "service-worker", "capacitor",
|
||||
"nginx", "apache", "caddy",
|
||||
"ollama", "whisper", "piper",
|
||||
"dolibarr", "extrafield", "hook",
|
||||
"n8n", "home-assistant", "shelly",
|
||||
"fints", "hbci", "sepa",
|
||||
"paneforge", "resizeobserver", "intersectionobserver",
|
||||
"fulltext", "fts5",
|
||||
];
|
||||
|
||||
/// Extrahiert relevante Suchbegriffe aus einer User-Nachricht.
|
||||
///
|
||||
/// Drei Analyse-Schichten (wie kommerzielle Text-Analyse):
|
||||
/// 1. **Konzept-Erkennung**: Phrasen/Ideen → technische Suchbegriffe
|
||||
/// 2. **Bigrams**: Benachbarte Content-Wörter als Phrase ("auto scroll", "session wechsel")
|
||||
/// 3. **Einzelwörter**: Verbleibende relevante Terme
|
||||
///
|
||||
/// Gibt max 12 Keywords zurück, Konzepte haben Vorrang.
|
||||
pub fn extract_keywords(message: &str) -> Vec<String> {
|
||||
let lower = message.to_lowercase();
|
||||
let mut result: Vec<String> = Vec::new();
|
||||
|
||||
// --- Schicht 0: Projekt-Erkennung ---
|
||||
if let Some(proj) = detect_project(message) {
|
||||
push_unique(&mut result, &proj);
|
||||
}
|
||||
|
||||
// --- Schicht 1: Konzept-Erkennung ---
|
||||
// Prüfe ob Trigger-Wörter im Text vorkommen → füge Suchbegriffe hinzu
|
||||
for (triggers, search_terms) in CONCEPT_MAP {
|
||||
let all_match = triggers.iter().all(|trigger| lower.contains(trigger));
|
||||
if all_match {
|
||||
for term in search_terms.iter() {
|
||||
// Suchbegriffe können Leerzeichen-getrennte Wörter sein
|
||||
for word in term.split_whitespace() {
|
||||
push_unique(&mut result, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Normale Wort-Extraktion
|
||||
let words: Vec<String> = message
|
||||
.to_lowercase()
|
||||
.replace(|c: char| !c.is_alphanumeric() && c != '-' && c != '_' && c != '.', " ")
|
||||
.split_whitespace()
|
||||
// --- Schicht 1b: Technische Terme ---
|
||||
for term in TECH_TERMS {
|
||||
if lower.contains(term) {
|
||||
push_unique(&mut result, term);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Schicht 2: Bigrams aus Content-Wörtern ---
|
||||
let content_words: Vec<&str> = extract_content_words(&lower);
|
||||
for pair in content_words.windows(2) {
|
||||
let bigram = format!("{} {}", pair[0], pair[1]);
|
||||
// Only keep bigrams whose two words are both longer than 3 bytes (`str::len` counts bytes, not characters); exact duplicates are dropped by push_unique.
|
||||
if pair[0].len() > 3 && pair[1].len() > 3 {
|
||||
push_unique(&mut result, &bigram);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Schicht 3: Einzelwörter (Fallback) ---
|
||||
for word in &content_words {
|
||||
if word.len() >= 3 {
|
||||
push_unique(&mut result, word);
|
||||
}
|
||||
}
|
||||
|
||||
// Max 12 Keywords — Konzepte stehen vorne und haben damit Vorrang
|
||||
result.truncate(12);
|
||||
|
||||
println!("🔎 Keywords extrahiert ({}): {:?}", result.len(),
|
||||
result.iter().take(6).cloned().collect::<Vec<_>>());
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Content-Wörter extrahieren: alles was kein Stoppwort und keine Zahl ist
|
||||
fn extract_content_words(text: &str) -> Vec<&str> {
|
||||
text.split(|c: char| !c.is_alphanumeric() && c != '-' && c != '_')
|
||||
.filter(|w| {
|
||||
w.len() >= 3
|
||||
&& !STOP_WORDS.contains(&w.as_ref())
|
||||
w.len() >= 2
|
||||
&& !STOP_WORDS.contains(w)
|
||||
&& !w.chars().all(|c| c.is_numeric())
|
||||
})
|
||||
.map(|w| w.to_string())
|
||||
.collect();
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Deduplizieren und max 8 Keywords behalten (mehr als vorher wegen Session-Kontext)
|
||||
for w in words {
|
||||
if !unique.contains(&w) {
|
||||
unique.push(w);
|
||||
/// Appends `item` to `vec` unless an equal keyword is already present.
///
/// Equality is exact, case-sensitive string comparison. Existing entries keep
/// their position, so the order of first occurrences is preserved. The owned
/// `String` is only allocated when the keyword is actually inserted.
fn push_unique(vec: &mut Vec<String>, item: &str) {
    let already_present = vec.iter().any(|existing| existing.as_str() == item);
    if !already_present {
        vec.push(item.to_owned());
    }
}
|
||||
if unique.len() >= 8 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unique
|
||||
}
|
||||
|
||||
/// Wissenseintrag aus der knowledge-Tabelle
|
||||
|
|
|
|||
Loading…
Reference in a new issue