Compare commits
No commits in common. "013b037322890d267660195cefa4558e1860b032" and "87daee85f997d00c822c0a211e79d6fbca75c1a7" have entirely different histories.
013b037322
...
87daee85f9
39 changed files with 0 additions and 4839 deletions
15
.env.example
15
.env.example
|
|
@ -1,15 +0,0 @@
|
|||
# Dateiverwaltung Umgebungsvariablen
|
||||
# Kopiere diese Datei nach .env und passe sie an
|
||||
|
||||
# Datenbank
|
||||
DATABASE_URL=sqlite:///./data/dateiverwaltung.db
|
||||
|
||||
# Zeitzone
|
||||
TZ=Europe/Berlin
|
||||
|
||||
# OCR Einstellungen
|
||||
OCR_LANGUAGE=deu
|
||||
OCR_DPI=300
|
||||
|
||||
# Optional: Claude API für KI-Validierung (spätere Erweiterung)
|
||||
# CLAUDE_API_KEY=sk-ant-...
|
||||
42
Dockerfile
42
Dockerfile
|
|
@ -1,42 +0,0 @@
|
|||
# Dateiverwaltung Docker Image
|
||||
FROM python:3.11-slim
|
||||
|
||||
# System-Abhängigkeiten für OCR und PDF
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
ocrmypdf \
|
||||
poppler-utils \
|
||||
ghostscript \
|
||||
libmagic1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Arbeitsverzeichnis
|
||||
WORKDIR /app
|
||||
|
||||
# Python-Abhängigkeiten
|
||||
COPY backend/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Anwendung kopieren
|
||||
COPY backend/ ./backend/
|
||||
COPY frontend/ ./frontend/
|
||||
COPY config/ ./config/
|
||||
COPY regeln/ ./regeln/
|
||||
|
||||
# Daten-Verzeichnis
|
||||
RUN mkdir -p /app/data/inbox /app/data/processed /app/data/archive /app/data/zugferd
|
||||
|
||||
# Umgebungsvariablen
|
||||
ENV PYTHONPATH=/app
|
||||
ENV DATABASE_URL=sqlite:////app/data/dateiverwaltung.db
|
||||
|
||||
# Port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health Check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Start
|
||||
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
147
README.md
147
README.md
|
|
@ -1,149 +1,2 @@
|
|||
<<<<<<< HEAD
|
||||
# docker.dateiverwaltung
|
||||
|
||||
=======
|
||||
# Dateiverwaltung
|
||||
|
||||
Modulares Dokumenten-Management-System für automatische Verarbeitung, Sortierung und Benennung von Dokumenten.
|
||||
|
||||
## Features
|
||||
|
||||
- **Mail-Abruf**: Automatischer Abruf von Attachments aus IMAP-Postfächern
|
||||
- **PDF-Verarbeitung**: Text-Extraktion und OCR für gescannte Dokumente
|
||||
- **ZUGFeRD-Erkennung**: Automatische Erkennung und separate Ablage von ZUGFeRD-Rechnungen
|
||||
- **Regel-Engine**: Flexible, erweiterbare Regeln für Erkennung und Benennung
|
||||
- **Pipeline-System**: Mehrere unabhängige Pipelines (Firma, Privat, etc.)
|
||||
|
||||
## Schnellstart
|
||||
|
||||
### Mit Docker (empfohlen)
|
||||
|
||||
```bash
|
||||
# Image bauen und starten
|
||||
docker-compose up -d
|
||||
|
||||
# Logs ansehen
|
||||
docker-compose logs -f
|
||||
|
||||
# Stoppen
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
Dann im Browser öffnen: http://localhost:8000
|
||||
|
||||
### Ohne Docker
|
||||
|
||||
```bash
|
||||
# Virtuelle Umgebung erstellen
|
||||
cd backend
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Linux/Mac
|
||||
# oder: venv\Scripts\activate # Windows
|
||||
|
||||
# Abhängigkeiten installieren
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Starten
|
||||
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
## Benennungsschema
|
||||
|
||||
### Wiederkehrende Dokumente (Rechnungen)
|
||||
```
|
||||
{Jahr}.{Monat}.{Tag} - {Kategorie} - {Ersteller} - {Dokumentennummer} - {Sammelbegriff} - {Preis} EUR.pdf
|
||||
|
||||
Beispiel:
|
||||
2026.02.01 - Rechnung - Sonepar - 10023934 - Material - 1600 EUR.pdf
|
||||
```
|
||||
|
||||
### Einmalige Dokumente (Verträge, Zeugnisse)
|
||||
```
|
||||
{Typ} - {Aussteller} - {Beschreibung} - {Jahr}.pdf
|
||||
|
||||
Beispiel:
|
||||
Zeugnis - Schule X - Grundschulzeugnis - 2026.pdf
|
||||
```
|
||||
|
||||
## Projektstruktur
|
||||
|
||||
```
|
||||
dateiverwaltung/
|
||||
├── backend/
|
||||
│ ├── app/
|
||||
│ │ ├── models/ # Datenbank-Modelle
|
||||
│ │ ├── modules/ # Kernmodule (Mail, PDF, Sorter)
|
||||
│ │ ├── routes/ # API Endpoints
|
||||
│ │ ├── services/ # Business Logic
|
||||
│ │ └── main.py # FastAPI App
|
||||
│ └── requirements.txt
|
||||
├── frontend/
|
||||
│ ├── static/
|
||||
│ │ ├── css/
|
||||
│ │ └── js/
|
||||
│ └── templates/
|
||||
├── data/ # Persistente Daten
|
||||
│ ├── inbox/ # Neue Dateien
|
||||
│ ├── processed/ # Verarbeitete Dateien
|
||||
│ ├── archive/ # Sortierte Dateien
|
||||
│ └── zugferd/ # ZUGFeRD-Rechnungen
|
||||
├── regeln/ # Regel-Beispiele
|
||||
├── docker-compose.yml
|
||||
├── Dockerfile
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## Module
|
||||
|
||||
### Mail-Fetcher
|
||||
Holt Attachments aus IMAP-Postfächern mit konfigurierbaren Filtern:
|
||||
- Dateitypen (.pdf, .jpg, etc.)
|
||||
- Maximale Größe
|
||||
- IMAP-Ordner
|
||||
|
||||
### PDF-Processor
|
||||
- **Text-Extraktion**: Mit pdfplumber/pypdf
|
||||
- **OCR**: Mit ocrmypdf + Tesseract (deutsch)
|
||||
- **ZUGFeRD**: Erkennung via factur-x Library
|
||||
|
||||
### Sorter
|
||||
Regelbasierte Erkennung und Benennung:
|
||||
- Pattern-Matching (Text, Absender, Dateiname)
|
||||
- Regex-basierte Feldextraktion
|
||||
- Konfigurierbares Namensschema
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Methode | Endpoint | Beschreibung |
|
||||
|---------|----------|--------------|
|
||||
| GET | /api/pipelines | Alle Pipelines |
|
||||
| POST | /api/pipelines | Neue Pipeline |
|
||||
| POST | /api/pipelines/{id}/run | Pipeline ausführen |
|
||||
| GET | /api/pipelines/{id}/mail-configs | Mail-Konfigurationen |
|
||||
| POST | /api/pipelines/{id}/mail-configs | Postfach hinzufügen |
|
||||
| GET | /api/pipelines/{id}/regeln | Sortier-Regeln |
|
||||
| POST | /api/pipelines/{id}/regeln | Regel hinzufügen |
|
||||
| POST | /api/regeln/test | Regel testen |
|
||||
| GET | /api/dokumente | Verarbeitete Dokumente |
|
||||
| GET | /api/stats | Statistiken |
|
||||
|
||||
## Regex-Beispiele für Regeln
|
||||
|
||||
```yaml
|
||||
# Datum (DD.MM.YYYY)
|
||||
(\d{2}[./]\d{2}[./]\d{4})
|
||||
|
||||
# Rechnungsnummer
|
||||
(?:Rechnungsnummer|Invoice)[:\s]*(\d+)
|
||||
|
||||
# Betrag mit EUR
|
||||
(?:Gesamtbetrag|Summe)[:\s]*([\d.,]+)\s*(?:EUR|€)
|
||||
```
|
||||
|
||||
## Erweiterungen (geplant)
|
||||
|
||||
- [ ] Claude API Integration für KI-Validierung
|
||||
- [ ] Scheduler für automatische Ausführung
|
||||
- [ ] Dolibarr-Integration
|
||||
- [ ] Dashboard mit Grafiken
|
||||
>>>>>>> 8585cc3 (Dateiverwaltung Email attachment abruf läuft)
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
# Dateiverwaltung - Modulares Dokumenten-Management-System
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,26 +0,0 @@
|
|||
"""Zentrale Konfiguration"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Basis-Pfade
|
||||
BASE_DIR = Path(__file__).parent.parent.parent
|
||||
DATA_DIR = BASE_DIR / "data"
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
REGELN_DIR = BASE_DIR / "regeln"
|
||||
|
||||
# Datenbank
|
||||
DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/dateiverwaltung.db")
|
||||
|
||||
# Ordner-Struktur
|
||||
INBOX_DIR = DATA_DIR / "inbox"
|
||||
PROCESSED_DIR = DATA_DIR / "processed"
|
||||
ARCHIVE_DIR = DATA_DIR / "archive"
|
||||
ZUGFERD_DIR = DATA_DIR / "zugferd"
|
||||
|
||||
# OCR Einstellungen
|
||||
OCR_LANGUAGE = "deu" # Deutsch
|
||||
OCR_DPI = 300
|
||||
|
||||
# Erstelle Ordner falls nicht vorhanden
|
||||
for dir_path in [INBOX_DIR, PROCESSED_DIR, ARCHIVE_DIR, ZUGFERD_DIR, REGELN_DIR]:
|
||||
dir_path.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
"""
|
||||
Dateiverwaltung - Modulares Dokumenten-Management-System
|
||||
Hauptanwendung mit FastAPI
|
||||
"""
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi import Request
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
from .models import init_db
|
||||
from .routes.api import router as api_router
|
||||
from .config import BASE_DIR
|
||||
|
||||
# Logging konfigurieren
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
|
||||
# App erstellen
|
||||
app = FastAPI(
|
||||
title="Dateiverwaltung",
|
||||
description="Modulares Dokumenten-Management-System",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# Statische Dateien
|
||||
frontend_dir = BASE_DIR / "frontend"
|
||||
app.mount("/static", StaticFiles(directory=frontend_dir / "static"), name="static")
|
||||
|
||||
# Templates
|
||||
templates = Jinja2Templates(directory=frontend_dir / "templates")
|
||||
|
||||
# API Router
|
||||
app.include_router(api_router)
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup():
|
||||
"""Initialisierung beim Start"""
|
||||
init_db()
|
||||
logging.info("Datenbank initialisiert")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
|
||||
async def index(request: Request):
|
||||
"""Hauptseite"""
|
||||
return templates.TemplateResponse("index.html", {"request": request})
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""Health Check für Docker"""
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
from .database import (
|
||||
Postfach, QuellOrdner, SortierRegel, VerarbeiteteDatei,
|
||||
init_db, get_db, SessionLocal
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -1,161 +0,0 @@
|
|||
"""Datenbank-Modelle - Getrennte Bereiche: Mail-Abruf und Datei-Sortierung"""
|
||||
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime, Text, JSON
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from datetime import datetime
|
||||
|
||||
from ..config import DATABASE_URL
|
||||
|
||||
engine = create_engine(DATABASE_URL, echo=False)
|
||||
SessionLocal = sessionmaker(bind=engine)
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
# ============ BEREICH 1: Mail-Abruf ============
|
||||
|
||||
class Postfach(Base):
|
||||
"""IMAP-Postfach Konfiguration"""
|
||||
__tablename__ = "postfaecher"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
name = Column(String(100), nullable=False)
|
||||
|
||||
# IMAP
|
||||
imap_server = Column(String(255), nullable=False)
|
||||
imap_port = Column(Integer, default=993)
|
||||
email = Column(String(255), nullable=False)
|
||||
passwort = Column(String(255), nullable=False)
|
||||
ordner = Column(String(100), default="INBOX")
|
||||
alle_ordner = Column(Boolean, default=False) # Alle IMAP-Ordner durchsuchen
|
||||
nur_ungelesen = Column(Boolean, default=False) # Nur ungelesene Mails (False = alle)
|
||||
|
||||
# Ziel
|
||||
ziel_ordner = Column(String(500), nullable=False)
|
||||
|
||||
# Filter
|
||||
erlaubte_typen = Column(JSON, default=lambda: [".pdf"])
|
||||
max_groesse_mb = Column(Integer, default=25)
|
||||
|
||||
# Status
|
||||
aktiv = Column(Boolean, default=True)
|
||||
letzter_abruf = Column(DateTime)
|
||||
letzte_anzahl = Column(Integer, default=0)
|
||||
|
||||
|
||||
# ============ BEREICH 2: Datei-Sortierung ============
|
||||
|
||||
class QuellOrdner(Base):
|
||||
"""Ordner der nach Dateien gescannt wird"""
|
||||
__tablename__ = "quell_ordner"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
name = Column(String(100), nullable=False)
|
||||
pfad = Column(String(500), nullable=False)
|
||||
ziel_ordner = Column(String(500), nullable=False)
|
||||
rekursiv = Column(Boolean, default=True) # Unterordner einschließen
|
||||
dateitypen = Column(JSON, default=lambda: [".pdf", ".jpg", ".jpeg", ".png", ".tiff"])
|
||||
aktiv = Column(Boolean, default=True)
|
||||
|
||||
|
||||
class SortierRegel(Base):
|
||||
"""Regeln für Datei-Erkennung und Benennung"""
|
||||
__tablename__ = "sortier_regeln"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
name = Column(String(100), nullable=False)
|
||||
prioritaet = Column(Integer, default=100)
|
||||
aktiv = Column(Boolean, default=True)
|
||||
|
||||
# Erkennungsmuster
|
||||
muster = Column(JSON, default=dict)
|
||||
|
||||
# Extraktion
|
||||
extraktion = Column(JSON, default=dict)
|
||||
|
||||
# Ausgabe
|
||||
schema = Column(String(500), default="{datum} - Dokument.pdf")
|
||||
unterordner = Column(String(100)) # Optional: Unterordner im Ziel
|
||||
|
||||
|
||||
class VerarbeiteteMail(Base):
|
||||
"""Tracking welche Mails bereits verarbeitet wurden"""
|
||||
__tablename__ = "verarbeitete_mails"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
postfach_id = Column(Integer, nullable=False)
|
||||
message_id = Column(String(500), nullable=False) # Email Message-ID Header
|
||||
ordner = Column(String(200)) # IMAP Ordner
|
||||
betreff = Column(String(500))
|
||||
absender = Column(String(255))
|
||||
anzahl_attachments = Column(Integer, default=0)
|
||||
verarbeitet_am = Column(DateTime, default=datetime.utcnow)
|
||||
|
||||
|
||||
class VerarbeiteteDatei(Base):
|
||||
"""Log verarbeiteter Dateien"""
|
||||
__tablename__ = "verarbeitete_dateien"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
original_pfad = Column(String(1000))
|
||||
original_name = Column(String(500))
|
||||
neuer_pfad = Column(String(1000))
|
||||
neuer_name = Column(String(500))
|
||||
|
||||
ist_zugferd = Column(Boolean, default=False)
|
||||
ocr_durchgefuehrt = Column(Boolean, default=False)
|
||||
|
||||
status = Column(String(50)) # sortiert, zugferd, fehler, keine_regel
|
||||
fehler = Column(Text)
|
||||
|
||||
extrahierte_daten = Column(JSON)
|
||||
verarbeitet_am = Column(DateTime, default=datetime.utcnow)
|
||||
|
||||
|
||||
def migrate_db():
|
||||
"""Fügt fehlende Spalten hinzu ohne Daten zu löschen"""
|
||||
from sqlalchemy import inspect, text
|
||||
|
||||
inspector = inspect(engine)
|
||||
|
||||
# Migrations-Definitionen: {tabelle: {spalte: sql_typ}}
|
||||
migrations = {
|
||||
"postfaecher": {
|
||||
"alle_ordner": "BOOLEAN DEFAULT 0",
|
||||
"nur_ungelesen": "BOOLEAN DEFAULT 0"
|
||||
},
|
||||
"quell_ordner": {
|
||||
"rekursiv": "BOOLEAN DEFAULT 1",
|
||||
"dateitypen": "JSON"
|
||||
}
|
||||
}
|
||||
|
||||
with engine.connect() as conn:
|
||||
for table, columns in migrations.items():
|
||||
if table not in inspector.get_table_names():
|
||||
continue
|
||||
|
||||
existing = [col["name"] for col in inspector.get_columns(table)]
|
||||
|
||||
for col_name, col_type in columns.items():
|
||||
if col_name not in existing:
|
||||
try:
|
||||
conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}"))
|
||||
conn.commit()
|
||||
print(f"Migration: {table}.{col_name} hinzugefügt")
|
||||
except Exception as e:
|
||||
print(f"Migration übersprungen: {table}.{col_name} - {e}")
|
||||
|
||||
|
||||
def init_db():
|
||||
"""Datenbank initialisieren"""
|
||||
Base.metadata.create_all(engine)
|
||||
migrate_db()
|
||||
|
||||
|
||||
def get_db():
|
||||
"""Database Session Generator"""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
|
@ -1 +0,0 @@
|
|||
# Module für die Pipeline-Verarbeitung
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,373 +0,0 @@
|
|||
"""
|
||||
Globale Feld-Extraktoren mit Kaskaden-Regex
|
||||
Werden automatisch als Fallback verwendet wenn regel-spezifische Muster nicht greifen
|
||||
"""
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============ DATUM ============
|
||||
DATUM_MUSTER = [
|
||||
# Mit Kontext (zuverlässiger)
|
||||
{"regex": r"Rechnungsdatum[:\s]*(\d{2})[./](\d{2})[./](\d{4})", "order": "dmy"},
|
||||
{"regex": r"Belegdatum[:\s]*(\d{2})[./](\d{2})[./](\d{4})", "order": "dmy"},
|
||||
{"regex": r"Datum[:\s]*(\d{2})[./](\d{2})[./](\d{4})", "order": "dmy"},
|
||||
{"regex": r"Date[:\s]*(\d{2})[./](\d{2})[./](\d{4})", "order": "dmy"},
|
||||
{"regex": r"vom[:\s]*(\d{2})[./](\d{2})[./](\d{4})", "order": "dmy"},
|
||||
|
||||
# ISO Format
|
||||
{"regex": r"(\d{4})-(\d{2})-(\d{2})", "order": "ymd"},
|
||||
|
||||
# Deutsches Format ohne Kontext
|
||||
{"regex": r"(\d{2})\.(\d{2})\.(\d{4})", "order": "dmy"},
|
||||
{"regex": r"(\d{2})/(\d{2})/(\d{4})", "order": "dmy"},
|
||||
|
||||
# Amerikanisches Format
|
||||
{"regex": r"(\d{2})/(\d{2})/(\d{4})", "order": "mdy"},
|
||||
|
||||
# Ausgeschriebene Monate
|
||||
{"regex": r"(\d{1,2})\.\s*(Januar|Februar|März|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)\s*(\d{4})", "order": "dMy"},
|
||||
{"regex": r"(\d{1,2})\s+(Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez)[a-z]*\.?\s+(\d{4})", "order": "dMy"},
|
||||
]
|
||||
|
||||
MONATE_DE = {
|
||||
"januar": 1, "februar": 2, "märz": 3, "april": 4, "mai": 5, "juni": 6,
|
||||
"juli": 7, "august": 8, "september": 9, "oktober": 10, "november": 11, "dezember": 12,
|
||||
"jan": 1, "feb": 2, "mär": 3, "apr": 4, "jun": 6, "jul": 7, "aug": 8, "sep": 9, "okt": 10, "nov": 11, "dez": 12
|
||||
}
|
||||
|
||||
|
||||
def extrahiere_datum(text: str, spezifische_muster: List[Dict] = None) -> Optional[str]:
|
||||
"""
|
||||
Extrahiert Datum aus Text mit Kaskaden-Ansatz
|
||||
Returns: ISO Format YYYY-MM-DD oder None
|
||||
"""
|
||||
muster_liste = (spezifische_muster or []) + DATUM_MUSTER
|
||||
|
||||
for muster in muster_liste:
|
||||
try:
|
||||
match = re.search(muster["regex"], text, re.IGNORECASE)
|
||||
if match:
|
||||
groups = match.groups()
|
||||
order = muster.get("order", "dmy")
|
||||
|
||||
if order == "dmy":
|
||||
tag, monat, jahr = int(groups[0]), int(groups[1]), int(groups[2])
|
||||
elif order == "ymd":
|
||||
jahr, monat, tag = int(groups[0]), int(groups[1]), int(groups[2])
|
||||
elif order == "mdy":
|
||||
monat, tag, jahr = int(groups[0]), int(groups[1]), int(groups[2])
|
||||
elif order == "dMy":
|
||||
tag = int(groups[0])
|
||||
monat = MONATE_DE.get(groups[1].lower(), 1)
|
||||
jahr = int(groups[2])
|
||||
else:
|
||||
continue
|
||||
|
||||
# Validierung
|
||||
if 1 <= tag <= 31 and 1 <= monat <= 12 and 1900 <= jahr <= 2100:
|
||||
return f"{jahr:04d}-{monat:02d}-{tag:02d}"
|
||||
except Exception as e:
|
||||
logger.debug(f"Datum-Extraktion fehlgeschlagen: {e}")
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ============ BETRAG ============
|
||||
BETRAG_MUSTER = [
|
||||
# Mit Kontext (zuverlässiger)
|
||||
{"regex": r"Gesamtbetrag[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Rechnungsbetrag[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Endbetrag[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Summe[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Total[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Brutto[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"zu zahlen[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
{"regex": r"Zahlbetrag[:\s]*([\d.,]+)\s*(?:EUR|€)?", "context": True},
|
||||
|
||||
# Mit Währung (weniger zuverlässig)
|
||||
{"regex": r"([\d.,]+)\s*(?:EUR|€)", "context": False},
|
||||
{"regex": r"€\s*([\d.,]+)", "context": False},
|
||||
]
|
||||
|
||||
|
||||
def extrahiere_betrag(text: str, spezifische_muster: List[Dict] = None) -> Optional[str]:
|
||||
"""
|
||||
Extrahiert Betrag aus Text mit Kaskaden-Ansatz
|
||||
Returns: Formatierter Betrag (z.B. "1234,56") oder None
|
||||
"""
|
||||
muster_liste = (spezifische_muster or []) + BETRAG_MUSTER
|
||||
|
||||
for muster in muster_liste:
|
||||
try:
|
||||
match = re.search(muster["regex"], text, re.IGNORECASE)
|
||||
if match:
|
||||
betrag_str = match.group(1)
|
||||
betrag = _parse_betrag(betrag_str)
|
||||
if betrag is not None and betrag > 0:
|
||||
# Formatierung: Ganzzahl wenn möglich, sonst 2 Dezimalstellen
|
||||
if betrag == int(betrag):
|
||||
return str(int(betrag))
|
||||
return f"{betrag:.2f}".replace(".", ",")
|
||||
except Exception as e:
|
||||
logger.debug(f"Betrag-Extraktion fehlgeschlagen: {e}")
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _parse_betrag(betrag_str: str) -> Optional[float]:
|
||||
"""Parst Betrag-String zu Float"""
|
||||
betrag_str = betrag_str.strip()
|
||||
|
||||
# Leerzeichen entfernen
|
||||
betrag_str = betrag_str.replace(" ", "")
|
||||
|
||||
# Deutsches Format: 1.234,56 -> 1234.56
|
||||
if "," in betrag_str and "." in betrag_str:
|
||||
if betrag_str.rfind(",") > betrag_str.rfind("."):
|
||||
# Deutsches Format
|
||||
betrag_str = betrag_str.replace(".", "").replace(",", ".")
|
||||
else:
|
||||
# Englisches Format
|
||||
betrag_str = betrag_str.replace(",", "")
|
||||
elif "," in betrag_str:
|
||||
# Nur Komma: deutsches Dezimaltrennzeichen
|
||||
betrag_str = betrag_str.replace(",", ".")
|
||||
|
||||
try:
|
||||
return float(betrag_str)
|
||||
except:
|
||||
return None
|
||||
|
||||
|
||||
# ============ RECHNUNGSNUMMER ============
|
||||
NUMMER_MUSTER = [
|
||||
# Mit Kontext
|
||||
{"regex": r"Rechnungsnummer[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Rechnung\s*Nr\.?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Rechnungs-Nr\.?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Invoice\s*(?:No\.?|Number)?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Beleg-?Nr\.?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Dokumentnummer[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Bestell-?Nr\.?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
{"regex": r"Auftrags-?Nr\.?[:\s#]*([A-Z0-9][\w\-/]+)", "context": True},
|
||||
|
||||
# Typische Formate ohne Kontext
|
||||
{"regex": r"RE-?(\d{4,})", "context": False},
|
||||
{"regex": r"INV-?(\d{4,})", "context": False},
|
||||
]
|
||||
|
||||
|
||||
def extrahiere_nummer(text: str, spezifische_muster: List[Dict] = None) -> Optional[str]:
|
||||
"""
|
||||
Extrahiert Rechnungs-/Belegnummer aus Text
|
||||
"""
|
||||
muster_liste = (spezifische_muster or []) + NUMMER_MUSTER
|
||||
|
||||
for muster in muster_liste:
|
||||
try:
|
||||
match = re.search(muster["regex"], text, re.IGNORECASE)
|
||||
if match:
|
||||
nummer = match.group(1).strip()
|
||||
if len(nummer) >= 3: # Mindestens 3 Zeichen
|
||||
return nummer
|
||||
except Exception as e:
|
||||
logger.debug(f"Nummer-Extraktion fehlgeschlagen: {e}")
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ============ FIRMA/ABSENDER ============
|
||||
FIRMA_MUSTER = [
|
||||
# Absender-Zeile
|
||||
{"regex": r"^([A-ZÄÖÜ][A-Za-zäöüÄÖÜß\s&\-\.]+(?:GmbH|AG|KG|e\.K\.|Inc|Ltd|SE|UG))", "context": True},
|
||||
{"regex": r"Absender[:\s]*([A-Za-zäöüÄÖÜß\s&\-\.]+)", "context": True},
|
||||
{"regex": r"Von[:\s]*([A-Za-zäöüÄÖÜß\s&\-\.]+)", "context": True},
|
||||
]
|
||||
|
||||
# Bekannte Firmen (werden im Text gesucht)
|
||||
BEKANNTE_FIRMEN = [
|
||||
"Sonepar", "Amazon", "Ebay", "MediaMarkt", "Saturn", "Conrad", "Reichelt",
|
||||
"Hornbach", "Bauhaus", "OBI", "Hagebau", "Toom", "Hellweg",
|
||||
"Telekom", "Vodafone", "O2", "1&1",
|
||||
"Allianz", "HUK", "Provinzial", "DEVK", "Gothaer",
|
||||
"IKEA", "Poco", "XXXLutz", "Roller",
|
||||
"Alternate", "Mindfactory", "Caseking", "Notebooksbilliger",
|
||||
"DHL", "DPD", "Hermes", "UPS", "GLS",
|
||||
]
|
||||
|
||||
|
||||
def extrahiere_firma(text: str, absender_email: str = "", spezifische_muster: List[Dict] = None) -> Optional[str]:
|
||||
"""
|
||||
Extrahiert Firmennamen aus Text oder E-Mail-Absender
|
||||
"""
|
||||
text_lower = text.lower()
|
||||
|
||||
# 1. Bekannte Firmen im Text suchen
|
||||
for firma in BEKANNTE_FIRMEN:
|
||||
if firma.lower() in text_lower:
|
||||
return firma
|
||||
|
||||
# 2. Aus E-Mail-Domain extrahieren
|
||||
if absender_email:
|
||||
match = re.search(r"@([\w\-]+)\.", absender_email)
|
||||
if match:
|
||||
domain = match.group(1)
|
||||
# Bekannte Domain-Namen kapitalisieren
|
||||
for firma in BEKANNTE_FIRMEN:
|
||||
if firma.lower() == domain.lower():
|
||||
return firma
|
||||
return domain.capitalize()
|
||||
|
||||
# 3. Regex-Muster
|
||||
muster_liste = (spezifische_muster or []) + FIRMA_MUSTER
|
||||
for muster in muster_liste:
|
||||
try:
|
||||
match = re.search(muster["regex"], text, re.MULTILINE)
|
||||
if match:
|
||||
firma = match.group(1).strip()
|
||||
if len(firma) >= 2:
|
||||
return firma
|
||||
except:
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ============ DOKUMENTTYP ============
|
||||
DOKUMENTTYP_KEYWORDS = {
|
||||
"Rechnung": ["rechnung", "invoice", "faktura", "bill"],
|
||||
"Angebot": ["angebot", "quotation", "quote", "offerte"],
|
||||
"Gutschrift": ["gutschrift", "credit note", "erstattung"],
|
||||
"Mahnung": ["mahnung", "zahlungserinnerung", "payment reminder"],
|
||||
"Lieferschein": ["lieferschein", "delivery note", "packing slip"],
|
||||
"Auftragsbestätigung": ["auftragsbestätigung", "order confirmation", "bestellbestätigung"],
|
||||
"Vertrag": ["vertrag", "contract", "vereinbarung"],
|
||||
"Versicherungsschein": ["versicherungsschein", "police", "versicherungspolice"],
|
||||
"Zeugnis": ["zeugnis", "certificate", "zertifikat"],
|
||||
"Bescheinigung": ["bescheinigung", "nachweis", "bestätigung"],
|
||||
"Kontoauszug": ["kontoauszug", "account statement", "bankbeleg"],
|
||||
"Beitragsrechnung": ["beitragsrechnung", "beitragsberechnung", "mitgliedsbeitrag"],
|
||||
}
|
||||
|
||||
|
||||
def extrahiere_dokumenttyp(text: str, dateiname: str = "") -> Optional[str]:
|
||||
"""
|
||||
Erkennt den Dokumenttyp anhand von Keywords
|
||||
"""
|
||||
text_lower = text.lower() + " " + dateiname.lower()
|
||||
|
||||
for typ, keywords in DOKUMENTTYP_KEYWORDS.items():
|
||||
for keyword in keywords:
|
||||
if keyword in text_lower:
|
||||
return typ
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ============ HAUPTFUNKTION ============
|
||||
def extrahiere_alle_felder(text: str, dokument_info: Dict = None,
|
||||
regel_extraktion: Dict = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Extrahiert alle verfügbaren Felder aus einem Dokument
|
||||
|
||||
Args:
|
||||
text: Der extrahierte Text aus dem PDF
|
||||
dokument_info: Zusätzliche Infos (absender, original_name, etc.)
|
||||
regel_extraktion: Spezifische Extraktionsregeln aus der Regel
|
||||
|
||||
Returns:
|
||||
Dict mit allen extrahierten Feldern
|
||||
"""
|
||||
dokument_info = dokument_info or {}
|
||||
regel_extraktion = regel_extraktion or {}
|
||||
|
||||
felder = {}
|
||||
|
||||
# Datum
|
||||
datum_muster = regel_extraktion.get("datum", {}).get("muster", [])
|
||||
datum = extrahiere_datum(text, datum_muster if isinstance(datum_muster, list) else None)
|
||||
if datum:
|
||||
felder["datum"] = datum
|
||||
|
||||
# Betrag
|
||||
betrag_muster = regel_extraktion.get("betrag", {}).get("muster", [])
|
||||
betrag = extrahiere_betrag(text, betrag_muster if isinstance(betrag_muster, list) else None)
|
||||
if betrag:
|
||||
felder["betrag"] = betrag
|
||||
|
||||
# Nummer
|
||||
nummer_muster = regel_extraktion.get("nummer", {}).get("muster", [])
|
||||
nummer = extrahiere_nummer(text, nummer_muster if isinstance(nummer_muster, list) else None)
|
||||
if nummer:
|
||||
felder["nummer"] = nummer
|
||||
|
||||
# Firma
|
||||
absender = dokument_info.get("absender", "")
|
||||
firma = extrahiere_firma(text, absender)
|
||||
if firma:
|
||||
felder["firma"] = firma
|
||||
|
||||
# Dokumenttyp
|
||||
dateiname = dokument_info.get("original_name", "")
|
||||
typ = extrahiere_dokumenttyp(text, dateiname)
|
||||
if typ:
|
||||
felder["typ"] = typ
|
||||
|
||||
# Statische Werte aus Regel übernehmen
|
||||
for feld_name, feld_config in regel_extraktion.items():
|
||||
if isinstance(feld_config, dict) and "wert" in feld_config:
|
||||
felder[feld_name] = feld_config["wert"]
|
||||
|
||||
return felder
|
||||
|
||||
|
||||
# ============ SCHEMA-BUILDER ============
|
||||
def baue_dateiname(schema: str, felder: Dict[str, Any], endung: str = ".pdf") -> str:
|
||||
"""
|
||||
Baut Dateinamen aus Schema und Feldern.
|
||||
Entfernt automatisch Platzhalter und deren Trennzeichen wenn Feld fehlt.
|
||||
|
||||
Schema-Beispiel: "{datum} - {typ} - {firma} - {nummer} - {betrag} EUR"
|
||||
Mit felder = {datum: "2026-10-01", typ: "Rechnung", firma: "Sonepar"}
|
||||
Ergebnis: "2026-10-01 - Rechnung - Sonepar.pdf"
|
||||
"""
|
||||
# Schema ohne Endung verarbeiten
|
||||
if schema.lower().endswith(".pdf"):
|
||||
schema = schema[:-4]
|
||||
|
||||
# Platzhalter ersetzen
|
||||
result = schema
|
||||
for key, value in felder.items():
|
||||
placeholder = "{" + key + "}"
|
||||
if placeholder in result and value:
|
||||
result = result.replace(placeholder, str(value))
|
||||
|
||||
# Nicht ersetzte Platzhalter und ihre Trennzeichen entfernen
|
||||
# Muster: " - {feld}" oder "{feld} - " oder "{feld}"
|
||||
result = re.sub(r'\s*-\s*\{[^}]+\}', '', result)
|
||||
result = re.sub(r'\{[^}]+\}\s*-\s*', '', result)
|
||||
result = re.sub(r'\{[^}]+\}', '', result)
|
||||
|
||||
# Aufräumen: Doppelte Trennzeichen, Leerzeichen
|
||||
result = re.sub(r'\s*-\s*-\s*', ' - ', result)
|
||||
result = re.sub(r'\s+', ' ', result)
|
||||
result = result.strip(' -')
|
||||
|
||||
# Ungültige Zeichen entfernen
|
||||
invalid_chars = '<>:"/\\|?*'
|
||||
for char in invalid_chars:
|
||||
result = result.replace(char, "_")
|
||||
|
||||
# Endung anhängen
|
||||
if not result:
|
||||
result = "Dokument"
|
||||
|
||||
return result + endung
|
||||
|
|
@ -1,392 +0,0 @@
|
|||
"""
|
||||
Mail-Fetcher Modul
|
||||
Holt Attachments aus IMAP-Postfächern
|
||||
"""
|
||||
import imaplib
|
||||
import email
|
||||
from email.header import decode_header
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional
|
||||
import logging
|
||||
|
||||
from ..config import INBOX_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MailFetcher:
|
||||
"""Holt Attachments aus einem IMAP-Postfach"""
|
||||
|
||||
def __init__(self, config: Dict):
|
||||
"""
|
||||
Args:
|
||||
config: Dict mit imap_server, imap_port, email, passwort, ordner,
|
||||
erlaubte_typen, max_groesse_mb
|
||||
"""
|
||||
self.config = config
|
||||
self.connection = None
|
||||
|
||||
def connect(self) -> bool:
|
||||
"""Verbindung zum IMAP-Server herstellen"""
|
||||
try:
|
||||
self.connection = imaplib.IMAP4_SSL(
|
||||
self.config["imap_server"],
|
||||
self.config.get("imap_port", 993)
|
||||
)
|
||||
self.connection.login(
|
||||
self.config["email"],
|
||||
self.config["passwort"]
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"IMAP Verbindungsfehler: {e}")
|
||||
return False
|
||||
|
||||
def disconnect(self):
|
||||
"""Verbindung trennen"""
|
||||
if self.connection:
|
||||
try:
|
||||
self.connection.logout()
|
||||
except:
|
||||
pass
|
||||
self.connection = None
|
||||
|
||||
def liste_ordner(self) -> List[str]:
|
||||
"""Listet alle verfügbaren IMAP-Ordner"""
|
||||
if not self.connection:
|
||||
if not self.connect():
|
||||
return []
|
||||
|
||||
try:
|
||||
status, folders = self.connection.list()
|
||||
ordner_liste = []
|
||||
if status == "OK":
|
||||
for folder in folders:
|
||||
if isinstance(folder, bytes):
|
||||
# Format: (flags) "delimiter" "name"
|
||||
parts = folder.decode().split(' "')
|
||||
if len(parts) >= 3:
|
||||
name = parts[-1].strip('"')
|
||||
ordner_liste.append(name)
|
||||
else:
|
||||
# Fallback
|
||||
ordner_liste.append(folder.decode().split()[-1].strip('"'))
|
||||
return ordner_liste
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Auflisten der Ordner: {e}")
|
||||
return []
|
||||
|
||||
def fetch_attachments(self, ziel_ordner: Optional[Path] = None,
|
||||
nur_ungelesen: bool = False,
|
||||
markiere_gelesen: bool = False,
|
||||
alle_ordner: bool = False,
|
||||
bereits_verarbeitet: set = None) -> List[Dict]:
|
||||
"""
|
||||
Holt alle Attachments die den Filtern entsprechen
|
||||
|
||||
Args:
|
||||
alle_ordner: Wenn True, werden ALLE IMAP-Ordner durchsucht
|
||||
bereits_verarbeitet: Set von Message-IDs die übersprungen werden
|
||||
|
||||
Returns:
|
||||
Liste von Dicts mit: pfad, original_name, absender, betreff, datum, groesse, message_id
|
||||
"""
|
||||
if not self.connection:
|
||||
if not self.connect():
|
||||
return []
|
||||
|
||||
ziel = ziel_ordner or INBOX_DIR
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
ergebnisse = []
|
||||
erlaubte_typen = self.config.get("erlaubte_typen", [".pdf"])
|
||||
max_groesse = self.config.get("max_groesse_mb", 25) * 1024 * 1024
|
||||
bereits_verarbeitet = bereits_verarbeitet or set()
|
||||
|
||||
# Ordner bestimmen
|
||||
if alle_ordner:
|
||||
ordner_liste = self.liste_ordner()
|
||||
logger.info(f"Durchsuche {len(ordner_liste)} Ordner")
|
||||
else:
|
||||
ordner_liste = [self.config.get("ordner", "INBOX")]
|
||||
|
||||
for ordner in ordner_liste:
|
||||
ergebnisse.extend(self._fetch_from_folder(
|
||||
ordner, ziel, erlaubte_typen, max_groesse,
|
||||
nur_ungelesen, markiere_gelesen, bereits_verarbeitet
|
||||
))
|
||||
|
||||
return ergebnisse
|
||||
|
||||
def _fetch_from_folder(self, ordner: str, ziel: Path,
|
||||
erlaubte_typen: List[str], max_groesse: int,
|
||||
nur_ungelesen: bool, markiere_gelesen: bool,
|
||||
bereits_verarbeitet: set) -> List[Dict]:
|
||||
"""Holt Attachments aus einem einzelnen Ordner"""
|
||||
ergebnisse = []
|
||||
|
||||
try:
|
||||
# Ordner auswählen
|
||||
status, _ = self.connection.select(ordner)
|
||||
|
||||
# Suche nach Mails
|
||||
search_criteria = "(UNSEEN)" if nur_ungelesen else "ALL"
|
||||
status, messages = self.connection.search(None, search_criteria)
|
||||
|
||||
if status != "OK":
|
||||
logger.warning(f"Keine Mails gefunden in {ordner}")
|
||||
return []
|
||||
|
||||
mail_ids = messages[0].split()
|
||||
logger.info(f"Gefunden: {len(mail_ids)} Mails in {ordner}")
|
||||
|
||||
for mail_id in mail_ids:
|
||||
try:
|
||||
# Mail abrufen
|
||||
status, msg_data = self.connection.fetch(mail_id, "(RFC822)")
|
||||
if status != "OK":
|
||||
continue
|
||||
|
||||
msg = email.message_from_bytes(msg_data[0][1])
|
||||
|
||||
# Message-ID extrahieren und prüfen ob bereits verarbeitet
|
||||
message_id = msg.get("Message-ID", "")
|
||||
if message_id and message_id in bereits_verarbeitet:
|
||||
continue # Bereits verarbeitet, überspringen
|
||||
|
||||
# Metadaten extrahieren
|
||||
absender = self._decode_header(msg.get("From", ""))
|
||||
betreff = self._decode_header(msg.get("Subject", ""))
|
||||
datum = msg.get("Date", "")
|
||||
|
||||
# Attachments durchgehen
|
||||
for part in msg.walk():
|
||||
if part.get_content_maintype() == "multipart":
|
||||
continue
|
||||
|
||||
filename = part.get_filename()
|
||||
if not filename:
|
||||
continue
|
||||
|
||||
filename = self._decode_header(filename)
|
||||
datei_endung = Path(filename).suffix.lower()
|
||||
|
||||
# Filter prüfen
|
||||
if datei_endung not in erlaubte_typen:
|
||||
logger.debug(f"Überspringe {filename}: Typ {datei_endung} nicht erlaubt")
|
||||
continue
|
||||
|
||||
payload = part.get_payload(decode=True)
|
||||
if not payload:
|
||||
continue
|
||||
|
||||
if len(payload) > max_groesse:
|
||||
logger.warning(f"Überspringe {filename}: Zu groß ({len(payload)} bytes)")
|
||||
continue
|
||||
|
||||
# Speichern
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
safe_filename = self._safe_filename(filename)
|
||||
ziel_pfad = ziel / f"{timestamp}_{safe_filename}"
|
||||
|
||||
# Eindeutigen Namen sicherstellen
|
||||
counter = 1
|
||||
while ziel_pfad.exists():
|
||||
ziel_pfad = ziel / f"{timestamp}_{counter}_{safe_filename}"
|
||||
counter += 1
|
||||
|
||||
ziel_pfad.write_bytes(payload)
|
||||
|
||||
ergebnisse.append({
|
||||
"pfad": str(ziel_pfad),
|
||||
"original_name": filename,
|
||||
"absender": absender,
|
||||
"betreff": betreff,
|
||||
"datum": datum,
|
||||
"groesse": len(payload),
|
||||
"message_id": message_id,
|
||||
"ordner": ordner
|
||||
})
|
||||
|
||||
logger.info(f"Gespeichert: {ziel_pfad.name}")
|
||||
|
||||
# Als gelesen markieren
|
||||
if markiere_gelesen and ergebnisse:
|
||||
self.connection.store(mail_id, "+FLAGS", "\\Seen")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei Mail {mail_id}: {e}")
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Abrufen: {e}")
|
||||
|
||||
return ergebnisse
|
||||
|
||||
def _decode_header(self, value: str) -> str:
|
||||
"""Dekodiert Email-Header (kann encoded sein)"""
|
||||
if not value:
|
||||
return ""
|
||||
try:
|
||||
decoded_parts = decode_header(value)
|
||||
result = []
|
||||
for part, charset in decoded_parts:
|
||||
if isinstance(part, bytes):
|
||||
result.append(part.decode(charset or "utf-8", errors="replace"))
|
||||
else:
|
||||
result.append(part)
|
||||
return " ".join(result)
|
||||
except:
|
||||
return str(value)
|
||||
|
||||
def _safe_filename(self, filename: str) -> str:
|
||||
"""Macht Dateinamen sicher für Dateisystem"""
|
||||
# Ungültige Zeichen ersetzen
|
||||
invalid_chars = '<>:"/\\|?*'
|
||||
for char in invalid_chars:
|
||||
filename = filename.replace(char, "_")
|
||||
return filename.strip()
|
||||
|
||||
def fetch_attachments_generator(self, ziel_ordner: Optional[Path] = None,
|
||||
nur_ungelesen: bool = False,
|
||||
markiere_gelesen: bool = False,
|
||||
alle_ordner: bool = False,
|
||||
bereits_verarbeitet: set = None):
|
||||
"""
|
||||
Generator-Version für Streaming - yielded Events während des Abrufs
|
||||
|
||||
Yields:
|
||||
Dict mit type: "ordner", "mails", "datei", "skip", "fehler"
|
||||
"""
|
||||
if not self.connection:
|
||||
if not self.connect():
|
||||
yield {"type": "fehler", "nachricht": "Verbindung fehlgeschlagen"}
|
||||
return
|
||||
|
||||
ziel = ziel_ordner or INBOX_DIR
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
erlaubte_typen = self.config.get("erlaubte_typen", [".pdf"])
|
||||
max_groesse = self.config.get("max_groesse_mb", 25) * 1024 * 1024
|
||||
bereits_verarbeitet = bereits_verarbeitet or set()
|
||||
|
||||
# Ordner bestimmen
|
||||
if alle_ordner:
|
||||
ordner_liste = self.liste_ordner()
|
||||
yield {"type": "info", "nachricht": f"{len(ordner_liste)} Ordner gefunden"}
|
||||
else:
|
||||
ordner_liste = [self.config.get("ordner", "INBOX")]
|
||||
|
||||
for ordner in ordner_liste:
|
||||
yield {"type": "ordner", "name": ordner}
|
||||
|
||||
try:
|
||||
status, _ = self.connection.select(ordner)
|
||||
search_criteria = "(UNSEEN)" if nur_ungelesen else "ALL"
|
||||
status, messages = self.connection.search(None, search_criteria)
|
||||
|
||||
if status != "OK":
|
||||
continue
|
||||
|
||||
mail_ids = messages[0].split()
|
||||
yield {"type": "mails", "ordner": ordner, "anzahl": len(mail_ids)}
|
||||
|
||||
for mail_id in mail_ids:
|
||||
try:
|
||||
status, msg_data = self.connection.fetch(mail_id, "(RFC822)")
|
||||
if status != "OK":
|
||||
continue
|
||||
|
||||
msg = email.message_from_bytes(msg_data[0][1])
|
||||
message_id = msg.get("Message-ID", "")
|
||||
|
||||
if message_id and message_id in bereits_verarbeitet:
|
||||
continue
|
||||
|
||||
absender = self._decode_header(msg.get("From", ""))
|
||||
betreff = self._decode_header(msg.get("Subject", ""))
|
||||
datum = msg.get("Date", "")
|
||||
|
||||
for part in msg.walk():
|
||||
if part.get_content_maintype() == "multipart":
|
||||
continue
|
||||
|
||||
filename = part.get_filename()
|
||||
if not filename:
|
||||
continue
|
||||
|
||||
filename = self._decode_header(filename)
|
||||
datei_endung = Path(filename).suffix.lower()
|
||||
|
||||
if datei_endung not in erlaubte_typen:
|
||||
continue
|
||||
|
||||
payload = part.get_payload(decode=True)
|
||||
if not payload:
|
||||
continue
|
||||
|
||||
if len(payload) > max_groesse:
|
||||
yield {"type": "skip", "datei": filename, "grund": "zu groß"}
|
||||
continue
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
safe_filename = self._safe_filename(filename)
|
||||
ziel_pfad = ziel / f"{timestamp}_{safe_filename}"
|
||||
|
||||
counter = 1
|
||||
while ziel_pfad.exists():
|
||||
ziel_pfad = ziel / f"{timestamp}_{counter}_{safe_filename}"
|
||||
counter += 1
|
||||
|
||||
ziel_pfad.write_bytes(payload)
|
||||
|
||||
yield {
|
||||
"type": "datei",
|
||||
"pfad": str(ziel_pfad),
|
||||
"original_name": filename,
|
||||
"absender": absender,
|
||||
"betreff": betreff[:100] if betreff else "",
|
||||
"datum": datum,
|
||||
"groesse": len(payload),
|
||||
"message_id": message_id,
|
||||
"ordner": ordner
|
||||
}
|
||||
|
||||
if markiere_gelesen:
|
||||
self.connection.store(mail_id, "+FLAGS", "\\Seen")
|
||||
|
||||
except Exception as e:
|
||||
yield {"type": "fehler", "nachricht": f"Mail-Fehler: {str(e)[:100]}"}
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
yield {"type": "fehler", "nachricht": f"Ordner-Fehler {ordner}: {str(e)[:100]}"}
|
||||
|
||||
def test_connection(self) -> Dict:
|
||||
"""Testet die Verbindung und gibt Status zurück"""
|
||||
try:
|
||||
if self.connect():
|
||||
# Ordner auflisten
|
||||
status, folders = self.connection.list()
|
||||
ordner_liste = []
|
||||
if status == "OK":
|
||||
for folder in folders:
|
||||
if isinstance(folder, bytes):
|
||||
ordner_liste.append(folder.decode())
|
||||
self.disconnect()
|
||||
return {
|
||||
"erfolg": True,
|
||||
"nachricht": "Verbindung erfolgreich",
|
||||
"ordner": ordner_liste
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"erfolg": False,
|
||||
"nachricht": "Verbindung fehlgeschlagen"
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"erfolg": False,
|
||||
"nachricht": str(e)
|
||||
}
|
||||
|
|
@ -1,248 +0,0 @@
|
|||
"""
|
||||
PDF-Processor Modul
|
||||
Text-Extraktion, OCR und ZUGFeRD-Erkennung
|
||||
"""
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Tuple
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Versuche Libraries zu importieren
|
||||
try:
|
||||
import pdfplumber
|
||||
PDFPLUMBER_AVAILABLE = True
|
||||
except ImportError:
|
||||
PDFPLUMBER_AVAILABLE = False
|
||||
logger.warning("pdfplumber nicht installiert")
|
||||
|
||||
try:
|
||||
from pypdf import PdfReader
|
||||
PYPDF_AVAILABLE = True
|
||||
except ImportError:
|
||||
PYPDF_AVAILABLE = False
|
||||
logger.warning("pypdf nicht installiert")
|
||||
|
||||
|
||||
class PDFProcessor:
|
||||
"""Verarbeitet PDFs: Text-Extraktion, OCR, ZUGFeRD-Erkennung"""
|
||||
|
||||
def __init__(self, ocr_language: str = "deu", ocr_dpi: int = 300):
|
||||
self.ocr_language = ocr_language
|
||||
self.ocr_dpi = ocr_dpi
|
||||
|
||||
def verarbeite(self, pdf_pfad: str) -> Dict:
|
||||
"""
|
||||
Vollständige PDF-Verarbeitung
|
||||
|
||||
Returns:
|
||||
Dict mit: text, ist_zugferd, zugferd_xml, hat_text, ocr_durchgefuehrt
|
||||
"""
|
||||
pfad = Path(pdf_pfad)
|
||||
if not pfad.exists():
|
||||
return {"fehler": f"Datei nicht gefunden: {pdf_pfad}"}
|
||||
|
||||
ergebnis = {
|
||||
"pfad": str(pfad),
|
||||
"text": "",
|
||||
"ist_zugferd": False,
|
||||
"zugferd_xml": None,
|
||||
"hat_text": False,
|
||||
"ocr_durchgefuehrt": False,
|
||||
"seiten": 0
|
||||
}
|
||||
|
||||
# 1. ZUGFeRD prüfen
|
||||
zugferd_result = self.pruefe_zugferd(pdf_pfad)
|
||||
ergebnis["ist_zugferd"] = zugferd_result["ist_zugferd"]
|
||||
ergebnis["zugferd_xml"] = zugferd_result.get("xml")
|
||||
|
||||
# 2. Text extrahieren
|
||||
text, seiten = self.extrahiere_text(pdf_pfad)
|
||||
ergebnis["text"] = text
|
||||
ergebnis["seiten"] = seiten
|
||||
ergebnis["hat_text"] = bool(text and len(text.strip()) > 50)
|
||||
|
||||
# 3. OCR falls kein Text (aber NICHT bei ZUGFeRD!)
|
||||
if not ergebnis["hat_text"] and not ergebnis["ist_zugferd"]:
|
||||
logger.info(f"Kein Text gefunden, starte OCR für {pfad.name}")
|
||||
ocr_text, ocr_erfolg = self.fuehre_ocr_aus(pdf_pfad)
|
||||
if ocr_erfolg:
|
||||
ergebnis["text"] = ocr_text
|
||||
ergebnis["hat_text"] = bool(ocr_text and len(ocr_text.strip()) > 50)
|
||||
ergebnis["ocr_durchgefuehrt"] = True
|
||||
|
||||
return ergebnis
|
||||
|
||||
def extrahiere_text(self, pdf_pfad: str) -> Tuple[str, int]:
|
||||
"""
|
||||
Extrahiert Text aus PDF
|
||||
|
||||
Returns:
|
||||
Tuple von (text, seitenanzahl)
|
||||
"""
|
||||
text_parts = []
|
||||
seiten = 0
|
||||
|
||||
# Methode 1: pdfplumber (besser für Tabellen)
|
||||
if PDFPLUMBER_AVAILABLE:
|
||||
try:
|
||||
with pdfplumber.open(pdf_pfad) as pdf:
|
||||
seiten = len(pdf.pages)
|
||||
for page in pdf.pages:
|
||||
page_text = page.extract_text()
|
||||
if page_text:
|
||||
text_parts.append(page_text)
|
||||
if text_parts:
|
||||
return "\n\n".join(text_parts), seiten
|
||||
except Exception as e:
|
||||
logger.debug(f"pdfplumber Fehler: {e}")
|
||||
|
||||
# Methode 2: pypdf (Fallback)
|
||||
if PYPDF_AVAILABLE:
|
||||
try:
|
||||
reader = PdfReader(pdf_pfad)
|
||||
seiten = len(reader.pages)
|
||||
for page in reader.pages:
|
||||
page_text = page.extract_text()
|
||||
if page_text:
|
||||
text_parts.append(page_text)
|
||||
if text_parts:
|
||||
return "\n\n".join(text_parts), seiten
|
||||
except Exception as e:
|
||||
logger.debug(f"pypdf Fehler: {e}")
|
||||
|
||||
# Methode 3: pdftotext CLI (Fallback)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["pdftotext", "-layout", pdf_pfad, "-"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
return result.stdout, seiten
|
||||
except Exception as e:
|
||||
logger.debug(f"pdftotext Fehler: {e}")
|
||||
|
||||
return "", seiten
|
||||
|
||||
def pruefe_zugferd(self, pdf_pfad: str) -> Dict:
|
||||
"""
|
||||
Prüft ob PDF eine ZUGFeRD/Factur-X Rechnung ist
|
||||
|
||||
Returns:
|
||||
Dict mit: ist_zugferd, xml (falls vorhanden)
|
||||
"""
|
||||
ergebnis = {"ist_zugferd": False, "xml": None}
|
||||
|
||||
# Methode 1: factur-x Library
|
||||
try:
|
||||
from facturx import get_facturx_xml_from_pdf
|
||||
xml_bytes = get_facturx_xml_from_pdf(pdf_pfad)
|
||||
if xml_bytes:
|
||||
ergebnis["ist_zugferd"] = True
|
||||
ergebnis["xml"] = xml_bytes.decode("utf-8") if isinstance(xml_bytes, bytes) else xml_bytes
|
||||
logger.info(f"ZUGFeRD erkannt: {Path(pdf_pfad).name}")
|
||||
return ergebnis
|
||||
except ImportError:
|
||||
logger.debug("factur-x nicht installiert")
|
||||
except Exception as e:
|
||||
logger.debug(f"factur-x Fehler: {e}")
|
||||
|
||||
# Methode 2: Manuell nach XML-Attachment suchen
|
||||
if PYPDF_AVAILABLE:
|
||||
try:
|
||||
reader = PdfReader(pdf_pfad)
|
||||
if "/Names" in reader.trailer.get("/Root", {}):
|
||||
# Embedded Files prüfen
|
||||
pass # Komplexere Logik hier
|
||||
|
||||
# Alternativ: Im Text nach ZUGFeRD-Markern suchen
|
||||
for page in reader.pages[:1]: # Nur erste Seite
|
||||
text = page.extract_text() or ""
|
||||
if any(marker in text.upper() for marker in ["ZUGFERD", "FACTUR-X", "EN 16931"]):
|
||||
ergebnis["ist_zugferd"] = True
|
||||
logger.info(f"ZUGFeRD-Marker gefunden: {Path(pdf_pfad).name}")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"ZUGFeRD-Prüfung Fehler: {e}")
|
||||
|
||||
return ergebnis
|
||||
|
||||
def fuehre_ocr_aus(self, pdf_pfad: str) -> Tuple[str, bool]:
|
||||
"""
|
||||
Führt OCR mit ocrmypdf durch
|
||||
|
||||
Returns:
|
||||
Tuple von (text, erfolg)
|
||||
"""
|
||||
pfad = Path(pdf_pfad)
|
||||
temp_pfad = pfad.with_suffix(".ocr.pdf")
|
||||
|
||||
try:
|
||||
# ocrmypdf ausführen
|
||||
result = subprocess.run(
|
||||
[
|
||||
"ocrmypdf",
|
||||
"--language", self.ocr_language,
|
||||
"--deskew", # Schräge Scans korrigieren
|
||||
"--clean", # Bild verbessern
|
||||
"--skip-text", # Seiten mit Text überspringen
|
||||
"--force-ocr", # OCR erzwingen falls nötig
|
||||
str(pfad),
|
||||
str(temp_pfad)
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120 # 2 Minuten Timeout
|
||||
)
|
||||
|
||||
if result.returncode == 0 and temp_pfad.exists():
|
||||
# Original mit OCR-Version ersetzen
|
||||
pfad.unlink()
|
||||
temp_pfad.rename(pfad)
|
||||
|
||||
# Text aus OCR-PDF extrahieren
|
||||
text, _ = self.extrahiere_text(str(pfad))
|
||||
return text, True
|
||||
else:
|
||||
logger.error(f"OCR Fehler: {result.stderr}")
|
||||
if temp_pfad.exists():
|
||||
temp_pfad.unlink()
|
||||
return "", False
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error(f"OCR Timeout für {pfad.name}")
|
||||
if temp_pfad.exists():
|
||||
temp_pfad.unlink()
|
||||
return "", False
|
||||
except FileNotFoundError:
|
||||
logger.error("ocrmypdf nicht installiert")
|
||||
return "", False
|
||||
except Exception as e:
|
||||
logger.error(f"OCR Fehler: {e}")
|
||||
if temp_pfad.exists():
|
||||
temp_pfad.unlink()
|
||||
return "", False
|
||||
|
||||
def extrahiere_metadaten(self, pdf_pfad: str) -> Dict:
|
||||
"""Extrahiert PDF-Metadaten"""
|
||||
metadaten = {}
|
||||
|
||||
if PYPDF_AVAILABLE:
|
||||
try:
|
||||
reader = PdfReader(pdf_pfad)
|
||||
if reader.metadata:
|
||||
metadaten = {
|
||||
"titel": reader.metadata.get("/Title", ""),
|
||||
"autor": reader.metadata.get("/Author", ""),
|
||||
"ersteller": reader.metadata.get("/Creator", ""),
|
||||
"erstellt": reader.metadata.get("/CreationDate", ""),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug(f"Metadaten-Fehler: {e}")
|
||||
|
||||
return metadaten
|
||||
|
|
@ -1,323 +0,0 @@
|
|||
"""
|
||||
Sorter Modul
|
||||
Regel-basierte Erkennung und Benennung von Dokumenten
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
import logging
|
||||
import shutil
|
||||
|
||||
from .extraktoren import extrahiere_alle_felder, baue_dateiname
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Sorter:
|
||||
"""Sortiert und benennt Dokumente basierend auf Regeln"""
|
||||
|
||||
def __init__(self, regeln: List[Dict]):
|
||||
"""
|
||||
Args:
|
||||
regeln: Liste von Regel-Dicts, sortiert nach Priorität
|
||||
"""
|
||||
# Nach Priorität sortieren (niedrig = wichtig)
|
||||
self.regeln = sorted(regeln, key=lambda r: r.get("prioritaet", 100))
|
||||
|
||||
def finde_passende_regel(self, dokument_info: Dict) -> Optional[Dict]:
|
||||
"""
|
||||
Findet die erste passende Regel für ein Dokument
|
||||
|
||||
Args:
|
||||
dokument_info: Dict mit text, original_name, absender, etc.
|
||||
|
||||
Returns:
|
||||
Passende Regel oder None
|
||||
"""
|
||||
for regel in self.regeln:
|
||||
if not regel.get("aktiv", True):
|
||||
continue
|
||||
|
||||
muster = regel.get("muster", {})
|
||||
if self._pruefe_muster(muster, dokument_info):
|
||||
logger.info(f"Regel '{regel.get('name')}' matched für {dokument_info.get('original_name')}")
|
||||
return regel
|
||||
|
||||
return None
|
||||
|
||||
def _pruefe_muster(self, muster: Dict, dokument_info: Dict) -> bool:
|
||||
"""Prüft ob alle Muster auf das Dokument zutreffen"""
|
||||
text = dokument_info.get("text", "").lower()
|
||||
original_name = dokument_info.get("original_name", "").lower()
|
||||
absender = dokument_info.get("absender", "").lower()
|
||||
|
||||
# keywords (einfache Komma-getrennte Liste - für UI)
|
||||
if "keywords" in muster:
|
||||
keywords = muster["keywords"]
|
||||
if isinstance(keywords, str):
|
||||
keywords = [k.strip() for k in keywords.split(",")]
|
||||
# Alle Keywords müssen vorkommen
|
||||
for keyword in keywords:
|
||||
keyword = keyword.lower().strip()
|
||||
if keyword and keyword not in text and keyword not in original_name:
|
||||
return False
|
||||
|
||||
# absender_contains
|
||||
if "absender_contains" in muster:
|
||||
if muster["absender_contains"].lower() not in absender:
|
||||
return False
|
||||
|
||||
# dateiname_match
|
||||
if "dateiname_match" in muster:
|
||||
pattern = muster["dateiname_match"]
|
||||
if isinstance(pattern, str):
|
||||
if pattern.lower() not in original_name:
|
||||
return False
|
||||
elif isinstance(pattern, list):
|
||||
if not any(p.lower() in original_name for p in pattern):
|
||||
return False
|
||||
|
||||
# text_match (alle müssen enthalten sein)
|
||||
if "text_match" in muster:
|
||||
patterns = muster["text_match"]
|
||||
if isinstance(patterns, str):
|
||||
patterns = [patterns]
|
||||
for pattern in patterns:
|
||||
if pattern.lower() not in text:
|
||||
return False
|
||||
|
||||
# text_match_any (mindestens einer muss enthalten sein)
|
||||
if "text_match_any" in muster:
|
||||
patterns = muster["text_match_any"]
|
||||
if isinstance(patterns, str):
|
||||
patterns = [patterns]
|
||||
if not any(p.lower() in text for p in patterns):
|
||||
return False
|
||||
|
||||
# text_regex
|
||||
if "text_regex" in muster:
|
||||
pattern = muster["text_regex"]
|
||||
if not re.search(pattern, text, re.IGNORECASE):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def extrahiere_felder(self, regel: Dict, dokument_info: Dict) -> Dict[str, Any]:
|
||||
"""
|
||||
Extrahiert Felder aus dem Dokument - nutzt globale Extraktoren mit Fallbacks
|
||||
|
||||
Returns:
|
||||
Dict mit extrahierten Werten
|
||||
"""
|
||||
text = dokument_info.get("text", "")
|
||||
regel_extraktion = regel.get("extraktion", {})
|
||||
|
||||
# Globale Extraktoren nutzen (mit Regel-spezifischen Überschreibungen)
|
||||
felder = extrahiere_alle_felder(text, dokument_info, regel_extraktion)
|
||||
|
||||
# Regel-spezifische statische Werte überschreiben
|
||||
for feld_name, feld_config in regel_extraktion.items():
|
||||
if isinstance(feld_config, dict):
|
||||
if "wert" in feld_config:
|
||||
felder[feld_name] = feld_config["wert"]
|
||||
elif "regex" in feld_config:
|
||||
# Einzelnes Regex aus Regel
|
||||
wert = self._extrahiere_mit_regex(feld_config, text)
|
||||
if wert:
|
||||
felder[feld_name] = wert
|
||||
elif isinstance(feld_config, str):
|
||||
# Direkter statischer Wert
|
||||
felder[feld_name] = feld_config
|
||||
|
||||
return felder
|
||||
|
||||
def _extrahiere_mit_regex(self, config: Dict, text: str) -> Optional[str]:
|
||||
"""Extrahiert ein Feld mit einem einzelnen Regex"""
|
||||
try:
|
||||
match = re.search(config["regex"], text, re.IGNORECASE | re.MULTILINE)
|
||||
if match:
|
||||
wert = match.group(1) if match.groups() else match.group(0)
|
||||
|
||||
# Datum formatieren
|
||||
if "format" in config:
|
||||
try:
|
||||
datum = datetime.strptime(wert.strip(), config["format"])
|
||||
return datum.strftime("%Y-%m-%d")
|
||||
except:
|
||||
pass
|
||||
|
||||
# Betrag formatieren
|
||||
if config.get("typ") == "betrag":
|
||||
wert = self._formatiere_betrag(wert)
|
||||
|
||||
return wert.strip()
|
||||
except Exception as e:
|
||||
logger.debug(f"Regex-Extraktion fehlgeschlagen: {e}")
|
||||
|
||||
return None
|
||||
|
||||
def _formatiere_betrag(self, betrag: str) -> str:
|
||||
"""Formatiert Betrag einheitlich"""
|
||||
betrag = betrag.replace(" ", "").replace(".", "").replace(",", ".")
|
||||
|
||||
try:
|
||||
wert = float(betrag)
|
||||
if wert == int(wert):
|
||||
return str(int(wert))
|
||||
return f"{wert:.2f}".replace(".", ",")
|
||||
except:
|
||||
return betrag
|
||||
|
||||
def generiere_dateinamen(self, regel: Dict, extrahierte_felder: Dict) -> str:
|
||||
"""
|
||||
Generiert den neuen Dateinamen basierend auf Schema
|
||||
Nutzt den intelligenten Schema-Builder der fehlende Felder entfernt
|
||||
"""
|
||||
schema = regel.get("schema", "{datum} - Dokument.pdf")
|
||||
return baue_dateiname(schema, extrahierte_felder, ".pdf")
|
||||
|
||||
    def verschiebe_datei(self, quell_pfad: str, ziel_ordner: str, neuer_name: str) -> str:
        """
        Moves and renames a file

        Returns:
            New path of the file
        """
        ziel_dir = Path(ziel_ordner)
        ziel_dir.mkdir(parents=True, exist_ok=True)

        ziel_pfad = ziel_dir / neuer_name

        # Ensure the name is unique
        counter = 1
        original_name = ziel_pfad.stem
        suffix = ziel_pfad.suffix
        while ziel_pfad.exists():
            ziel_pfad = ziel_dir / f"{original_name} ({counter}){suffix}"
            counter += 1

        # Move
        shutil.move(quell_pfad, ziel_pfad)
        logger.info(f"Verschoben: {quell_pfad} -> {ziel_pfad}")

        return str(ziel_pfad)

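A short usage sketch of the collision handling above; the paths and file names are hypothetical, only the "(1)" suffix behaviour is taken from the method:

```python
# Hypothetical paths - demonstrates the unique-name loop in verschiebe_datei above
s = Sorter([])
erster = s.verschiebe_datei("/tmp/inbox/a.pdf", "/tmp/out", "2024-01-15 - Rechnung.pdf")
zweiter = s.verschiebe_datei("/tmp/inbox/b.pdf", "/tmp/out", "2024-01-15 - Rechnung.pdf")
# erster  -> /tmp/out/2024-01-15 - Rechnung.pdf
# zweiter -> /tmp/out/2024-01-15 - Rechnung (1).pdf
```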

# ============ STANDARD-DOKUMENTTYPEN ============
# These are used by the simple UI

DOKUMENTTYPEN = {
    "rechnung": {
        "name": "Rechnung",
        "keywords": ["rechnung", "invoice"],
        "schema": "{datum} - Rechnung - {firma} - {nummer} - {betrag} EUR.pdf",
        "unterordner": "rechnungen"
    },
    "angebot": {
        "name": "Angebot",
        "keywords": ["angebot", "quotation", "offerte"],
        "schema": "{datum} - Angebot - {firma} - {nummer} - {betrag} EUR.pdf",
        "unterordner": "angebote"
    },
    "gutschrift": {
        "name": "Gutschrift",
        "keywords": ["gutschrift", "credit"],
        "schema": "{datum} - Gutschrift - {firma} - {nummer} - {betrag} EUR.pdf",
        "unterordner": "gutschriften"
    },
    "lieferschein": {
        "name": "Lieferschein",
        "keywords": ["lieferschein", "delivery"],
        "schema": "{datum} - Lieferschein - {firma} - {nummer}.pdf",
        "unterordner": "lieferscheine"
    },
    "auftragsbestaetigung": {
        "name": "Auftragsbestätigung",
        "keywords": ["auftragsbestätigung", "bestellbestätigung"],
        "schema": "{datum} - Auftragsbestätigung - {firma} - {nummer}.pdf",
        "unterordner": "auftraege"
    },
    "vertrag": {
        "name": "Vertrag",
        "keywords": ["vertrag", "contract"],
        "schema": "Vertrag - {firma} - {nummer} - {datum}.pdf",
        "unterordner": "vertraege"
    },
    "versicherung": {
        "name": "Versicherung",
        "keywords": ["versicherung", "police", "beitrag"],
        "schema": "Versicherung - {firma} - {nummer} - {datum}.pdf",
        "unterordner": "versicherungen"
    },
    "zeugnis": {
        "name": "Zeugnis",
        "keywords": ["zeugnis", "zertifikat"],
        "schema": "Zeugnis - {firma} - {nummer} - {datum}.pdf",
        "unterordner": "zeugnisse"
    },
    "bescheinigung": {
        "name": "Bescheinigung",
        "keywords": ["bescheinigung", "nachweis", "bestätigung"],
        "schema": "Bescheinigung - {firma} - {nummer} - {datum}.pdf",
        "unterordner": "bescheinigungen"
    },
    "kontoauszug": {
        "name": "Kontoauszug",
        "keywords": ["kontoauszug", "account statement"],
        "schema": "{datum} - Kontoauszug - {firma} - {nummer}.pdf",
        "unterordner": "kontoauszuege"
    },
    "sonstiges": {
        "name": "Sonstiges",
        "keywords": [],
        "schema": "{datum} - {typ} - {firma}.pdf",
        "unterordner": "sonstiges"
    }
}

def erstelle_einfache_regel(name: str, dokumenttyp: str, keywords: str,
                            firma_wert: str = None, unterordner: str = None,
                            prioritaet: int = 50) -> Dict:
    """
    Creates a simple rule based on a document type

    Args:
        name: Name of the rule (e.g. "Sonepar Rechnung")
        dokumenttyp: Type from DOKUMENTTYPEN
        keywords: Comma-separated keywords used for matching
        firma_wert: Optional fixed company value
        unterordner: Optional subfolder (overrides the default)
        prioritaet: Priority (lower = more important)

    Returns:
        Rule dict for the database
    """
    typ_config = DOKUMENTTYPEN.get(dokumenttyp, DOKUMENTTYPEN["sonstiges"])

    regel = {
        "name": name,
        "prioritaet": prioritaet,
        "aktiv": True,
        "muster": {
            "keywords": keywords
        },
        "extraktion": {},
        "schema": typ_config["schema"],
        "unterordner": unterordner or typ_config["unterordner"]
    }

    # Fixed company value if provided
    if firma_wert:
        regel["extraktion"]["firma"] = {"wert": firma_wert}

    return regel


def liste_dokumenttypen() -> List[Dict]:
    """Returns the list of all document types for the UI"""
    return [
        {"id": key, "name": config["name"], "schema": config["schema"]}
        for key, config in DOKUMENTTYPEN.items()
    ]
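A brief usage sketch of the helper above; the rule name, keywords and company value are invented:

```python
# Hypothetical call - argument names and resulting keys follow erstelle_einfache_regel above
regel = erstelle_einfache_regel(
    name="Sonepar Rechnung",
    dokumenttyp="rechnung",
    keywords="sonepar, rechnung",
    firma_wert="Sonepar",
)
# regel["schema"]      == "{datum} - Rechnung - {firma} - {nummer} - {betrag} EUR.pdf"
# regel["unterordner"] == "rechnungen"
# regel["extraktion"]  == {"firma": {"wert": "Sonepar"}}
```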
@@ -1 +0,0 @@
# API Routes
Binary file not shown.
Binary file not shown.

@@ -1,851 +0,0 @@
"""
|
||||
API Routes - Getrennte Bereiche: Mail-Abruf und Datei-Sortierung
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import json
|
||||
import asyncio
|
||||
|
||||
from ..models.database import get_db, Postfach, QuellOrdner, SortierRegel, VerarbeiteteDatei, VerarbeiteteMail
|
||||
from ..modules.mail_fetcher import MailFetcher
|
||||
from ..modules.pdf_processor import PDFProcessor
|
||||
from ..modules.sorter import Sorter
|
||||
|
||||
router = APIRouter(prefix="/api", tags=["api"])
|
||||
|
||||
|
||||
# ============ Pydantic Models ============
|
||||
|
||||
class PostfachCreate(BaseModel):
|
||||
name: str
|
||||
imap_server: str
|
||||
imap_port: int = 993
|
||||
email: str
|
||||
passwort: str
|
||||
ordner: str = "INBOX"
|
||||
alle_ordner: bool = False # Alle IMAP-Ordner durchsuchen
|
||||
nur_ungelesen: bool = False # Nur ungelesene Mails (False = alle)
|
||||
ziel_ordner: str
|
||||
erlaubte_typen: List[str] = [".pdf"]
|
||||
max_groesse_mb: int = 25
|
||||
|
||||
|
||||
class PostfachResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
imap_server: str
|
||||
email: str
|
||||
ordner: str
|
||||
alle_ordner: bool
|
||||
nur_ungelesen: bool
|
||||
ziel_ordner: str
|
||||
erlaubte_typen: List[str]
|
||||
max_groesse_mb: int
|
||||
letzter_abruf: Optional[datetime]
|
||||
letzte_anzahl: int
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class OrdnerCreate(BaseModel):
|
||||
name: str
|
||||
pfad: str
|
||||
ziel_ordner: str
|
||||
rekursiv: bool = True
|
||||
dateitypen: List[str] = [".pdf", ".jpg", ".jpeg", ".png", ".tiff"]
|
||||
|
||||
|
||||
class OrdnerResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
pfad: str
|
||||
ziel_ordner: str
|
||||
rekursiv: bool
|
||||
dateitypen: List[str]
|
||||
aktiv: bool
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class RegelCreate(BaseModel):
|
||||
name: str
|
||||
prioritaet: int = 100
|
||||
muster: dict = {}
|
||||
extraktion: dict = {}
|
||||
schema: str = "{datum} - Dokument.pdf"
|
||||
unterordner: Optional[str] = None
|
||||
|
||||
|
||||
class RegelResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
prioritaet: int
|
||||
aktiv: bool
|
||||
muster: dict
|
||||
extraktion: dict
|
||||
schema: str
|
||||
unterordner: Optional[str]
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class RegelTestRequest(BaseModel):
|
||||
regel: dict
|
||||
text: str
|
||||
|
||||
|
||||
# ============ Verzeichnis-Browser ============
|
||||
|
||||
@router.get("/browse")
|
||||
def browse_directory(path: str = "/"):
|
||||
"""Listet Verzeichnisse für File-Browser"""
|
||||
import os
|
||||
|
||||
# Sicherheit: Nur bestimmte Basispfade erlauben
|
||||
allowed_bases = ["/srv", "/home", "/mnt", "/media", "/data", "/tmp"]
|
||||
path = os.path.abspath(path)
|
||||
|
||||
# Prüfen ob Pfad erlaubt
|
||||
is_allowed = any(path.startswith(base) for base in allowed_bases) or path == "/"
|
||||
if not is_allowed:
|
||||
return {"error": "Pfad nicht erlaubt", "entries": []}
|
||||
|
||||
if not os.path.exists(path):
|
||||
return {"error": "Pfad existiert nicht", "entries": []}
|
||||
|
||||
if not os.path.isdir(path):
|
||||
return {"error": "Kein Verzeichnis", "entries": []}
|
||||
|
||||
try:
|
||||
entries = []
|
||||
for entry in sorted(os.listdir(path)):
|
||||
full_path = os.path.join(path, entry)
|
||||
if os.path.isdir(full_path):
|
||||
entries.append({
|
||||
"name": entry,
|
||||
"path": full_path,
|
||||
"type": "directory"
|
||||
})
|
||||
|
||||
return {
|
||||
"current": path,
|
||||
"parent": os.path.dirname(path) if path != "/" else None,
|
||||
"entries": entries
|
||||
}
|
||||
except PermissionError:
|
||||
return {"error": "Zugriff verweigert", "entries": []}
|
||||
|
||||
|
||||
# ============ BEREICH 1: Postfächer ============
|
||||
|
||||
@router.get("/postfaecher", response_model=List[PostfachResponse])
|
||||
def liste_postfaecher(db: Session = Depends(get_db)):
|
||||
return db.query(Postfach).all()
|
||||
|
||||
|
||||
@router.post("/postfaecher", response_model=PostfachResponse)
|
||||
def erstelle_postfach(data: PostfachCreate, db: Session = Depends(get_db)):
|
||||
postfach = Postfach(**data.dict())
|
||||
db.add(postfach)
|
||||
db.commit()
|
||||
db.refresh(postfach)
|
||||
return postfach
|
||||
|
||||
|
||||
@router.put("/postfaecher/{id}", response_model=PostfachResponse)
|
||||
def aktualisiere_postfach(id: int, data: PostfachCreate, db: Session = Depends(get_db)):
|
||||
postfach = db.query(Postfach).filter(Postfach.id == id).first()
|
||||
if not postfach:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
|
||||
update_data = data.dict()
|
||||
# Passwort nur aktualisieren wenn nicht leer
|
||||
if not update_data.get("passwort"):
|
||||
del update_data["passwort"]
|
||||
|
||||
for key, value in update_data.items():
|
||||
setattr(postfach, key, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(postfach)
|
||||
return postfach
|
||||
|
||||
|
||||
@router.delete("/postfaecher/{id}")
|
||||
def loesche_postfach(id: int, db: Session = Depends(get_db)):
|
||||
postfach = db.query(Postfach).filter(Postfach.id == id).first()
|
||||
if not postfach:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
db.delete(postfach)
|
||||
db.commit()
|
||||
return {"message": "Gelöscht"}
|
||||
|
||||
|
||||
@router.post("/postfaecher/{id}/test")
|
||||
def teste_postfach(id: int, db: Session = Depends(get_db)):
|
||||
postfach = db.query(Postfach).filter(Postfach.id == id).first()
|
||||
if not postfach:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
|
||||
fetcher = MailFetcher({
|
||||
"imap_server": postfach.imap_server,
|
||||
"imap_port": postfach.imap_port,
|
||||
"email": postfach.email,
|
||||
"passwort": postfach.passwort,
|
||||
"ordner": postfach.ordner
|
||||
})
|
||||
return fetcher.test_connection()
|
||||
|
||||
|
||||
@router.get("/postfaecher/{id}/abrufen/stream")
|
||||
def rufe_postfach_ab_stream(id: int, db: Session = Depends(get_db)):
|
||||
"""Streaming-Endpoint für Mail-Abruf mit Live-Updates"""
|
||||
postfach = db.query(Postfach).filter(Postfach.id == id).first()
|
||||
if not postfach:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
|
||||
# Daten kopieren für Generator (Session ist nach return nicht mehr verfügbar)
|
||||
pf_data = {
|
||||
"id": postfach.id,
|
||||
"name": postfach.name,
|
||||
"imap_server": postfach.imap_server,
|
||||
"imap_port": postfach.imap_port,
|
||||
"email": postfach.email,
|
||||
"passwort": postfach.passwort,
|
||||
"ordner": postfach.ordner,
|
||||
"alle_ordner": postfach.alle_ordner,
|
||||
"erlaubte_typen": postfach.erlaubte_typen,
|
||||
"max_groesse_mb": postfach.max_groesse_mb,
|
||||
"ziel_ordner": postfach.ziel_ordner
|
||||
}
|
||||
|
||||
# Bereits verarbeitete Message-IDs laden
|
||||
bereits_verarbeitet = set(
|
||||
row.message_id for row in
|
||||
db.query(VerarbeiteteMail.message_id)
|
||||
.filter(VerarbeiteteMail.postfach_id == id)
|
||||
.all()
|
||||
)
|
||||
|
||||
def event_generator():
|
||||
from ..models.database import SessionLocal
|
||||
|
||||
def send_event(data):
|
||||
return f"data: {json.dumps(data)}\n\n"
|
||||
|
||||
yield send_event({"type": "start", "postfach": pf_data["name"], "bereits_verarbeitet": len(bereits_verarbeitet)})
|
||||
|
||||
# Zielordner erstellen
|
||||
ziel = Path(pf_data["ziel_ordner"])
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
fetcher = MailFetcher({
|
||||
"imap_server": pf_data["imap_server"],
|
||||
"imap_port": pf_data["imap_port"],
|
||||
"email": pf_data["email"],
|
||||
"passwort": pf_data["passwort"],
|
||||
"ordner": pf_data["ordner"],
|
||||
"erlaubte_typen": pf_data["erlaubte_typen"],
|
||||
"max_groesse_mb": pf_data["max_groesse_mb"]
|
||||
})
|
||||
|
||||
attachments = []
|
||||
|
||||
try:
|
||||
# Generator für streaming
|
||||
for event in fetcher.fetch_attachments_generator(
|
||||
ziel,
|
||||
nur_ungelesen=False,
|
||||
alle_ordner=pf_data["alle_ordner"],
|
||||
bereits_verarbeitet=bereits_verarbeitet
|
||||
):
|
||||
yield send_event(event)
|
||||
|
||||
if event.get("type") == "datei":
|
||||
attachments.append(event)
|
||||
|
||||
# DB-Session für Speicherung
|
||||
session = SessionLocal()
|
||||
try:
|
||||
verarbeitete_msg_ids = set()
|
||||
for att in attachments:
|
||||
msg_id = att.get("message_id")
|
||||
if msg_id and msg_id not in verarbeitete_msg_ids:
|
||||
verarbeitete_msg_ids.add(msg_id)
|
||||
session.add(VerarbeiteteMail(
|
||||
postfach_id=pf_data["id"],
|
||||
message_id=msg_id,
|
||||
ordner=att.get("ordner", ""),
|
||||
betreff=att.get("betreff", "")[:500] if att.get("betreff") else None,
|
||||
absender=att.get("absender", "")[:255] if att.get("absender") else None,
|
||||
anzahl_attachments=1
|
||||
))
|
||||
|
||||
# Postfach aktualisieren
|
||||
pf = session.query(Postfach).filter(Postfach.id == pf_data["id"]).first()
|
||||
if pf:
|
||||
pf.letzter_abruf = datetime.utcnow()
|
||||
pf.letzte_anzahl = len(attachments)
|
||||
session.commit()
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
yield send_event({"type": "fertig", "anzahl": len(attachments)})
|
||||
|
||||
except Exception as e:
|
||||
yield send_event({"type": "fehler", "nachricht": str(e)})
|
||||
finally:
|
||||
fetcher.disconnect()
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no"
|
||||
}
|
||||
)
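A small consumer sketch for the event stream above, assuming a client with `requests` installed; mailbox id 1 in the URL is a placeholder:

```python
# Hypothetical consumer - parses the "data: {...}" lines produced by send_event above
import json
import requests

url = "http://localhost:8000/api/postfaecher/1/abrufen/stream"
with requests.get(url, stream=True) as resp:
    for zeile in resp.iter_lines():
        if zeile.startswith(b"data: "):
            event = json.loads(zeile[len(b"data: "):])
            print(event.get("type"), event)
```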
|
||||
|
||||
|
||||
@router.post("/postfaecher/{id}/abrufen")
|
||||
def rufe_postfach_ab(id: int, db: Session = Depends(get_db)):
|
||||
postfach = db.query(Postfach).filter(Postfach.id == id).first()
|
||||
if not postfach:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
|
||||
# Bereits verarbeitete Message-IDs laden
|
||||
bereits_verarbeitet = set(
|
||||
row.message_id for row in
|
||||
db.query(VerarbeiteteMail.message_id)
|
||||
.filter(VerarbeiteteMail.postfach_id == id)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Zielordner erstellen
|
||||
ziel = Path(postfach.ziel_ordner)
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
fetcher = MailFetcher({
|
||||
"imap_server": postfach.imap_server,
|
||||
"imap_port": postfach.imap_port,
|
||||
"email": postfach.email,
|
||||
"passwort": postfach.passwort,
|
||||
"ordner": postfach.ordner,
|
||||
"erlaubte_typen": postfach.erlaubte_typen,
|
||||
"max_groesse_mb": postfach.max_groesse_mb
|
||||
})
|
||||
|
||||
try:
|
||||
attachments = fetcher.fetch_attachments(
|
||||
ziel,
|
||||
nur_ungelesen=False, # Alle Mails durchsuchen
|
||||
alle_ordner=postfach.alle_ordner,
|
||||
bereits_verarbeitet=bereits_verarbeitet
|
||||
)
|
||||
|
||||
# Verarbeitete Mails in DB speichern
|
||||
verarbeitete_msg_ids = set()
|
||||
for att in attachments:
|
||||
msg_id = att.get("message_id")
|
||||
if msg_id and msg_id not in verarbeitete_msg_ids:
|
||||
verarbeitete_msg_ids.add(msg_id)
|
||||
db.add(VerarbeiteteMail(
|
||||
postfach_id=id,
|
||||
message_id=msg_id,
|
||||
ordner=att.get("ordner", ""),
|
||||
betreff=att.get("betreff", "")[:500] if att.get("betreff") else None,
|
||||
absender=att.get("absender", "")[:255] if att.get("absender") else None,
|
||||
anzahl_attachments=1
|
||||
))
|
||||
|
||||
postfach.letzter_abruf = datetime.utcnow()
|
||||
postfach.letzte_anzahl = len(attachments)
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"ergebnisse": [{
|
||||
"postfach": postfach.name,
|
||||
"anzahl": len(attachments),
|
||||
"dateien": [a["original_name"] for a in attachments],
|
||||
"bereits_verarbeitet": len(bereits_verarbeitet)
|
||||
}]
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"ergebnisse": [{
|
||||
"postfach": postfach.name,
|
||||
"fehler": str(e)
|
||||
}]
|
||||
}
|
||||
finally:
|
||||
fetcher.disconnect()
|
||||
|
||||
|
||||
@router.post("/postfaecher/abrufen-alle")
|
||||
def rufe_alle_postfaecher_ab(db: Session = Depends(get_db)):
|
||||
postfaecher = db.query(Postfach).filter(Postfach.aktiv == True).all()
|
||||
ergebnisse = []
|
||||
|
||||
for postfach in postfaecher:
|
||||
ziel = Path(postfach.ziel_ordner)
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
fetcher = MailFetcher({
|
||||
"imap_server": postfach.imap_server,
|
||||
"imap_port": postfach.imap_port,
|
||||
"email": postfach.email,
|
||||
"passwort": postfach.passwort,
|
||||
"ordner": postfach.ordner,
|
||||
"erlaubte_typen": postfach.erlaubte_typen,
|
||||
"max_groesse_mb": postfach.max_groesse_mb
|
||||
})
|
||||
|
||||
try:
|
||||
attachments = fetcher.fetch_attachments(ziel)
|
||||
postfach.letzter_abruf = datetime.utcnow()
|
||||
postfach.letzte_anzahl = len(attachments)
|
||||
|
||||
ergebnisse.append({
|
||||
"postfach": postfach.name,
|
||||
"anzahl": len(attachments),
|
||||
"dateien": [a["original_name"] for a in attachments]
|
||||
})
|
||||
except Exception as e:
|
||||
ergebnisse.append({
|
||||
"postfach": postfach.name,
|
||||
"fehler": str(e)
|
||||
})
|
||||
finally:
|
||||
fetcher.disconnect()
|
||||
|
||||
db.commit()
|
||||
return {"ergebnisse": ergebnisse}
|
||||
|
||||
|
||||
# ============ BEREICH 2: Quell-Ordner ============
|
||||
|
||||
@router.get("/ordner", response_model=List[OrdnerResponse])
|
||||
def liste_ordner(db: Session = Depends(get_db)):
|
||||
return db.query(QuellOrdner).all()
|
||||
|
||||
|
||||
@router.post("/ordner", response_model=OrdnerResponse)
|
||||
def erstelle_ordner(data: OrdnerCreate, db: Session = Depends(get_db)):
|
||||
ordner = QuellOrdner(**data.dict())
|
||||
db.add(ordner)
|
||||
db.commit()
|
||||
db.refresh(ordner)
|
||||
return ordner
|
||||
|
||||
|
||||
@router.delete("/ordner/{id}")
|
||||
def loesche_ordner(id: int, db: Session = Depends(get_db)):
|
||||
ordner = db.query(QuellOrdner).filter(QuellOrdner.id == id).first()
|
||||
if not ordner:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
db.delete(ordner)
|
||||
db.commit()
|
||||
return {"message": "Gelöscht"}
|
||||
|
||||
|
||||
@router.get("/ordner/{id}/scannen")
|
||||
def scanne_ordner(id: int, db: Session = Depends(get_db)):
|
||||
ordner = db.query(QuellOrdner).filter(QuellOrdner.id == id).first()
|
||||
if not ordner:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
|
||||
pfad = Path(ordner.pfad)
|
||||
if not pfad.exists():
|
||||
return {"anzahl": 0, "fehler": "Ordner existiert nicht"}
|
||||
|
||||
# Dateien sammeln (rekursiv oder nicht)
|
||||
dateien = []
|
||||
pattern = "**/*" if ordner.rekursiv else "*"
|
||||
for f in pfad.glob(pattern):
|
||||
if f.is_file() and f.suffix.lower() in [t.lower() for t in ordner.dateitypen]:
|
||||
dateien.append(f)
|
||||
|
||||
return {"anzahl": len(dateien), "dateien": [str(f.relative_to(pfad)) for f in dateien[:30]]}
|
||||
|
||||
|
||||
# ============ Regeln ============
|
||||
|
||||
@router.get("/regeln", response_model=List[RegelResponse])
|
||||
def liste_regeln(db: Session = Depends(get_db)):
|
||||
return db.query(SortierRegel).order_by(SortierRegel.prioritaet).all()
|
||||
|
||||
|
||||
@router.post("/regeln", response_model=RegelResponse)
|
||||
def erstelle_regel(data: RegelCreate, db: Session = Depends(get_db)):
|
||||
regel = SortierRegel(**data.dict())
|
||||
db.add(regel)
|
||||
db.commit()
|
||||
db.refresh(regel)
|
||||
return regel
|
||||
|
||||
|
||||
@router.put("/regeln/{id}", response_model=RegelResponse)
|
||||
def aktualisiere_regel(id: int, data: RegelCreate, db: Session = Depends(get_db)):
|
||||
regel = db.query(SortierRegel).filter(SortierRegel.id == id).first()
|
||||
if not regel:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
for key, value in data.dict().items():
|
||||
setattr(regel, key, value)
|
||||
db.commit()
|
||||
db.refresh(regel)
|
||||
return regel
|
||||
|
||||
|
||||
@router.delete("/regeln/{id}")
|
||||
def loesche_regel(id: int, db: Session = Depends(get_db)):
|
||||
regel = db.query(SortierRegel).filter(SortierRegel.id == id).first()
|
||||
if not regel:
|
||||
raise HTTPException(status_code=404, detail="Nicht gefunden")
|
||||
db.delete(regel)
|
||||
db.commit()
|
||||
return {"message": "Gelöscht"}
|
||||
|
||||
|
||||
@router.post("/regeln/test")
|
||||
def teste_regel(data: RegelTestRequest):
|
||||
regel = data.regel
|
||||
regel["aktiv"] = True
|
||||
regel["prioritaet"] = 1
|
||||
|
||||
sorter = Sorter([regel])
|
||||
doc_info = {"text": data.text, "original_name": "test.pdf", "absender": ""}
|
||||
|
||||
passend = sorter.finde_passende_regel(doc_info)
|
||||
|
||||
if passend:
|
||||
extrahiert = sorter.extrahiere_felder(passend, doc_info)
|
||||
dateiname = sorter.generiere_dateinamen(passend, extrahiert)
|
||||
return {"passt": True, "extrahiert": extrahiert, "dateiname": dateiname}
|
||||
|
||||
return {"passt": False}
|
||||
|
||||
|
||||
# ============ Sortierung ============
|
||||
|
||||
def sammle_dateien(ordner: QuellOrdner) -> list:
|
||||
"""Sammelt alle Dateien aus einem Ordner (rekursiv oder nicht)"""
|
||||
pfad = Path(ordner.pfad)
|
||||
if not pfad.exists():
|
||||
return []
|
||||
|
||||
dateien = []
|
||||
pattern = "**/*" if ordner.rekursiv else "*"
|
||||
erlaubte = [t.lower() for t in (ordner.dateitypen or [".pdf"])]
|
||||
|
||||
for f in pfad.glob(pattern):
|
||||
if f.is_file() and f.suffix.lower() in erlaubte:
|
||||
dateien.append(f)
|
||||
|
||||
return dateien
|
||||
|
||||
|
||||
@router.post("/sortierung/starten")
|
||||
def starte_sortierung(db: Session = Depends(get_db)):
|
||||
ordner_liste = db.query(QuellOrdner).filter(QuellOrdner.aktiv == True).all()
|
||||
regeln = db.query(SortierRegel).filter(SortierRegel.aktiv == True).order_by(SortierRegel.prioritaet).all()
|
||||
|
||||
if not ordner_liste:
|
||||
return {"fehler": "Keine Quell-Ordner konfiguriert", "verarbeitet": []}
|
||||
if not regeln:
|
||||
return {"fehler": "Keine Regeln definiert", "verarbeitet": []}
|
||||
|
||||
# Regeln in Dict-Format
|
||||
regeln_dicts = []
|
||||
for r in regeln:
|
||||
regeln_dicts.append({
|
||||
"id": r.id,
|
||||
"name": r.name,
|
||||
"prioritaet": r.prioritaet,
|
||||
"muster": r.muster,
|
||||
"extraktion": r.extraktion,
|
||||
"schema": r.schema,
|
||||
"unterordner": r.unterordner
|
||||
})
|
||||
|
||||
sorter = Sorter(regeln_dicts)
|
||||
pdf_processor = PDFProcessor()
|
||||
|
||||
ergebnis = {
|
||||
"gesamt": 0,
|
||||
"sortiert": 0,
|
||||
"zugferd": 0,
|
||||
"fehler": 0,
|
||||
"verarbeitet": []
|
||||
}
|
||||
|
||||
for quell_ordner in ordner_liste:
|
||||
pfad = Path(quell_ordner.pfad)
|
||||
if not pfad.exists():
|
||||
continue
|
||||
|
||||
ziel_basis = Path(quell_ordner.ziel_ordner)
|
||||
dateien = sammle_dateien(quell_ordner)
|
||||
|
||||
for datei in dateien:
|
||||
ergebnis["gesamt"] += 1
|
||||
# Relative path for display purposes
try:
    rel_pfad = str(datei.relative_to(pfad))
except ValueError:
    rel_pfad = datei.name
datei_info = {"original": rel_pfad}
|
||||
|
||||
try:
|
||||
ist_pdf = datei.suffix.lower() == ".pdf"
|
||||
text = ""
|
||||
ist_zugferd = False
|
||||
ocr_gemacht = False
|
||||
|
||||
# Nur PDFs durch den PDF-Processor
|
||||
if ist_pdf:
|
||||
pdf_result = pdf_processor.verarbeite(str(datei))
|
||||
|
||||
if pdf_result.get("fehler"):
|
||||
raise Exception(pdf_result["fehler"])
|
||||
|
||||
text = pdf_result.get("text", "")
|
||||
ist_zugferd = pdf_result.get("ist_zugferd", False)
|
||||
ocr_gemacht = pdf_result.get("ocr_durchgefuehrt", False)
|
||||
|
||||
# ZUGFeRD separat behandeln
|
||||
if ist_zugferd:
|
||||
zugferd_ziel = ziel_basis / "zugferd"
|
||||
zugferd_ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
neuer_pfad = zugferd_ziel / datei.name
|
||||
counter = 1
|
||||
while neuer_pfad.exists():
|
||||
neuer_pfad = zugferd_ziel / f"{datei.stem}_{counter}{datei.suffix}"
|
||||
counter += 1
|
||||
|
||||
datei.rename(neuer_pfad)
|
||||
|
||||
ergebnis["zugferd"] += 1
|
||||
datei_info["zugferd"] = True
|
||||
datei_info["neuer_name"] = neuer_pfad.name
|
||||
|
||||
db.add(VerarbeiteteDatei(
|
||||
original_pfad=str(datei),
|
||||
original_name=datei.name,
|
||||
neuer_pfad=str(neuer_pfad),
|
||||
neuer_name=neuer_pfad.name,
|
||||
ist_zugferd=True,
|
||||
status="zugferd"
|
||||
))
|
||||
ergebnis["verarbeitet"].append(datei_info)
|
||||
continue
|
||||
|
||||
# Regel finden (für PDFs mit Text, für andere nur Dateiname)
|
||||
doc_info = {
|
||||
"text": text,
|
||||
"original_name": datei.name,
|
||||
"absender": "",
|
||||
"dateityp": datei.suffix.lower()
|
||||
}
|
||||
|
||||
regel = sorter.finde_passende_regel(doc_info)
|
||||
|
||||
if not regel:
|
||||
datei_info["fehler"] = "Keine passende Regel"
|
||||
ergebnis["fehler"] += 1
|
||||
ergebnis["verarbeitet"].append(datei_info)
|
||||
continue
|
||||
|
||||
# Felder extrahieren
|
||||
extrahiert = sorter.extrahiere_felder(regel, doc_info)
|
||||
|
||||
# Dateiendung beibehalten
|
||||
schema = regel.get("schema", "{datum} - Dokument.pdf")
|
||||
# Endung aus Schema entfernen und Original-Endung anhängen
|
||||
if schema.endswith(".pdf"):
|
||||
schema = schema[:-4] + datei.suffix
|
||||
neuer_name = sorter.generiere_dateinamen({"schema": schema, **regel}, extrahiert)
|
||||
|
||||
# Zielordner
|
||||
ziel = ziel_basis
|
||||
if regel.get("unterordner"):
|
||||
ziel = ziel / regel["unterordner"]
|
||||
ziel.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Verschieben
|
||||
neuer_pfad = sorter.verschiebe_datei(str(datei), str(ziel), neuer_name)
|
||||
|
||||
ergebnis["sortiert"] += 1
|
||||
datei_info["neuer_name"] = neuer_name
|
||||
|
||||
db.add(VerarbeiteteDatei(
|
||||
original_pfad=str(datei),
|
||||
original_name=datei.name,
|
||||
neuer_pfad=neuer_pfad,
|
||||
neuer_name=neuer_name,
|
||||
ist_zugferd=False,
|
||||
ocr_durchgefuehrt=ocr_gemacht,
|
||||
status="sortiert",
|
||||
extrahierte_daten=extrahiert
|
||||
))
|
||||
|
||||
except Exception as e:
|
||||
ergebnis["fehler"] += 1
|
||||
datei_info["fehler"] = str(e)
|
||||
|
||||
ergebnis["verarbeitet"].append(datei_info)
|
||||
|
||||
db.commit()
|
||||
return ergebnis
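Triggering a sorting run and summarizing the returned counters could look like this; purely illustrative, assuming at least one source folder and one rule are configured:

```python
# Hypothetical trigger - the result keys mirror the "ergebnis" dict built above
import requests

r = requests.post("http://localhost:8000/api/sortierung/starten").json()
print(f"{r['sortiert']}/{r['gesamt']} sortiert, {r['zugferd']} ZUGFeRD, {r['fehler']} Fehler")
for eintrag in r["verarbeitet"]:
    print(eintrag.get("original"), "->", eintrag.get("neuer_name") or eintrag.get("fehler"))
```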
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# ============ Einfache Regeln (UI-freundlich) ============
|
||||
|
||||
@router.get("/dokumenttypen")
|
||||
def liste_dokumenttypen():
|
||||
"""Gibt alle verfügbaren Dokumenttypen für das UI zurück"""
|
||||
from ..modules.sorter import DOKUMENTTYPEN
|
||||
return [
|
||||
{"id": key, "name": config["name"], "schema": config["schema"], "unterordner": config["unterordner"]}
|
||||
for key, config in DOKUMENTTYPEN.items()
|
||||
]
|
||||
|
||||
|
||||
class EinfacheRegelCreate(BaseModel):
|
||||
name: str
|
||||
dokumenttyp: str # z.B. "rechnung", "vertrag"
|
||||
keywords: str # Komma-getrennt
|
||||
firma: Optional[str] = None # Fester Firmenwert
|
||||
unterordner: Optional[str] = None
|
||||
prioritaet: int = 50
|
||||
|
||||
|
||||
@router.post("/regeln/einfach")
|
||||
def erstelle_einfache_regel_api(data: EinfacheRegelCreate, db: Session = Depends(get_db)):
|
||||
"""Erstellt eine Regel basierend auf Dokumenttyp - für einfaches UI"""
|
||||
from ..modules.sorter import DOKUMENTTYPEN
|
||||
|
||||
typ_config = DOKUMENTTYPEN.get(data.dokumenttyp, DOKUMENTTYPEN["sonstiges"])
|
||||
|
||||
# Muster als Dict (keywords werden vom Sorter geparst)
|
||||
muster = {"keywords": data.keywords}
|
||||
|
||||
# Extraktion (nur Firma wenn angegeben)
|
||||
extraktion = {}
|
||||
if data.firma:
|
||||
extraktion["firma"] = {"wert": data.firma}
|
||||
|
||||
regel = SortierRegel(
|
||||
name=data.name,
|
||||
prioritaet=data.prioritaet,
|
||||
aktiv=True,
|
||||
muster=muster,
|
||||
extraktion=extraktion,
|
||||
schema=typ_config["schema"],
|
||||
unterordner=data.unterordner or typ_config["unterordner"]
|
||||
)
|
||||
|
||||
db.add(regel)
|
||||
db.commit()
|
||||
db.refresh(regel)
|
||||
|
||||
return {
|
||||
"id": regel.id,
|
||||
"name": regel.name,
|
||||
"dokumenttyp": data.dokumenttyp,
|
||||
"keywords": data.keywords,
|
||||
"schema": regel.schema
|
||||
}
|
||||
|
||||
|
||||
class ExtraktionTestRequest(BaseModel):
|
||||
text: str
|
||||
dateiname: Optional[str] = "test.pdf"
|
||||
|
||||
|
||||
@router.post("/extraktion/test")
|
||||
def teste_extraktion(data: ExtraktionTestRequest):
|
||||
"""Testet die automatische Extraktion auf einem Text"""
|
||||
from ..modules.extraktoren import extrahiere_alle_felder, baue_dateiname
|
||||
|
||||
dokument_info = {
|
||||
"original_name": data.dateiname,
|
||||
"absender": ""
|
||||
}
|
||||
|
||||
# Felder extrahieren
|
||||
felder = extrahiere_alle_felder(data.text, dokument_info)
|
||||
|
||||
# Beispiel-Dateinamen für verschiedene Typen generieren
|
||||
beispiele = {}
|
||||
from ..modules.sorter import DOKUMENTTYPEN
|
||||
for typ_id, typ_config in DOKUMENTTYPEN.items():
|
||||
beispiele[typ_id] = baue_dateiname(typ_config["schema"], felder, ".pdf")
|
||||
|
||||
return {
|
||||
"extrahiert": felder,
|
||||
"beispiel_dateinamen": beispiele
|
||||
}
|
||||
|
||||
|
||||
@router.post("/regeln/{id}/vorschau")
|
||||
def regel_vorschau(id: int, data: ExtraktionTestRequest, db: Session = Depends(get_db)):
|
||||
"""Zeigt Vorschau wie eine Regel auf einen Text angewendet würde"""
|
||||
regel = db.query(SortierRegel).filter(SortierRegel.id == id).first()
|
||||
if not regel:
|
||||
raise HTTPException(status_code=404, detail="Regel nicht gefunden")
|
||||
|
||||
from ..modules.sorter import Sorter
|
||||
|
||||
sorter = Sorter([{
|
||||
"id": regel.id,
|
||||
"name": regel.name,
|
||||
"prioritaet": regel.prioritaet,
|
||||
"aktiv": True,
|
||||
"muster": regel.muster,
|
||||
"extraktion": regel.extraktion,
|
||||
"schema": regel.schema,
|
||||
"unterordner": regel.unterordner
|
||||
}])
|
||||
|
||||
dokument_info = {
|
||||
"text": data.text,
|
||||
"original_name": data.dateiname or "test.pdf",
|
||||
"absender": ""
|
||||
}
|
||||
|
||||
# Prüfen ob Regel matched
|
||||
passende_regel = sorter.finde_passende_regel(dokument_info)
|
||||
|
||||
if not passende_regel:
|
||||
return {
|
||||
"matched": False,
|
||||
"grund": "Keywords nicht gefunden"
|
||||
}
|
||||
|
||||
# Felder extrahieren
|
||||
felder = sorter.extrahiere_felder(passende_regel, dokument_info)
|
||||
|
||||
# Dateiname generieren
|
||||
dateiname = sorter.generiere_dateinamen(passende_regel, felder)
|
||||
|
||||
return {
|
||||
"matched": True,
|
||||
"extrahiert": felder,
|
||||
"dateiname": dateiname,
|
||||
"unterordner": passende_regel.get("unterordner")
|
||||
}
|
||||
@@ -1 +0,0 @@
# Services
Binary file not shown.
Binary file not shown.

@@ -1,360 +0,0 @@
"""
|
||||
Pipeline Service
|
||||
Orchestriert die gesamte Dokumentenverarbeitung
|
||||
"""
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import logging
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..models import Pipeline, MailConfig, SortierRegel, Dokument, VerarbeitungsLog
|
||||
from ..modules.mail_fetcher import MailFetcher
|
||||
from ..modules.pdf_processor import PDFProcessor
|
||||
from ..modules.sorter import Sorter
|
||||
from ..config import INBOX_DIR, PROCESSED_DIR, ZUGFERD_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PipelineService:
|
||||
"""Führt die komplette Pipeline-Verarbeitung durch"""
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
self.pdf_processor = PDFProcessor()
|
||||
|
||||
def verarbeite_pipeline(self, pipeline_id: int) -> Dict:
|
||||
"""
|
||||
Führt alle Schritte einer Pipeline aus
|
||||
|
||||
Returns:
|
||||
Dict mit Statistiken und Ergebnissen
|
||||
"""
|
||||
pipeline = self.db.query(Pipeline).filter(Pipeline.id == pipeline_id).first()
|
||||
if not pipeline:
|
||||
return {"fehler": f"Pipeline {pipeline_id} nicht gefunden"}
|
||||
|
||||
if not pipeline.aktiv:
|
||||
return {"fehler": f"Pipeline {pipeline.name} ist deaktiviert"}
|
||||
|
||||
ergebnis = {
|
||||
"pipeline": pipeline.name,
|
||||
"gestartet": datetime.now().isoformat(),
|
||||
"mails_abgerufen": 0,
|
||||
"attachments": 0,
|
||||
"verarbeitet": 0,
|
||||
"zugferd": 0,
|
||||
"ocr": 0,
|
||||
"sortiert": 0,
|
||||
"fehler": []
|
||||
}
|
||||
|
||||
# 1. Mails abrufen
|
||||
inbox_pfad = INBOX_DIR / f"pipeline_{pipeline_id}"
|
||||
inbox_pfad.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for mail_config in pipeline.mail_configs:
|
||||
if not mail_config.aktiv:
|
||||
continue
|
||||
|
||||
try:
|
||||
attachments = self._rufe_mails_ab(mail_config, inbox_pfad)
|
||||
ergebnis["attachments"] += len(attachments)
|
||||
|
||||
# Dokumente in DB anlegen
|
||||
for att in attachments:
|
||||
dokument = Dokument(
|
||||
pipeline_id=pipeline_id,
|
||||
original_name=att["original_name"],
|
||||
original_pfad=att["pfad"],
|
||||
status="neu",
|
||||
extrahierte_daten={
|
||||
"absender": att.get("absender"),
|
||||
"betreff": att.get("betreff"),
|
||||
"mail_datum": att.get("datum")
|
||||
}
|
||||
)
|
||||
self.db.add(dokument)
|
||||
self._log(dokument.id, "mail_abruf", "erfolg", att)
|
||||
|
||||
# Letzten Abruf aktualisieren
|
||||
mail_config.letzter_abruf = datetime.utcnow()
|
||||
|
||||
except Exception as e:
|
||||
ergebnis["fehler"].append(f"Mail-Abruf {mail_config.name}: {e}")
|
||||
logger.error(f"Mail-Abruf Fehler: {e}")
|
||||
|
||||
self.db.commit()
|
||||
|
||||
# 2. PDFs verarbeiten
|
||||
neue_dokumente = self.db.query(Dokument).filter(
|
||||
Dokument.pipeline_id == pipeline_id,
|
||||
Dokument.status == "neu"
|
||||
).all()
|
||||
|
||||
for dokument in neue_dokumente:
|
||||
try:
|
||||
self._verarbeite_dokument(dokument, pipeline)
|
||||
ergebnis["verarbeitet"] += 1
|
||||
|
||||
if dokument.ist_zugferd:
|
||||
ergebnis["zugferd"] += 1
|
||||
if dokument.ocr_durchgefuehrt:
|
||||
ergebnis["ocr"] += 1
|
||||
if dokument.status == "sortiert":
|
||||
ergebnis["sortiert"] += 1
|
||||
|
||||
except Exception as e:
|
||||
dokument.status = "fehler"
|
||||
dokument.fehler_meldung = str(e)
|
||||
ergebnis["fehler"].append(f"Verarbeitung {dokument.original_name}: {e}")
|
||||
logger.error(f"Verarbeitungs-Fehler: {e}")
|
||||
|
||||
self.db.commit()
|
||||
|
||||
ergebnis["beendet"] = datetime.now().isoformat()
|
||||
return ergebnis
|
||||
|
||||
def _rufe_mails_ab(self, mail_config: MailConfig, ziel_ordner: Path) -> List[Dict]:
|
||||
"""Ruft Mails von einem Postfach ab"""
|
||||
config = {
|
||||
"imap_server": mail_config.imap_server,
|
||||
"imap_port": mail_config.imap_port,
|
||||
"email": mail_config.email,
|
||||
"passwort": mail_config.passwort,
|
||||
"ordner": mail_config.ordner,
|
||||
"erlaubte_typen": mail_config.erlaubte_typen,
|
||||
"max_groesse_mb": mail_config.max_groesse_mb
|
||||
}
|
||||
|
||||
fetcher = MailFetcher(config)
|
||||
try:
|
||||
attachments = fetcher.fetch_attachments(ziel_ordner)
|
||||
return attachments
|
||||
finally:
|
||||
fetcher.disconnect()
|
||||
|
||||
def _verarbeite_dokument(self, dokument: Dokument, pipeline: Pipeline):
|
||||
"""Verarbeitet ein einzelnes Dokument"""
|
||||
pfad = Path(dokument.original_pfad)
|
||||
|
||||
if not pfad.exists():
|
||||
raise FileNotFoundError(f"Datei nicht gefunden: {pfad}")
|
||||
|
||||
# Nur PDFs verarbeiten
|
||||
if pfad.suffix.lower() != ".pdf":
|
||||
dokument.status = "uebersprungen"
|
||||
self._log(dokument.id, "verarbeitung", "uebersprungen", {"grund": "Kein PDF"})
|
||||
return
|
||||
|
||||
# PDF verarbeiten
|
||||
pdf_ergebnis = self.pdf_processor.verarbeite(str(pfad))
|
||||
|
||||
if "fehler" in pdf_ergebnis:
|
||||
raise Exception(pdf_ergebnis["fehler"])
|
||||
|
||||
dokument.ist_zugferd = pdf_ergebnis["ist_zugferd"]
|
||||
dokument.hat_text = pdf_ergebnis["hat_text"]
|
||||
dokument.ocr_durchgefuehrt = pdf_ergebnis["ocr_durchgefuehrt"]
|
||||
|
||||
# ZUGFeRD separat behandeln - NICHT umbenennen!
|
||||
if dokument.ist_zugferd:
|
||||
self._behandle_zugferd(dokument, pdf_ergebnis)
|
||||
return
|
||||
|
||||
# Sortieren
|
||||
self._sortiere_dokument(dokument, pdf_ergebnis, pipeline)
|
||||
|
||||
def _behandle_zugferd(self, dokument: Dokument, pdf_ergebnis: Dict):
|
||||
"""Behandelt ZUGFeRD-Rechnungen (werden nicht verändert)"""
|
||||
# In separaten Ordner verschieben
|
||||
ziel_dir = ZUGFERD_DIR / datetime.now().strftime("%Y-%m")
|
||||
ziel_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
quell_pfad = Path(dokument.original_pfad)
|
||||
ziel_pfad = ziel_dir / quell_pfad.name
|
||||
|
||||
# Eindeutigen Namen sicherstellen
|
||||
counter = 1
|
||||
while ziel_pfad.exists():
|
||||
ziel_pfad = ziel_dir / f"{quell_pfad.stem}_{counter}{quell_pfad.suffix}"
|
||||
counter += 1
|
||||
|
||||
# Verschieben (nicht umbenennen!)
|
||||
import shutil
|
||||
shutil.move(str(quell_pfad), str(ziel_pfad))
|
||||
|
||||
dokument.neuer_pfad = str(ziel_pfad)
|
||||
dokument.neuer_name = ziel_pfad.name
|
||||
dokument.status = "zugferd"
|
||||
dokument.extrahierte_daten = {
|
||||
**(dokument.extrahierte_daten or {}),
|
||||
"zugferd_xml": pdf_ergebnis.get("zugferd_xml")
|
||||
}
|
||||
|
||||
self._log(dokument.id, "zugferd", "erfolg", {
|
||||
"ziel": str(ziel_pfad),
|
||||
"hinweis": "ZUGFeRD wird nicht umbenannt"
|
||||
})
|
||||
|
||||
def _sortiere_dokument(self, dokument: Dokument, pdf_ergebnis: Dict, pipeline: Pipeline):
|
||||
"""Sortiert Dokument nach Regeln"""
|
||||
# Regeln laden
|
||||
regeln = self.db.query(SortierRegel).filter(
|
||||
SortierRegel.pipeline_id == pipeline.id,
|
||||
SortierRegel.aktiv == True
|
||||
).order_by(SortierRegel.prioritaet).all()
|
||||
|
||||
# In Sorter-Format konvertieren
|
||||
regeln_dicts = []
|
||||
for r in regeln:
|
||||
regeln_dicts.append({
|
||||
"id": r.id,
|
||||
"name": r.name,
|
||||
"prioritaet": r.prioritaet,
|
||||
"aktiv": r.aktiv,
|
||||
"muster": r.muster,
|
||||
"extraktion": r.extraktion,
|
||||
"schema": r.schema,
|
||||
"ziel_ordner": r.ziel_ordner
|
||||
})
|
||||
|
||||
sorter = Sorter(regeln_dicts)
|
||||
|
||||
# Dokument-Info zusammenstellen
|
||||
dokument_info = {
|
||||
"text": pdf_ergebnis.get("text", ""),
|
||||
"original_name": dokument.original_name,
|
||||
"absender": (dokument.extrahierte_daten or {}).get("absender", ""),
|
||||
"betreff": (dokument.extrahierte_daten or {}).get("betreff", "")
|
||||
}
|
||||
|
||||
# Passende Regel finden
|
||||
regel = sorter.finde_passende_regel(dokument_info)
|
||||
|
||||
if not regel:
|
||||
dokument.status = "keine_regel"
|
||||
self._log(dokument.id, "sortierung", "keine_regel", {})
|
||||
return
|
||||
|
||||
# Felder extrahieren
|
||||
extrahiert = sorter.extrahiere_felder(regel, dokument_info)
|
||||
|
||||
# Dateinamen generieren
|
||||
neuer_name = sorter.generiere_dateinamen(regel, extrahiert)
|
||||
|
||||
# Zielordner bestimmen
|
||||
ziel_ordner = regel.get("ziel_ordner") or str(PROCESSED_DIR / pipeline.name)
|
||||
|
||||
# Verschieben
|
||||
neuer_pfad = sorter.verschiebe_datei(
|
||||
dokument.original_pfad,
|
||||
ziel_ordner,
|
||||
neuer_name
|
||||
)
|
||||
|
||||
# Dokument aktualisieren
|
||||
dokument.neuer_name = neuer_name
|
||||
dokument.neuer_pfad = neuer_pfad
|
||||
dokument.extrahierte_daten = {
|
||||
**(dokument.extrahierte_daten or {}),
|
||||
"text_auszug": pdf_ergebnis.get("text", "")[:500],
|
||||
**extrahiert
|
||||
}
|
||||
dokument.regel_id = regel.get("id")
|
||||
dokument.status = "sortiert"
|
||||
dokument.verarbeitet = datetime.utcnow()
|
||||
|
||||
self._log(dokument.id, "sortierung", "erfolg", {
|
||||
"regel": regel.get("name"),
|
||||
"neuer_name": neuer_name,
|
||||
"ziel": neuer_pfad
|
||||
})
|
||||
|
||||
def _log(self, dokument_id: int, schritt: str, status: str, details: Dict):
|
||||
"""Erstellt Log-Eintrag"""
|
||||
log = VerarbeitungsLog(
|
||||
dokument_id=dokument_id,
|
||||
schritt=schritt,
|
||||
status=status,
|
||||
details=details
|
||||
)
|
||||
self.db.add(log)
|
||||
|
||||
|
||||
class PipelineManager:
|
||||
"""Verwaltet Pipelines (CRUD-Operationen)"""
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
def erstelle_pipeline(self, name: str, beschreibung: str = "") -> Pipeline:
|
||||
"""Erstellt neue Pipeline"""
|
||||
pipeline = Pipeline(name=name, beschreibung=beschreibung)
|
||||
self.db.add(pipeline)
|
||||
self.db.commit()
|
||||
self.db.refresh(pipeline)
|
||||
return pipeline
|
||||
|
||||
def hole_alle_pipelines(self) -> List[Pipeline]:
|
||||
"""Gibt alle Pipelines zurück"""
|
||||
return self.db.query(Pipeline).all()
|
||||
|
||||
def hole_pipeline(self, pipeline_id: int) -> Optional[Pipeline]:
|
||||
"""Gibt eine Pipeline zurück"""
|
||||
return self.db.query(Pipeline).filter(Pipeline.id == pipeline_id).first()
|
||||
|
||||
def fuege_mail_config_hinzu(self, pipeline_id: int, config: Dict) -> MailConfig:
|
||||
"""Fügt Mail-Konfiguration zu Pipeline hinzu"""
|
||||
mail_config = MailConfig(
|
||||
pipeline_id=pipeline_id,
|
||||
name=config.get("name", "Unbenannt"),
|
||||
imap_server=config["imap_server"],
|
||||
imap_port=config.get("imap_port", 993),
|
||||
email=config["email"],
|
||||
passwort=config["passwort"],
|
||||
ordner=config.get("ordner", "INBOX"),
|
||||
erlaubte_typen=config.get("erlaubte_typen", [".pdf"]),
|
||||
max_groesse_mb=config.get("max_groesse_mb", 25)
|
||||
)
|
||||
self.db.add(mail_config)
|
||||
self.db.commit()
|
||||
self.db.refresh(mail_config)
|
||||
return mail_config
|
||||
|
||||
def fuege_regel_hinzu(self, pipeline_id: int, regel: Dict) -> SortierRegel:
|
||||
"""Fügt Sortier-Regel zu Pipeline hinzu"""
|
||||
sortier_regel = SortierRegel(
|
||||
pipeline_id=pipeline_id,
|
||||
name=regel["name"],
|
||||
prioritaet=regel.get("prioritaet", 100),
|
||||
muster=regel.get("muster", {}),
|
||||
extraktion=regel.get("extraktion", {}),
|
||||
schema=regel.get("schema", "{datum} - Dokument.pdf"),
|
||||
ziel_ordner=regel.get("ziel_ordner")
|
||||
)
|
||||
self.db.add(sortier_regel)
|
||||
self.db.commit()
|
||||
self.db.refresh(sortier_regel)
|
||||
return sortier_regel
|
||||
|
||||
def teste_regel(self, regel: Dict, text: str) -> Dict:
|
||||
"""Testet eine Regel gegen einen Text"""
|
||||
sorter = Sorter([regel])
|
||||
dokument_info = {"text": text, "original_name": "test.pdf", "absender": ""}
|
||||
|
||||
passend = sorter.finde_passende_regel(dokument_info) is not None
|
||||
|
||||
extrahiert = {}
|
||||
dateiname = ""
|
||||
if passend:
|
||||
extrahiert = sorter.extrahiere_felder(regel, dokument_info)
|
||||
dateiname = sorter.generiere_dateinamen(regel, extrahiert)
|
||||
|
||||
return {
|
||||
"regel_passt": passend,
|
||||
"extrahierte_felder": extrahiert,
|
||||
"vorgeschlagener_name": dateiname
|
||||
}
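A wiring sketch for the two classes above; the session factory and its import path are assumptions that mirror the other backend modules, not part of this file:

```python
# Hypothetical setup - assumes a SessionLocal factory as used elsewhere in the backend
from backend.app.models.database import SessionLocal  # assumed module path

db = SessionLocal()
manager = PipelineManager(db)
pipeline = manager.erstelle_pipeline("Firma", "Eingangsrechnungen")
manager.fuege_regel_hinzu(pipeline.id, {"name": "Rechnung", "muster": {"keywords": "rechnung"}})

service = PipelineService(db)
print(service.verarbeite_pipeline(pipeline.id))  # returns the statistics dict shown above
db.close()
```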
@@ -1 +0,0 @@
# Utilities

@@ -1,20 +0,0 @@
# Web Framework
fastapi==0.109.2
uvicorn[standard]==0.27.1
python-multipart==0.0.9
jinja2==3.1.3

# Database
sqlalchemy==2.0.25
aiosqlite==0.19.0

# PDF Processing
pypdf==4.0.1
pdfplumber==0.10.4

# ZUGFeRD
factur-x==3.0

# Utilities
pydantic==2.6.1
python-dotenv==1.0.1
Binary file not shown.

@@ -1,36 +0,0 @@
version: '3.8'

services:
  dateiverwaltung:
    build: .
    container_name: dateiverwaltung
    restart: unless-stopped
    ports:
      - "8000:8000"
    volumes:
      # Persistent data
      - ./data:/app/data
      # Rules can be edited outside the container
      - ./regeln:/app/regeln
      # Mount the archive on the host (optional, for direct access)
      # - /mnt/user/archiv:/archiv
    environment:
      - TZ=Europe/Berlin
      - DATABASE_URL=sqlite:////app/data/dateiverwaltung.db
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Optional: automatic execution via cron
  # scheduler:
  #   build: .
  #   container_name: dateiverwaltung-scheduler
  #   command: >
  #     sh -c "while true; do
  #       sleep 3600;
  #       curl -X POST http://dateiverwaltung:8000/api/pipelines/1/run;
  #     done"
  #   depends_on:
  #     - dateiverwaltung

@@ -1,543 +0,0 @@
/* ============ Variables ============ */
|
||||
:root {
|
||||
--primary: #3b82f6;
|
||||
--primary-dark: #2563eb;
|
||||
--success: #22c55e;
|
||||
--danger: #ef4444;
|
||||
--warning: #f59e0b;
|
||||
--bg: #0f172a;
|
||||
--bg-secondary: #1e293b;
|
||||
--bg-tertiary: #334155;
|
||||
--text: #f1f5f9;
|
||||
--text-secondary: #94a3b8;
|
||||
--border: #475569;
|
||||
--radius: 8px;
|
||||
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
/* ============ Reset & Base ============ */
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
/* ============ Layout ============ */
|
||||
#app {
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.header {
|
||||
background: var(--bg-secondary);
|
||||
padding: 1rem 1.5rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.header h1 {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.main-container {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1px;
|
||||
flex: 1;
|
||||
background: var(--border);
|
||||
}
|
||||
|
||||
@media (max-width: 1200px) {
|
||||
.main-container {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
/* ============ Bereiche ============ */
|
||||
.bereich {
|
||||
background: var(--bg);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.bereich-header {
|
||||
padding: 1.5rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.bereich-header h2 {
|
||||
font-size: 1.25rem;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.bereich-desc {
|
||||
color: var(--text-secondary);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.bereich-content {
|
||||
padding: 1rem;
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
/* ============ Buttons ============ */
|
||||
.btn {
|
||||
padding: 0.5rem 1rem;
|
||||
border: none;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.875rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
background: var(--bg-tertiary);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.btn:hover {
|
||||
filter: brightness(1.1);
|
||||
}
|
||||
|
||||
.btn:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.btn-primary {
|
||||
background: var(--primary);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-success {
|
||||
background: var(--success);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-danger {
|
||||
background: var(--danger);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-sm {
|
||||
padding: 0.25rem 0.5rem;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
.btn-large {
|
||||
padding: 0.75rem 1.5rem;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
/* ============ Cards ============ */
|
||||
.card {
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius);
|
||||
margin-bottom: 1rem;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.card-header {
|
||||
padding: 0.75rem 1rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
border-bottom: 1px solid var(--border);
|
||||
background: var(--bg-tertiary);
|
||||
}
|
||||
|
||||
.card-header h3 {
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.card-body {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
/* ============ Action Bar ============ */
|
||||
.action-bar {
|
||||
padding: 1rem;
|
||||
text-align: center;
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
/* ============ Config Items ============ */
|
||||
.config-item {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 0.75rem;
|
||||
background: var(--bg-tertiary);
|
||||
border-radius: var(--radius);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.config-item:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.config-item-info h4 {
|
||||
font-size: 0.875rem;
|
||||
margin-bottom: 0.125rem;
|
||||
}
|
||||
|
||||
.config-item-info small {
|
||||
color: var(--text-secondary);
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
.config-item-actions {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
/* ============ Forms ============ */
|
||||
.form-group {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.form-group label {
|
||||
display: block;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.form-group input,
|
||||
.form-group textarea,
|
||||
.form-group select {
|
||||
width: 100%;
|
||||
padding: 0.75rem;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
background: var(--bg-tertiary);
|
||||
color: var(--text);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.form-group input:focus,
|
||||
.form-group textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.form-group small {
|
||||
display: block;
|
||||
margin-top: 0.25rem;
|
||||
color: var(--text-secondary);
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
.form-row {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.code-input {
|
||||
font-family: 'Consolas', 'Monaco', monospace;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
/* ============ Log Output ============ */
|
||||
.log-output {
|
||||
font-family: 'Consolas', 'Monaco', monospace;
|
||||
font-size: 0.8rem;
|
||||
max-height: 350px;
|
||||
min-height: 100px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.log-entry {
|
||||
padding: 0.5rem;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 0.25rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.log-entry.success {
|
||||
background: rgba(34, 197, 94, 0.2);
|
||||
border-left: 3px solid var(--success);
|
||||
}
|
||||
|
||||
.log-entry.error {
|
||||
background: rgba(239, 68, 68, 0.2);
|
||||
border-left: 3px solid var(--danger);
|
||||
}
|
||||
|
||||
.log-entry.info {
|
||||
background: rgba(59, 130, 246, 0.2);
|
||||
border-left: 3px solid var(--primary);
|
||||
}
|
||||
|
||||
.empty-state {
|
||||
color: var(--text-secondary);
|
||||
text-align: center;
|
||||
padding: 1rem;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* ============ Modals ============ */
|
||||
.modal {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background: rgba(0, 0, 0, 0.7);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
z-index: 1000;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius);
|
||||
width: 90%;
|
||||
max-width: 500px;
|
||||
max-height: 90vh;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.modal-large {
|
||||
max-width: 700px;
|
||||
}
|
||||
|
||||
.modal-header {
|
||||
padding: 1rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.modal-header h3 {
|
||||
font-size: 1.125rem;
|
||||
}
|
||||
|
||||
.modal-close {
|
||||
background: none;
|
||||
border: none;
|
||||
color: var(--text-secondary);
|
||||
font-size: 1.5rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.modal-body {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.modal-footer {
|
||||
padding: 1rem;
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
gap: 0.5rem;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
/* ============ Test Result ============ */
|
||||
.test-result {
|
||||
margin-top: 0.5rem;
|
||||
padding: 0.75rem;
|
||||
    border-radius: var(--radius);
    background: var(--bg-tertiary);
    font-family: monospace;
    font-size: 0.8rem;
    white-space: pre-wrap;
}

.test-result.success {
    border-left: 3px solid var(--success);
}

.test-result.error {
    border-left: 3px solid var(--danger);
}

/* ============ Status Badges ============ */
.badge {
    display: inline-block;
    padding: 0.125rem 0.5rem;
    border-radius: 4px;
    font-size: 0.7rem;
    font-weight: 500;
}

.badge-success { background: var(--success); }
.badge-warning { background: var(--warning); color: #000; }
.badge-danger { background: var(--danger); }
.badge-info { background: var(--primary); }

/* ============ Loading Overlay ============ */
.loading-overlay {
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background: rgba(0, 0, 0, 0.7);
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    z-index: 2000;
}

.spinner {
    width: 50px;
    height: 50px;
    border: 4px solid var(--border);
    border-top-color: var(--primary);
    border-radius: 50%;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

.loading-text {
    margin-top: 1rem;
    color: var(--text);
    font-size: 0.875rem;
}

.progress-bar {
    width: 200px;
    height: 6px;
    background: var(--bg-tertiary);
    border-radius: 3px;
    margin-top: 1rem;
    overflow: hidden;
}

.progress-bar-fill {
    height: 100%;
    background: var(--primary);
    transition: width 0.3s ease;
}

/* ============ File Browser ============ */
.file-browser {
    max-height: 300px;
    overflow-y: auto;
    border: 1px solid var(--border);
    border-radius: var(--radius);
    margin-bottom: 1rem;
}

.file-browser-path {
    padding: 0.75rem;
    background: var(--bg-tertiary);
    border-bottom: 1px solid var(--border);
    font-family: monospace;
    font-size: 0.8rem;
    display: flex;
    align-items: center;
    gap: 0.5rem;
}

.file-browser-list {
    list-style: none;
}

.file-browser-item {
    padding: 0.5rem 1rem;
    cursor: pointer;
    display: flex;
    align-items: center;
    gap: 0.5rem;
    border-bottom: 1px solid var(--border);
}

.file-browser-item:hover {
    background: var(--bg-tertiary);
}

.file-browser-item.selected {
    background: var(--primary);
}

.file-browser-item:last-child {
    border-bottom: none;
}

.file-icon {
    font-size: 1rem;
}

/* ============ Checkbox Group ============ */
.checkbox-group {
    display: flex;
    flex-wrap: wrap;
    gap: 0.5rem;
    margin-top: 0.5rem;
}

.checkbox-item {
    display: flex;
    align-items: center;
    gap: 0.25rem;
    padding: 0.25rem 0.5rem;
    background: var(--bg-tertiary);
    border-radius: 4px;
    font-size: 0.75rem;
    cursor: pointer;
}

.checkbox-item input {
    width: auto;
    margin: 0;
}

.checkbox-item:has(input:checked) {
    background: var(--primary);
}

/* ============ Input with Button ============ */
.input-with-btn {
    display: flex;
    gap: 0.5rem;
}

.input-with-btn input {
    flex: 1;
}

/* ============ Utilities ============ */
.hidden {
    display: none !important;
}

/* ============ Scrollbar ============ */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: var(--bg);
}

::-webkit-scrollbar-thumb {
    background: var(--border);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--text-secondary);
}
@@ -1,693 +0,0 @@
/**
|
||||
* Dateiverwaltung Frontend
|
||||
* Zwei getrennte Bereiche: Mail-Abruf und Datei-Sortierung
|
||||
*/
|
||||
|
||||
// ============ API ============
|
||||
|
||||
async function api(endpoint, options = {}) {
|
||||
const response = await fetch(`/api${endpoint}`, {
|
||||
headers: { 'Content-Type': 'application/json', ...options.headers },
|
||||
...options
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({}));
|
||||
throw new Error(error.detail || 'API Fehler');
|
||||
}
|
||||
return response.json();
|
||||
}
|
||||
|
||||
// ============ Loading Overlay ============
|
||||
|
||||
function zeigeLoading(text = 'Wird geladen...') {
|
||||
document.getElementById('loading-text').textContent = text;
|
||||
document.getElementById('loading-overlay').classList.remove('hidden');
|
||||
}
|
||||
|
||||
function versteckeLoading() {
|
||||
document.getElementById('loading-overlay').classList.add('hidden');
|
||||
}
|
||||
|
||||
// ============ File Browser ============
|
||||
|
||||
let browserTargetInput = null;
|
||||
let browserCurrentPath = '/srv/http/dateiverwaltung/data';
|
||||
|
||||
function oeffneBrowser(inputId) {
|
||||
browserTargetInput = inputId;
|
||||
const currentValue = document.getElementById(inputId).value;
|
||||
browserCurrentPath = currentValue || '/srv/http/dateiverwaltung/data';
|
||||
ladeBrowserInhalt(browserCurrentPath);
|
||||
document.getElementById('browser-modal').classList.remove('hidden');
|
||||
}
|
||||
|
||||
async function ladeBrowserInhalt(path) {
|
||||
try {
|
||||
const data = await api(`/browse?path=${encodeURIComponent(path)}`);
|
||||
|
||||
if (data.error) {
|
||||
document.getElementById('browser-list').innerHTML =
|
||||
`<li class="file-browser-item" style="color: var(--danger);">${data.error}</li>`;
|
||||
return;
|
||||
}
|
||||
|
||||
browserCurrentPath = data.current;
|
||||
document.getElementById('browser-current-path').textContent = data.current;
|
||||
|
||||
let html = '';
|
||||
|
||||
// Parent directory
|
||||
if (data.parent) {
|
||||
html += `<li class="file-browser-item" onclick="ladeBrowserInhalt('${data.parent}')">
|
||||
<span class="file-icon">📁</span> ..
|
||||
</li>`;
|
||||
}
|
||||
|
||||
// Directories
|
||||
for (const entry of data.entries) {
|
||||
html += `<li class="file-browser-item" ondblclick="ladeBrowserInhalt('${entry.path}')" onclick="browserSelect(this, '${entry.path}')">
|
||||
<span class="file-icon">📁</span> ${entry.name}
|
||||
</li>`;
|
||||
}
|
||||
|
||||
if (data.entries.length === 0 && !data.parent) {
|
||||
html = '<li class="file-browser-item">Keine Unterordner</li>';
|
||||
}
|
||||
|
||||
document.getElementById('browser-list').innerHTML = html;
|
||||
} catch (error) {
|
||||
document.getElementById('browser-list').innerHTML =
|
||||
`<li class="file-browser-item" style="color: var(--danger);">Fehler: ${error.message}</li>`;
|
||||
}
|
||||
}
|
||||
|
||||
function browserSelect(element, path) {
|
||||
document.querySelectorAll('.file-browser-item.selected').forEach(el => el.classList.remove('selected'));
|
||||
element.classList.add('selected');
|
||||
browserCurrentPath = path;
|
||||
}
|
||||
|
||||
function browserAuswahl() {
|
||||
if (browserTargetInput && browserCurrentPath) {
|
||||
document.getElementById(browserTargetInput).value = browserCurrentPath + '/';
|
||||
}
|
||||
schliesseModal('browser-modal');
|
||||
}
|
||||
|
||||
// ============ Checkbox Helpers ============
|
||||
|
||||
function getCheckedTypes(groupId) {
|
||||
const checkboxes = document.querySelectorAll(`#${groupId} input[type="checkbox"]:checked`);
|
||||
return Array.from(checkboxes).map(cb => cb.value);
|
||||
}
|
||||
|
||||
function setCheckedTypes(groupId, types) {
|
||||
const checkboxes = document.querySelectorAll(`#${groupId} input[type="checkbox"]`);
|
||||
checkboxes.forEach(cb => {
|
||||
cb.checked = types.includes(cb.value);
|
||||
});
|
||||
}
|
||||
|
||||
// ============ Init ============
|
||||
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
ladePostfaecher();
|
||||
ladeOrdner();
|
||||
ladeRegeln();
|
||||
});
|
||||
|
||||
// ============ BEREICH 1: Mail-Abruf ============
|
||||
|
||||
async function ladePostfaecher() {
|
||||
try {
|
||||
const postfaecher = await api('/postfaecher');
|
||||
renderPostfaecher(postfaecher);
|
||||
} catch (error) {
|
||||
console.error('Fehler:', error);
|
||||
}
|
||||
}
|
||||
|
||||
let bearbeitetesPostfachId = null;
|
||||
|
||||
function renderPostfaecher(postfaecher) {
|
||||
const container = document.getElementById('postfaecher-liste');
|
||||
|
||||
if (!postfaecher || postfaecher.length === 0) {
|
||||
container.innerHTML = '<p class="empty-state">Keine Postfächer konfiguriert</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
container.innerHTML = postfaecher.map(p => `
|
||||
<div class="config-item">
|
||||
<div class="config-item-info">
|
||||
<h4>${escapeHtml(p.name)}</h4>
|
||||
<small>${escapeHtml(p.email)} → ${escapeHtml(p.ziel_ordner)}</small>
|
||||
</div>
|
||||
<div class="config-item-actions">
|
||||
<button class="btn btn-sm" onclick="postfachAbrufen(${p.id})">Abrufen</button>
|
||||
<button class="btn btn-sm" onclick="postfachBearbeiten(${p.id})">Bearbeiten</button>
|
||||
<button class="btn btn-sm" onclick="postfachTesten(${p.id})">Testen</button>
|
||||
<button class="btn btn-sm btn-danger" onclick="postfachLoeschen(${p.id})">×</button>
|
||||
</div>
|
||||
</div>
|
||||
`).join('');
|
||||
}
|
||||
|
||||
function zeigePostfachModal(postfach = null) {
|
||||
bearbeitetesPostfachId = postfach?.id || null;
|
||||
|
||||
document.getElementById('pf-name').value = postfach?.name || '';
|
||||
document.getElementById('pf-server').value = postfach?.imap_server || '';
|
||||
document.getElementById('pf-port').value = postfach?.imap_port || '993';
|
||||
document.getElementById('pf-email').value = postfach?.email || '';
|
||||
document.getElementById('pf-passwort').value = ''; // Passwort nicht vorausfüllen
|
||||
document.getElementById('pf-ordner').value = postfach?.ordner || 'INBOX';
|
||||
document.getElementById('pf-alle-ordner').value = postfach?.alle_ordner ? 'true' : 'false';
|
||||
document.getElementById('pf-ziel').value = postfach?.ziel_ordner || '/srv/http/dateiverwaltung/data/inbox/';
|
||||
setCheckedTypes('pf-typen-gruppe', postfach?.erlaubte_typen || ['.pdf']);
|
||||
document.getElementById('pf-max-groesse').value = postfach?.max_groesse_mb || '25';
|
||||
|
||||
document.getElementById('postfach-modal').classList.remove('hidden');
|
||||
}
|
||||
|
||||
async function postfachBearbeiten(id) {
|
||||
try {
|
||||
const postfaecher = await api('/postfaecher');
|
||||
const postfach = postfaecher.find(p => p.id === id);
|
||||
if (postfach) {
|
||||
zeigePostfachModal(postfach);
|
||||
}
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function speicherePostfach() {
|
||||
const erlaubteTypen = getCheckedTypes('pf-typen-gruppe');
|
||||
if (erlaubteTypen.length === 0) {
|
||||
alert('Bitte mindestens einen Dateityp auswählen');
|
||||
return;
|
||||
}
|
||||
|
||||
const data = {
|
||||
name: document.getElementById('pf-name').value.trim(),
|
||||
imap_server: document.getElementById('pf-server').value.trim(),
|
||||
imap_port: parseInt(document.getElementById('pf-port').value),
|
||||
email: document.getElementById('pf-email').value.trim(),
|
||||
passwort: document.getElementById('pf-passwort').value,
|
||||
ordner: document.getElementById('pf-ordner').value.trim(),
|
||||
alle_ordner: document.getElementById('pf-alle-ordner').value === 'true',
|
||||
ziel_ordner: document.getElementById('pf-ziel').value.trim(),
|
||||
erlaubte_typen: erlaubteTypen,
|
||||
max_groesse_mb: parseInt(document.getElementById('pf-max-groesse').value)
|
||||
};
|
||||
|
||||
if (!data.name || !data.imap_server || !data.email || !data.ziel_ordner) {
|
||||
alert('Bitte alle Pflichtfelder ausfüllen');
|
||||
return;
|
||||
}
|
||||
|
||||
// Bei Bearbeitung: Passwort nur senden wenn eingegeben
|
||||
if (bearbeitetesPostfachId && !data.passwort) {
|
||||
delete data.passwort;
|
||||
} else if (!data.passwort) {
|
||||
alert('Passwort ist erforderlich');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (bearbeitetesPostfachId) {
|
||||
await api(`/postfaecher/${bearbeitetesPostfachId}`, { method: 'PUT', body: JSON.stringify(data) });
|
||||
} else {
|
||||
await api('/postfaecher', { method: 'POST', body: JSON.stringify(data) });
|
||||
}
|
||||
schliesseModal('postfach-modal');
|
||||
ladePostfaecher();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function postfachTesten(id) {
|
||||
try {
|
||||
const result = await api(`/postfaecher/${id}/test`, { method: 'POST' });
|
||||
alert(result.erfolg ? 'Verbindung erfolgreich!' : 'Fehler: ' + result.nachricht);
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function postfachAbrufen(id) {
|
||||
const logContainer = document.getElementById('abruf-log');
|
||||
logContainer.innerHTML = '<div class="log-entry info"><span>Verbinde...</span></div>';
|
||||
|
||||
// EventSource für Server-Sent Events
|
||||
const eventSource = new EventSource(`/api/postfaecher/${id}/abrufen/stream`);
|
||||
let dateiCount = 0;
|
||||
let currentOrdner = '';
|
||||
|
||||
eventSource.onmessage = (event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
|
||||
switch (data.type) {
|
||||
case 'start':
|
||||
logContainer.innerHTML = `<div class="log-entry info">
|
||||
<span>Starte Abruf: ${escapeHtml(data.postfach)}</span>
|
||||
<small>${data.bereits_verarbeitet} bereits verarbeitet</small>
|
||||
</div>`;
|
||||
break;
|
||||
|
||||
case 'info':
|
||||
logContainer.innerHTML += `<div class="log-entry info">
|
||||
<span>${escapeHtml(data.nachricht)}</span>
|
||||
</div>`;
|
||||
break;
|
||||
|
||||
case 'ordner':
|
||||
currentOrdner = data.name;
|
||||
logContainer.innerHTML += `<div class="log-entry info" id="ordner-status">
|
||||
<span>📁 ${escapeHtml(data.name)}</span>
|
||||
</div>`;
|
||||
break;
|
||||
|
||||
case 'mails':
|
||||
const ordnerStatus = document.getElementById('ordner-status');
|
||||
if (ordnerStatus) {
|
||||
ordnerStatus.innerHTML = `<span>📁 ${escapeHtml(data.ordner)}: ${data.anzahl} Mails</span>`;
|
||||
ordnerStatus.id = ''; // ID entfernen für nächsten Ordner
|
||||
}
|
||||
break;
|
||||
|
||||
case 'datei':
|
||||
dateiCount++;
|
||||
logContainer.innerHTML += `<div class="log-entry success">
|
||||
<span>✓ ${escapeHtml(data.original_name)}</span>
|
||||
<small>${formatBytes(data.groesse)}</small>
|
||||
</div>`;
|
||||
// Scroll nach unten
|
||||
logContainer.scrollTop = logContainer.scrollHeight;
|
||||
break;
|
||||
|
||||
case 'skip':
|
||||
logContainer.innerHTML += `<div class="log-entry" style="opacity:0.6;">
|
||||
<span>⊘ ${escapeHtml(data.datei)}: ${data.grund}</span>
|
||||
</div>`;
|
||||
break;
|
||||
|
||||
case 'fehler':
|
||||
logContainer.innerHTML += `<div class="log-entry error">
|
||||
<span>✗ ${escapeHtml(data.nachricht)}</span>
|
||||
</div>`;
|
||||
break;
|
||||
|
||||
case 'fertig':
|
||||
logContainer.innerHTML += `<div class="log-entry success" style="font-weight:bold;">
|
||||
<span>✓ Fertig: ${data.anzahl} Dateien gespeichert</span>
|
||||
</div>`;
|
||||
eventSource.close();
|
||||
ladePostfaecher();
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
eventSource.onerror = (error) => {
|
||||
logContainer.innerHTML += `<div class="log-entry error">
|
||||
<span>✗ Verbindung unterbrochen</span>
|
||||
</div>`;
|
||||
eventSource.close();
|
||||
};
|
||||
}
|
||||
|
||||
function formatBytes(bytes) {
|
||||
if (bytes < 1024) return bytes + ' B';
|
||||
if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB';
|
||||
return (bytes / (1024 * 1024)).toFixed(1) + ' MB';
|
||||
}
|
||||
|
||||
async function allePostfaecherAbrufen() {
|
||||
try {
|
||||
zeigeLoading('Rufe alle Postfächer ab...');
|
||||
const result = await api('/postfaecher/abrufen-alle', { method: 'POST' });
|
||||
zeigeAbrufLog(result);
|
||||
ladePostfaecher();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
} finally {
|
||||
versteckeLoading();
|
||||
}
|
||||
}
|
||||
|
||||
async function postfachLoeschen(id) {
|
||||
if (!confirm('Postfach wirklich löschen?')) return;
|
||||
try {
|
||||
await api(`/postfaecher/${id}`, { method: 'DELETE' });
|
||||
ladePostfaecher();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
function zeigeAbrufLog(result) {
|
||||
const container = document.getElementById('abruf-log');
|
||||
|
||||
if (!result.ergebnisse || result.ergebnisse.length === 0) {
|
||||
container.innerHTML = '<p class="empty-state">Keine neuen Attachments gefunden</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
let html = '';
|
||||
for (const r of result.ergebnisse) {
|
||||
const status = r.fehler ? 'error' : 'success';
|
||||
const icon = r.fehler ? '✗' : '✓';
|
||||
html += `<div class="log-entry ${status}">
|
||||
<span>${icon} ${escapeHtml(r.postfach)}: ${r.anzahl || 0} Dateien</span>
|
||||
${r.fehler ? `<small>${escapeHtml(r.fehler)}</small>` : ''}
|
||||
</div>`;
|
||||
|
||||
if (r.dateien) {
|
||||
for (const d of r.dateien) {
|
||||
html += `<div class="log-entry info">
|
||||
<span style="padding-left: 1rem;">→ ${escapeHtml(d)}</span>
|
||||
</div>`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
container.innerHTML = html;
|
||||
}
|
||||
|
||||
// ============ BEREICH 2: Datei-Sortierung ============
|
||||
|
||||
async function ladeOrdner() {
|
||||
try {
|
||||
const ordner = await api('/ordner');
|
||||
renderOrdner(ordner);
|
||||
} catch (error) {
|
||||
console.error('Fehler:', error);
|
||||
}
|
||||
}
|
||||
|
||||
function renderOrdner(ordner) {
|
||||
const container = document.getElementById('ordner-liste');
|
||||
|
||||
if (!ordner || ordner.length === 0) {
|
||||
container.innerHTML = '<p class="empty-state">Keine Ordner konfiguriert</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
container.innerHTML = ordner.map(o => `
|
||||
<div class="config-item">
|
||||
<div class="config-item-info">
|
||||
<h4>${escapeHtml(o.name)} ${o.rekursiv ? '<span class="badge badge-info">rekursiv</span>' : ''}</h4>
|
||||
<small>${escapeHtml(o.pfad)} → ${escapeHtml(o.ziel_ordner)}</small>
|
||||
<small style="display:block;">${(o.dateitypen || []).join(', ')}</small>
|
||||
</div>
|
||||
<div class="config-item-actions">
|
||||
<button class="btn btn-sm" onclick="ordnerScannen(${o.id})">Scannen</button>
|
||||
<button class="btn btn-sm btn-danger" onclick="ordnerLoeschen(${o.id})">×</button>
|
||||
</div>
|
||||
</div>
|
||||
`).join('');
|
||||
}
|
||||
|
||||
function zeigeOrdnerModal() {
|
||||
document.getElementById('ord-name').value = '';
|
||||
document.getElementById('ord-pfad').value = '/srv/http/dateiverwaltung/data/inbox/';
|
||||
document.getElementById('ord-ziel').value = '/srv/http/dateiverwaltung/data/archiv/';
|
||||
setCheckedTypes('ord-typen-gruppe', ['.pdf', '.jpg', '.jpeg', '.png', '.tiff']);
|
||||
document.getElementById('ord-rekursiv').value = 'true';
|
||||
document.getElementById('ordner-modal').classList.remove('hidden');
|
||||
}
|
||||
|
||||
async function speichereOrdner() {
|
||||
const dateitypen = getCheckedTypes('ord-typen-gruppe');
|
||||
if (dateitypen.length === 0) {
|
||||
alert('Bitte mindestens einen Dateityp auswählen');
|
||||
return;
|
||||
}
|
||||
|
||||
const data = {
|
||||
name: document.getElementById('ord-name').value.trim(),
|
||||
pfad: document.getElementById('ord-pfad').value.trim(),
|
||||
ziel_ordner: document.getElementById('ord-ziel').value.trim(),
|
||||
rekursiv: document.getElementById('ord-rekursiv').value === 'true',
|
||||
dateitypen: dateitypen
|
||||
};
|
||||
|
||||
if (!data.name || !data.pfad || !data.ziel_ordner) {
|
||||
alert('Bitte alle Felder ausfüllen');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
zeigeLoading('Speichere Ordner...');
|
||||
await api('/ordner', { method: 'POST', body: JSON.stringify(data) });
|
||||
schliesseModal('ordner-modal');
|
||||
ladeOrdner();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
} finally {
|
||||
versteckeLoading();
|
||||
}
|
||||
}
|
||||
|
||||
async function ordnerLoeschen(id) {
|
||||
if (!confirm('Ordner wirklich löschen?')) return;
|
||||
try {
|
||||
await api(`/ordner/${id}`, { method: 'DELETE' });
|
||||
ladeOrdner();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function ordnerScannen(id) {
|
||||
try {
|
||||
const result = await api(`/ordner/${id}/scannen`);
|
||||
alert(`${result.anzahl} Dateien im Ordner gefunden`);
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
// ============ Regeln ============
|
||||
|
||||
let editierteRegelId = null;
|
||||
|
||||
async function ladeRegeln() {
|
||||
try {
|
||||
const regeln = await api('/regeln');
|
||||
renderRegeln(regeln);
|
||||
} catch (error) {
|
||||
console.error('Fehler:', error);
|
||||
}
|
||||
}
|
||||
|
||||
function renderRegeln(regeln) {
|
||||
const container = document.getElementById('regeln-liste');
|
||||
|
||||
if (!regeln || regeln.length === 0) {
|
||||
container.innerHTML = '<p class="empty-state">Keine Regeln definiert</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
container.innerHTML = regeln.map(r => `
|
||||
<div class="config-item">
|
||||
<div class="config-item-info">
|
||||
<h4>${escapeHtml(r.name)} <span class="badge badge-info">Prio ${r.prioritaet}</span></h4>
|
||||
<small>${escapeHtml(r.schema)}</small>
|
||||
</div>
|
||||
<div class="config-item-actions">
|
||||
<button class="btn btn-sm" onclick="bearbeiteRegel(${r.id})">Bearbeiten</button>
|
||||
<button class="btn btn-sm btn-danger" onclick="regelLoeschen(${r.id})">×</button>
|
||||
</div>
|
||||
</div>
|
||||
`).join('');
|
||||
}
|
||||
|
||||
function zeigeRegelModal(regel = null) {
|
||||
editierteRegelId = regel?.id || null;
|
||||
document.getElementById('regel-modal-title').textContent = regel ? 'Regel bearbeiten' : 'Regel hinzufügen';
|
||||
|
||||
document.getElementById('regel-name').value = regel?.name || '';
|
||||
document.getElementById('regel-prioritaet').value = regel?.prioritaet || 100;
|
||||
document.getElementById('regel-muster').value = JSON.stringify(regel?.muster || {"text_match_any": [], "text_match": []}, null, 2);
|
||||
document.getElementById('regel-extraktion').value = JSON.stringify(regel?.extraktion || {}, null, 2);
|
||||
document.getElementById('regel-schema').value = regel?.schema || '{datum} - Dokument.pdf';
|
||||
document.getElementById('regel-unterordner').value = regel?.unterordner || '';
|
||||
document.getElementById('regel-test-text').value = '';
|
||||
document.getElementById('regel-test-ergebnis').classList.add('hidden');
|
||||
|
||||
document.getElementById('regel-modal').classList.remove('hidden');
|
||||
}
|
||||
|
||||
async function bearbeiteRegel(id) {
|
||||
try {
|
||||
const regeln = await api('/regeln');
|
||||
const regel = regeln.find(r => r.id === id);
|
||||
if (regel) zeigeRegelModal(regel);
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function speichereRegel() {
|
||||
let muster, extraktion;
|
||||
|
||||
try {
|
||||
muster = JSON.parse(document.getElementById('regel-muster').value);
|
||||
} catch (e) {
|
||||
alert('Ungültiges JSON im Muster-Feld');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
extraktion = JSON.parse(document.getElementById('regel-extraktion').value);
|
||||
} catch (e) {
|
||||
alert('Ungültiges JSON im Extraktion-Feld');
|
||||
return;
|
||||
}
|
||||
|
||||
const data = {
|
||||
name: document.getElementById('regel-name').value.trim(),
|
||||
prioritaet: parseInt(document.getElementById('regel-prioritaet').value),
|
||||
muster,
|
||||
extraktion,
|
||||
schema: document.getElementById('regel-schema').value.trim(),
|
||||
unterordner: document.getElementById('regel-unterordner').value.trim() || null
|
||||
};
|
||||
|
||||
if (!data.name) {
|
||||
alert('Bitte einen Namen eingeben');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (editierteRegelId) {
|
||||
await api(`/regeln/${editierteRegelId}`, { method: 'PUT', body: JSON.stringify(data) });
|
||||
} else {
|
||||
await api('/regeln', { method: 'POST', body: JSON.stringify(data) });
|
||||
}
|
||||
schliesseModal('regel-modal');
|
||||
ladeRegeln();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function regelLoeschen(id) {
|
||||
if (!confirm('Regel wirklich löschen?')) return;
|
||||
try {
|
||||
await api(`/regeln/${id}`, { method: 'DELETE' });
|
||||
ladeRegeln();
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function testeRegel() {
|
||||
const text = document.getElementById('regel-test-text').value;
|
||||
if (!text) {
|
||||
alert('Bitte Testtext eingeben');
|
||||
return;
|
||||
}
|
||||
|
||||
let muster, extraktion;
|
||||
try {
|
||||
muster = JSON.parse(document.getElementById('regel-muster').value);
|
||||
extraktion = JSON.parse(document.getElementById('regel-extraktion').value);
|
||||
} catch (e) {
|
||||
alert('Ungültiges JSON');
|
||||
return;
|
||||
}
|
||||
|
||||
const regel = {
|
||||
name: 'Test',
|
||||
muster,
|
||||
extraktion,
|
||||
schema: document.getElementById('regel-schema').value.trim()
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await api('/regeln/test', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ regel, text })
|
||||
});
|
||||
|
||||
const container = document.getElementById('regel-test-ergebnis');
|
||||
container.classList.remove('hidden', 'success', 'error');
|
||||
|
||||
if (result.passt) {
|
||||
container.classList.add('success');
|
||||
container.textContent = `✓ Regel passt!\n\nExtrahiert:\n${JSON.stringify(result.extrahiert, null, 2)}\n\nDateiname:\n${result.dateiname}`;
|
||||
} else {
|
||||
container.classList.add('error');
|
||||
container.textContent = '✗ Regel passt nicht';
|
||||
}
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
}
|
||||
}
|
||||
|
||||
// ============ Sortierung starten ============
|
||||
|
||||
async function sortierungStarten() {
|
||||
try {
|
||||
zeigeLoading('Sortiere Dateien...');
|
||||
const result = await api('/sortierung/starten', { method: 'POST' });
|
||||
zeigeSortierungLog(result);
|
||||
} catch (error) {
|
||||
alert('Fehler: ' + error.message);
|
||||
} finally {
|
||||
versteckeLoading();
|
||||
}
|
||||
}
|
||||
|
||||
function zeigeSortierungLog(result) {
|
||||
const container = document.getElementById('sortierung-log');
|
||||
|
||||
if (!result.verarbeitet || result.verarbeitet.length === 0) {
|
||||
container.innerHTML = '<p class="empty-state">Keine Dateien verarbeitet</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
let html = `<div class="log-entry info">
|
||||
<span>Gesamt: ${result.gesamt} | Sortiert: ${result.sortiert} | ZUGFeRD: ${result.zugferd} | Fehler: ${result.fehler}</span>
|
||||
</div>`;
|
||||
|
||||
for (const d of result.verarbeitet) {
|
||||
const status = d.fehler ? 'error' : (d.zugferd ? 'info' : 'success');
|
||||
const icon = d.fehler ? '✗' : (d.zugferd ? '🧾' : '✓');
|
||||
html += `<div class="log-entry ${status}">
|
||||
<span>${icon} ${escapeHtml(d.neuer_name || d.original)}</span>
|
||||
${d.fehler ? `<small>${escapeHtml(d.fehler)}</small>` : ''}
|
||||
</div>`;
|
||||
}
|
||||
|
||||
container.innerHTML = html;
|
||||
}
|
||||
|
||||
// ============ Utilities ============
|
||||
|
||||
function schliesseModal(id) {
|
||||
document.getElementById(id).classList.add('hidden');
|
||||
}
|
||||
|
||||
function escapeHtml(text) {
|
||||
if (!text) return '';
|
||||
const div = document.createElement('div');
|
||||
div.textContent = text;
|
||||
return div.innerHTML;
|
||||
}
|
||||
|
||||
document.addEventListener('click', (e) => {
|
||||
if (e.target.classList.contains('modal')) {
|
||||
e.target.classList.add('hidden');
|
||||
}
|
||||
});
|
||||
|
||||
document.addEventListener('keydown', (e) => {
|
||||
if (e.key === 'Escape') {
|
||||
document.querySelectorAll('.modal:not(.hidden)').forEach(m => m.classList.add('hidden'));
|
||||
}
|
||||
});
|
||||
|
|
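The `postfachAbrufen()` function above consumes the mail retrieval as Server-Sent Events: it opens an `EventSource` on `/api/postfaecher/{id}/abrufen/stream` and dispatches on JSON messages whose `type` is `start`, `info`, `ordner`, `mails`, `datei`, `skip`, `fehler` or `fertig`. A minimal sketch of a compatible stream handler — assuming the FastAPI backend used elsewhere in the repository; `sse()`, `router` and the placeholder `event_stream()` generator are illustrative, not the project's actual IMAP logic:

```python
import json

from fastapi import APIRouter
from fastapi.responses import StreamingResponse

router = APIRouter(prefix="/api")


def sse(event: dict) -> str:
    # One SSE frame: a "data:" line with the JSON payload, terminated by a blank line.
    return f"data: {json.dumps(event)}\n\n"


@router.get("/postfaecher/{postfach_id}/abrufen/stream")
async def abrufen_stream(postfach_id: int):
    async def event_stream():
        # Placeholder events only; a real implementation would walk the IMAP folders.
        yield sse({"type": "start", "postfach": "Beispiel", "bereits_verarbeitet": 0})
        yield sse({"type": "datei", "original_name": "rechnung.pdf", "groesse": 12345})
        yield sse({"type": "fertig", "anzahl": 1})

    return StreamingResponse(event_stream(), media_type="text/event-stream")
```

Each frame is a single `data:` line followed by a blank line, which is exactly what `eventSource.onmessage` receives and parses with `JSON.parse(event.data)`.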
@@ -1,366 +0,0 @@
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Dateiverwaltung</title>
|
||||
<link rel="stylesheet" href="/static/css/style.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="app">
|
||||
<!-- Header -->
|
||||
<header class="header">
|
||||
<div class="header-left">
|
||||
<h1>Dateiverwaltung</h1>
|
||||
</div>
|
||||
<div class="header-right">
|
||||
<span id="status-indicator"></span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<!-- Main Content -->
|
||||
<div class="main-container">
|
||||
<!-- Bereich 1: Mail-Abruf -->
|
||||
<section class="bereich">
|
||||
<div class="bereich-header">
|
||||
<h2>📧 Mail-Abruf</h2>
|
||||
<p class="bereich-desc">Attachments aus Postfächern in Ordner speichern</p>
|
||||
</div>
|
||||
|
||||
<div class="bereich-content">
|
||||
<!-- Postfächer Liste -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h3>Postfächer</h3>
|
||||
<button class="btn btn-sm btn-primary" onclick="zeigePostfachModal()">+ Hinzufügen</button>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="postfaecher-liste">
|
||||
<p class="empty-state">Keine Postfächer konfiguriert</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Abruf starten -->
|
||||
<div class="action-bar">
|
||||
<button class="btn btn-success btn-large" onclick="allePostfaecherAbrufen()">
|
||||
▶ Alle Postfächer abrufen
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Letzter Abruf Log -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h3>Letzter Abruf</h3>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="abruf-log" class="log-output">
|
||||
<p class="empty-state">Noch kein Abruf durchgeführt</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Bereich 2: Datei-Sortierung -->
|
||||
<section class="bereich">
|
||||
<div class="bereich-header">
|
||||
<h2>📁 Datei-Sortierung</h2>
|
||||
<p class="bereich-desc">Dateien nach Regeln umbenennen und verschieben</p>
|
||||
</div>
|
||||
|
||||
<div class="bereich-content">
|
||||
<!-- Quell-Ordner -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h3>Quell-Ordner</h3>
|
||||
<button class="btn btn-sm btn-primary" onclick="zeigeOrdnerModal()">+ Hinzufügen</button>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="ordner-liste">
|
||||
<p class="empty-state">Keine Ordner konfiguriert</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Regeln -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h3>Sortier-Regeln</h3>
|
||||
<button class="btn btn-sm btn-primary" onclick="zeigeRegelModal()">+ Hinzufügen</button>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="regeln-liste">
|
||||
<p class="empty-state">Keine Regeln definiert</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Sortierung starten -->
|
||||
<div class="action-bar">
|
||||
<button class="btn btn-success btn-large" onclick="sortierungStarten()">
|
||||
▶ Sortierung starten
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Sortierungs-Log -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h3>Verarbeitete Dateien</h3>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="sortierung-log" class="log-output">
|
||||
<p class="empty-state">Noch keine Dateien verarbeitet</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<!-- Modal: Postfach hinzufügen -->
|
||||
<div id="postfach-modal" class="modal hidden">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h3>Postfach hinzufügen</h3>
|
||||
<button class="modal-close" onclick="schliesseModal('postfach-modal')">×</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<label>Name</label>
|
||||
<input type="text" id="pf-name" placeholder="z.B. Firma Rechnungen">
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="form-group">
|
||||
<label>IMAP Server</label>
|
||||
<input type="text" id="pf-server" placeholder="imap.example.com">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Port</label>
|
||||
<input type="number" id="pf-port" value="993">
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>E-Mail</label>
|
||||
<input type="email" id="pf-email" placeholder="mail@example.com">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Passwort</label>
|
||||
<input type="password" id="pf-passwort">
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="form-group">
|
||||
<label>IMAP-Ordner</label>
|
||||
<input type="text" id="pf-ordner" value="INBOX">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Alle Ordner durchsuchen</label>
|
||||
<select id="pf-alle-ordner">
|
||||
<option value="false">Nein (nur angegebenen Ordner)</option>
|
||||
<option value="true">Ja (alle Ordner)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Welche Mails durchsuchen</label>
|
||||
<select id="pf-nur-ungelesen">
|
||||
<option value="false" selected>Alle Mails</option>
|
||||
<option value="true">Nur ungelesene Mails</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Ziel-Ordner</label>
|
||||
<div class="input-with-btn">
|
||||
<input type="text" id="pf-ziel" value="/srv/http/dateiverwaltung/data/inbox/">
|
||||
<button class="btn" type="button" onclick="oeffneBrowser('pf-ziel')">📁</button>
|
||||
</div>
|
||||
<small>Hier landen die Attachments</small>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Erlaubte Dateitypen</label>
|
||||
<div class="checkbox-group" id="pf-typen-gruppe">
|
||||
<label class="checkbox-item"><input type="checkbox" value=".pdf" checked> PDF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".jpg"> JPG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".jpeg"> JPEG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".png"> PNG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".gif"> GIF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".tiff"> TIFF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".doc"> DOC</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".docx"> DOCX</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xls"> XLS</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xlsx"> XLSX</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".csv"> CSV</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".txt"> TXT</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".zip"> ZIP</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xml"> XML</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Max. Größe (MB)</label>
|
||||
<input type="number" id="pf-max-groesse" value="25" style="width: 100px;">
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn" onclick="schliesseModal('postfach-modal')">Abbrechen</button>
|
||||
<button class="btn btn-primary" onclick="speicherePostfach()">Speichern</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: Ordner hinzufügen -->
|
||||
<div id="ordner-modal" class="modal hidden">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h3>Quell-Ordner hinzufügen</h3>
|
||||
<button class="modal-close" onclick="schliesseModal('ordner-modal')">×</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<label>Name</label>
|
||||
<input type="text" id="ord-name" placeholder="z.B. Firma Inbox">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Quell-Pfad (wo liegen die Dateien?)</label>
|
||||
<div class="input-with-btn">
|
||||
<input type="text" id="ord-pfad" value="/srv/http/dateiverwaltung/data/inbox/">
|
||||
<button class="btn" type="button" onclick="oeffneBrowser('ord-pfad')">📁</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Ziel-Ordner (wohin nach Sortierung?)</label>
|
||||
<div class="input-with-btn">
|
||||
<input type="text" id="ord-ziel" value="/srv/http/dateiverwaltung/data/archiv/">
|
||||
<button class="btn" type="button" onclick="oeffneBrowser('ord-ziel')">📁</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Dateitypen</label>
|
||||
<div class="checkbox-group" id="ord-typen-gruppe">
|
||||
<label class="checkbox-item"><input type="checkbox" value=".pdf" checked> PDF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".jpg" checked> JPG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".jpeg" checked> JPEG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".png" checked> PNG</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".gif"> GIF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".tiff" checked> TIFF</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".bmp"> BMP</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".doc"> DOC</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".docx"> DOCX</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xls"> XLS</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xlsx"> XLSX</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".csv"> CSV</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".txt"> TXT</label>
|
||||
<label class="checkbox-item"><input type="checkbox" value=".xml"> XML</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Unterordner einschließen</label>
|
||||
<select id="ord-rekursiv">
|
||||
<option value="true" selected>Ja (rekursiv)</option>
|
||||
<option value="false">Nein (nur dieser Ordner)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn" onclick="schliesseModal('ordner-modal')">Abbrechen</button>
|
||||
<button class="btn btn-primary" onclick="speichereOrdner()">Speichern</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: Regel hinzufügen -->
|
||||
<div id="regel-modal" class="modal hidden">
|
||||
<div class="modal-content modal-large">
|
||||
<div class="modal-header">
|
||||
<h3 id="regel-modal-title">Regel hinzufügen</h3>
|
||||
<button class="modal-close" onclick="schliesseModal('regel-modal')">×</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="form-row">
|
||||
<div class="form-group">
|
||||
<label>Name</label>
|
||||
<input type="text" id="regel-name" placeholder="z.B. Sonepar Rechnung">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Priorität (niedriger = wichtiger)</label>
|
||||
<input type="number" id="regel-prioritaet" value="100">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Erkennungsmuster (JSON)</label>
|
||||
<textarea id="regel-muster" class="code-input" rows="4">{
|
||||
"text_match_any": ["sonepar"],
|
||||
"text_match": ["rechnung"]
|
||||
}</textarea>
|
||||
<small>text_match_any: mindestens eins | text_match: alle müssen passen</small>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Feld-Extraktion (JSON)</label>
|
||||
<textarea id="regel-extraktion" class="code-input" rows="6">{
|
||||
"datum": {"regex": "(\\d{2}[./]\\d{2}[./]\\d{4})", "format": "%d.%m.%Y"},
|
||||
"rechnungsnummer": {"regex": "Rechnungsnummer[:\\s]*(\\d+)"},
|
||||
"betrag": {"regex": "Gesamtbetrag[:\\s]*([\\d.,]+)", "typ": "betrag"},
|
||||
"ersteller": {"wert": "Sonepar"}
|
||||
}</textarea>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Dateiname-Schema</label>
|
||||
<input type="text" id="regel-schema"
|
||||
value="{datum} - Rechnung - {ersteller} - {rechnungsnummer} - {betrag} EUR.pdf">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>Ziel-Unterordner (optional)</label>
|
||||
<input type="text" id="regel-unterordner" placeholder="sonepar">
|
||||
<small>Wird an den Ziel-Ordner des Quell-Ordners angehängt</small>
|
||||
</div>
|
||||
|
||||
<!-- Tester -->
|
||||
<div class="form-group">
|
||||
<label>Regel testen</label>
|
||||
<textarea id="regel-test-text" rows="3" placeholder="Text zum Testen einfügen..."></textarea>
|
||||
<button class="btn btn-sm" onclick="testeRegel()">Testen</button>
|
||||
<div id="regel-test-ergebnis" class="test-result hidden"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn" onclick="schliesseModal('regel-modal')">Abbrechen</button>
|
||||
<button class="btn btn-primary" onclick="speichereRegel()">Speichern</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal: Verzeichnis-Browser -->
|
||||
<div id="browser-modal" class="modal hidden">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h3>Verzeichnis wählen</h3>
|
||||
<button class="modal-close" onclick="schliesseModal('browser-modal')">×</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="file-browser">
|
||||
<div class="file-browser-path">
|
||||
<span id="browser-current-path">/</span>
|
||||
</div>
|
||||
<ul class="file-browser-list" id="browser-list"></ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn" onclick="schliesseModal('browser-modal')">Abbrechen</button>
|
||||
<button class="btn btn-primary" onclick="browserAuswahl()">Auswählen</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Loading Overlay -->
|
||||
<div id="loading-overlay" class="loading-overlay hidden">
|
||||
<div class="spinner"></div>
|
||||
<div class="loading-text" id="loading-text">Wird geladen...</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/app.js"></script>
|
||||
</body>
|
||||
</html>
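The directory picker in this markup is populated by `ladeBrowserInhalt()`, which calls `/api/browse?path=...` and expects either an `error` field or an object with `current`, `parent` and `entries`, each entry being a `{name, path}` pair for a subdirectory. A minimal sketch of that response shape — a plain `pathlib` helper written here for illustration, not the project's actual handler:

```python
from pathlib import Path


def browse(path: str) -> dict:
    """Build the listing shape the frontend's directory browser expects."""
    current = Path(path)
    if not current.is_dir():
        return {"error": f"Kein Verzeichnis: {path}"}
    entries = [
        {"name": p.name, "path": str(p)}
        for p in sorted(current.iterdir())
        if p.is_dir()
    ]
    parent = str(current.parent) if current.parent != current else None
    return {"current": str(current), "parent": parent, "entries": entries}
```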
@@ -1,172 +0,0 @@
# Beispiel-Regeln für die Dateiverwaltung
# Diese Datei dient als Referenz - Regeln werden in der Web-UI konfiguriert

# ============================================
# RECHNUNGEN
# ============================================

sonepar_rechnung:
  name: "Sonepar Rechnung"
  prioritaet: 10
  muster:
    text_match_any:
      - sonepar
      - "elektro großhandel"
    text_match:
      - rechnung
  extraktion:
    datum:
      regex: '(\d{2}[./]\d{2}[./]\d{4})'
      typ: datum
      format: "%d.%m.%Y"
    rechnungsnummer:
      regex: 'Rechnungsnummer[:\s]*(\d+)'
    betrag:
      regex: 'Gesamtbetrag[:\s]*([\d.,]+)\s*(?:EUR|€)'
      typ: betrag
    kategorie:
      wert: "Rechnung"
    ersteller:
      wert: "Sonepar"
  schema: "{datum} - {kategorie} - {ersteller} - {rechnungsnummer} - Material - {betrag} EUR.pdf"
  ziel_ordner: "/archiv/rechnungen/sonepar"

wuerth_rechnung:
  name: "Würth Rechnung"
  prioritaet: 10
  muster:
    text_match_any:
      - würth
      - wuerth
    text_match:
      - rechnung
  extraktion:
    datum:
      regex: '(\d{2}[./]\d{2}[./]\d{4})'
      typ: datum
      format: "%d.%m.%Y"
    rechnungsnummer:
      regex: '(?:Rechnungs?nummer|Beleg)[:\s]*([A-Z0-9-]+)'
    betrag:
      regex: '(?:Gesamtbetrag|Summe|Total)[:\s]*([\d.,]+)\s*(?:EUR|€)?'
      typ: betrag
    kategorie:
      wert: "Rechnung"
    ersteller:
      wert: "Würth"
  schema: "{datum} - {kategorie} - {ersteller} - {rechnungsnummer} - Material - {betrag} EUR.pdf"
  ziel_ordner: "/archiv/rechnungen/wuerth"

amazon_rechnung:
  name: "Amazon Rechnung"
  prioritaet: 10
  muster:
    text_match_any:
      - amazon
      - "Amazon EU S.à r.l."
    text_match:
      - rechnung
  extraktion:
    datum:
      regex: '(\d{2}[./]\d{2}[./]\d{4})'
      typ: datum
      format: "%d.%m.%Y"
    rechnungsnummer:
      regex: '(?:Rechnungsnummer|Invoice)[:\s]*([A-Z0-9-]+)'
    betrag:
      regex: '(?:Gesamtbetrag|Grand Total)[:\s]*([\d.,]+)\s*(?:EUR|€)?'
      typ: betrag
    kategorie:
      wert: "Rechnung"
    ersteller:
      wert: "Amazon"
  schema: "{datum} - {kategorie} - {ersteller} - {rechnungsnummer} - {betrag} EUR.pdf"
  ziel_ordner: "/archiv/rechnungen/amazon"

allgemeine_rechnung:
  name: "Allgemeine Rechnung"
  prioritaet: 50
  muster:
    text_match_any:
      - rechnung
      - invoice
      - faktura
  extraktion:
    datum:
      regex: '(\d{2}[./]\d{2}[./]\d{4})'
      typ: datum
      format: "%d.%m.%Y"
    rechnungsnummer:
      regex: '(?:Rechnungs?nummer|Rechnung\s*Nr\.?|Invoice|Beleg)[:\s#]*([A-Z0-9-]+)'
    betrag:
      regex: '(?:Gesamtbetrag|Summe|Total|Endbetrag|Rechnungsbetrag)[:\s]*([\d.,]+)\s*(?:EUR|€)?'
      typ: betrag
    kategorie:
      wert: "Rechnung"
  schema: "{datum} - {kategorie} - {rechnungsnummer} - {betrag} EUR.pdf"
  ziel_ordner: "/archiv/rechnungen/sonstige"


# ============================================
# DOKUMENTE (nicht-wiederkehrend)
# ============================================

zeugnis:
  name: "Zeugnis"
  prioritaet: 20
  muster:
    text_match_any:
      - zeugnis
      - zertifikat
      - bescheinigung
      - certificate
  extraktion:
    typ:
      regex: '(Zeugnis|Zertifikat|Bescheinigung|Certificate)'
    aussteller:
      regex: '(?:ausgestellt von|issued by|Schule|Universität|Hochschule|Firma)[:\s]*([A-Za-zäöüÄÖÜß\s]+)'
    jahr:
      regex: '(20\d{2})'
  schema: "{typ} - {aussteller} - {jahr}.pdf"
  ziel_ordner: "/archiv/dokumente/zeugnisse"

vertrag:
  name: "Vertrag"
  prioritaet: 20
  muster:
    text_match_any:
      - vertrag
      - vereinbarung
      - contract
      - agreement
  extraktion:
    typ:
      wert: "Vertrag"
    partner:
      regex: '(?:zwischen|Vertragspartner|mit)[:\s]*([A-Za-zäöüÄÖÜß\s]+)'
    datum:
      regex: '(\d{2}[./]\d{2}[./]\d{4})'
      typ: datum
      format: "%d.%m.%Y"
  schema: "{typ} - {partner} - {datum}.pdf"
  ziel_ordner: "/archiv/dokumente/vertraege"


# ============================================
# REGEX REFERENZ
# ============================================
#
# Häufige Muster:
#
# Datum (DD.MM.YYYY): (\d{2}[./]\d{2}[./]\d{4})
# Datum (YYYY-MM-DD): (\d{4}-\d{2}-\d{2})
# Betrag mit EUR: ([\d.,]+)\s*(?:EUR|€)
# Rechnungsnummer: (?:Rechnung|Invoice)[:\s#]*([A-Z0-9-]+)
# E-Mail: ([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})
# IBAN: ([A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}([A-Z0-9]?){0,16})
#
# Tipps:
# - (?:...) = Nicht-einfangende Gruppe
# - [:\s]* = Optional Doppelpunkt und/oder Leerzeichen
# - \d+ = Eine oder mehr Ziffern
# - [A-Z0-9-]+ = Buchstaben, Zahlen und Bindestriche
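To illustrate how the extraction patterns and the filename schema above fit together, here is a small self-contained sketch. The sample text and the ISO-date normalisation are assumptions for the example; this is not the project's actual rule engine, only the `allgemeine_rechnung` patterns applied with Python's `re` module:

```python
import re
from datetime import datetime

text = "Rechnung Nr. 4711 vom 03.02.2024\nGesamtbetrag: 1.234,56 EUR"

# Patterns taken verbatim from the "allgemeine_rechnung" rule above
extraktion = {
    "datum": r"(\d{2}[./]\d{2}[./]\d{4})",
    "rechnungsnummer": r"(?:Rechnungs?nummer|Rechnung\s*Nr\.?|Invoice|Beleg)[:\s#]*([A-Z0-9-]+)",
    "betrag": r"(?:Gesamtbetrag|Summe|Total|Endbetrag|Rechnungsbetrag)[:\s]*([\d.,]+)\s*(?:EUR|€)?",
}

felder = {"kategorie": "Rechnung"}
for feld, muster in extraktion.items():
    treffer = re.search(muster, text, re.IGNORECASE)
    if treffer:
        felder[feld] = treffer.group(1)

# Assumption: dates are normalised to ISO format so filenames sort chronologically;
# the real engine's "typ: datum" handling may differ.
felder["datum"] = datetime.strptime(felder["datum"], "%d.%m.%Y").strftime("%Y-%m-%d")

schema = "{datum} - {kategorie} - {rechnungsnummer} - {betrag} EUR.pdf"
print(schema.format(**felder))  # 2024-02-03 - Rechnung - 4711 - 1.234,56 EUR.pdf
```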