"""Import-Service: Videos erkennen, TVDB-Match, umbenennen, einsortieren""" import asyncio import json import logging import os import re import shutil from pathlib import Path from typing import Optional import aiomysql from app.config import Config from app.services.library import ( LibraryService, VIDEO_EXTENSIONS, RE_SXXEXX, RE_XXxXX ) from app.services.tvdb import TVDBService from app.services.probe import ProbeService # Serienname aus Dateiname extrahieren (alles vor SxxExx) RE_SERIES_FROM_NAME = re.compile( r'^(.+?)[\s._-]+[Ss]\d{1,2}[Ee]\d{1,3}', re.IGNORECASE ) RE_SERIES_FROM_XXx = re.compile( r'^(.+?)[\s._-]+\d{1,2}x\d{2,3}', re.IGNORECASE ) class ImporterService: """Video-Import: Erkennung, TVDB-Matching, Umbenennung, Kopieren/Verschieben""" def __init__(self, config: Config, library_service: LibraryService, tvdb_service: TVDBService): self.config = config self.library = library_service self.tvdb = tvdb_service self._db_pool: Optional[aiomysql.Pool] = None def set_db_pool(self, pool: aiomysql.Pool) -> None: self._db_pool = pool @property def _naming_pattern(self) -> str: return self.config.settings.get("library", {}).get( "import_naming_pattern", "{series} - S{season:02d}E{episode:02d} - {title}.{ext}" ) @property def _season_pattern(self) -> str: return self.config.settings.get("library", {}).get( "import_season_pattern", "Season {season:02d}" ) # === DB-Tabellen erstellen === async def init_db(self) -> None: """Import-Tabellen erstellen""" if not self._db_pool: return try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute(""" CREATE TABLE IF NOT EXISTS import_jobs ( id INT AUTO_INCREMENT PRIMARY KEY, source_path VARCHAR(1024) NOT NULL, target_library_id INT NOT NULL, status ENUM('pending','analyzing','ready', 'importing','done','error') DEFAULT 'pending', mode ENUM('copy','move') DEFAULT 'copy', naming_pattern VARCHAR(256), season_pattern VARCHAR(256), total_files INT DEFAULT 0, processed_files INT DEFAULT 0, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (target_library_id) REFERENCES library_paths(id) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 """) await cur.execute(""" CREATE TABLE IF NOT EXISTS import_items ( id INT AUTO_INCREMENT PRIMARY KEY, import_job_id INT NOT NULL, source_file VARCHAR(1024) NOT NULL, source_size BIGINT NOT NULL DEFAULT 0, source_duration DOUBLE NULL, detected_series VARCHAR(256), detected_season INT, detected_episode INT, tvdb_series_id INT NULL, tvdb_series_name VARCHAR(256), tvdb_episode_title VARCHAR(512), target_path VARCHAR(1024), target_filename VARCHAR(512), status ENUM('pending','matched','conflict', 'skipped','done','error') DEFAULT 'pending', conflict_reason VARCHAR(512) NULL, existing_file_path VARCHAR(1024) NULL, existing_file_size BIGINT NULL, user_action ENUM('overwrite','skip','rename') NULL, FOREIGN KEY (import_job_id) REFERENCES import_jobs(id) ON DELETE CASCADE, INDEX idx_job (import_job_id) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 """) logging.info("Import-Tabellen initialisiert") except Exception as e: logging.error(f"Import-Tabellen erstellen fehlgeschlagen: {e}") # === Job-Verwaltung === async def create_job(self, source_path: str, target_library_id: int, mode: str = 'copy') -> Optional[int]: """Erstellt einen Import-Job und sucht Video-Dateien im Quellordner""" if not self._db_pool: return None if not os.path.isdir(source_path): return None if mode not in ('copy', 'move'): mode = 'copy' # Video-Dateien im Quellordner finden videos = [] for root, dirs, files in os.walk(source_path): dirs[:] = [d for d in dirs if not d.startswith(".")] for f in sorted(files): ext = os.path.splitext(f)[1].lower() if ext in VIDEO_EXTENSIONS: videos.append(os.path.join(root, f)) if not videos: return None try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( "INSERT INTO import_jobs " "(source_path, target_library_id, status, mode, " "naming_pattern, season_pattern, total_files) " "VALUES (%s, %s, 'pending', %s, %s, %s, %s)", (source_path, target_library_id, mode, self._naming_pattern, self._season_pattern, len(videos)) ) job_id = cur.lastrowid # Items einfuegen for vf in videos: try: size = os.path.getsize(vf) except OSError: size = 0 await cur.execute( "INSERT INTO import_items " "(import_job_id, source_file, source_size) " "VALUES (%s, %s, %s)", (job_id, vf, size) ) logging.info( f"Import-Job erstellt: {job_id} " f"({len(videos)} Videos aus {source_path})" ) return job_id except Exception as e: logging.error(f"Import-Job erstellen fehlgeschlagen: {e}") return None async def analyze_job(self, job_id: int) -> dict: """Analysiert alle Dateien: Erkennung + TVDB-Lookup + Konflikt-Check""" if not self._db_pool: return {"error": "Keine DB-Verbindung"} try: async with self._db_pool.acquire() as conn: async with conn.cursor(aiomysql.DictCursor) as cur: # Job laden await cur.execute( "SELECT * FROM import_jobs WHERE id = %s", (job_id,) ) job = await cur.fetchone() if not job: return {"error": "Job nicht gefunden"} # Ziel-Library laden await cur.execute( "SELECT * FROM library_paths WHERE id = %s", (job["target_library_id"],) ) lib_path = await cur.fetchone() if not lib_path: return {"error": "Ziel-Library nicht gefunden"} # Status auf analyzing await cur.execute( "UPDATE import_jobs SET status = 'analyzing' " "WHERE id = %s", (job_id,) ) # Items laden await cur.execute( "SELECT * FROM import_items " "WHERE import_job_id = %s ORDER BY source_file", (job_id,) ) items = await cur.fetchall() # Jedes Item analysieren tvdb_cache = {} # Serienname -> TVDB-Info for item in items: await self._analyze_item( item, lib_path, job, tvdb_cache ) # Status auf ready async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( "UPDATE import_jobs SET status = 'ready' " "WHERE id = %s", (job_id,) ) return await self.get_job_status(job_id) except Exception as e: logging.error(f"Import-Analyse fehlgeschlagen: {e}") return {"error": str(e)} # Mindestgroesse fuer "echte" Episoden (darunter = Sample/Trailer) MIN_EPISODE_SIZE = 100 * 1024 * 1024 # 100 MiB async def _analyze_item(self, item: dict, lib_path: dict, job: dict, tvdb_cache: dict) -> None: """Einzelnes Item analysieren: Erkennung + TVDB + Konflikt""" filename = os.path.basename(item["source_file"]) ext = os.path.splitext(filename)[1].lstrip(".") # 0. Groessen-Check: Zu kleine Dateien als Sample/Trailer markieren if item["source_size"] < self.MIN_EPISODE_SIZE: try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute(""" UPDATE import_items SET status = 'skipped', conflict_reason = %s WHERE id = %s """, ( f"Vermutlich Sample/Trailer " f"({self._fmt_size(item['source_size'])})", item["id"], )) except Exception: pass return # 1. Serie/Staffel/Episode erkennen (Dateiname + Ordnername) info = self._detect_series_info(item["source_file"]) series_name = info.get("series", "") season = info.get("season") episode = info.get("episode") # 2. Dauer per ffprobe (fuer Konflikt-Check) duration = None try: media = await ProbeService.analyze(item["source_file"]) if media: duration = media.source_duration_sec except Exception: pass # 3. TVDB-Lookup (gecacht pro Serienname) tvdb_id = None tvdb_name = series_name tvdb_ep_title = "" if series_name and self.tvdb.is_configured: if series_name.lower() not in tvdb_cache: results = await self.tvdb.search_series(series_name) if results: tvdb_cache[series_name.lower()] = results[0] else: tvdb_cache[series_name.lower()] = None cached = tvdb_cache.get(series_name.lower()) if cached: tvdb_id = cached.get("tvdb_id") tvdb_name = cached.get("name", series_name) # Episodentitel aus TVDB if tvdb_id and season and episode: tvdb_ep_title = await self._get_episode_title( int(tvdb_id), season, episode ) # 4. Ziel-Pfad berechnen pattern = job.get("naming_pattern") or self._naming_pattern season_pattern = job.get("season_pattern") or self._season_pattern target_dir, target_file = self._build_target( tvdb_name or series_name or "Unbekannt", season, episode, tvdb_ep_title or "", ext, lib_path["path"], pattern, season_pattern ) target_path = os.path.join(target_dir, target_file) # 5. Konflikt-Check status = "matched" if series_name and season and episode else "pending" conflict = None existing_path = None existing_size = None if os.path.exists(target_path): existing_path = target_path existing_size = os.path.getsize(target_path) source_size = item["source_size"] # Groessen-Vergleich if source_size and existing_size: diff_pct = abs(source_size - existing_size) / max( existing_size, 1 ) * 100 if diff_pct > 20: conflict = ( f"Datei existiert bereits " f"(Quelle: {self._fmt_size(source_size)}, " f"Ziel: {self._fmt_size(existing_size)}, " f"Abweichung: {diff_pct:.0f}%)" ) else: conflict = "Datei existiert bereits (aehnliche Groesse)" else: conflict = "Datei existiert bereits" status = "conflict" # 6. In DB aktualisieren try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute(""" UPDATE import_items SET source_duration = %s, detected_series = %s, detected_season = %s, detected_episode = %s, tvdb_series_id = %s, tvdb_series_name = %s, tvdb_episode_title = %s, target_path = %s, target_filename = %s, status = %s, conflict_reason = %s, existing_file_path = %s, existing_file_size = %s WHERE id = %s """, ( duration, series_name, season, episode, tvdb_id, tvdb_name, tvdb_ep_title, target_dir, target_file, status, conflict, existing_path, existing_size, item["id"], )) except Exception as e: logging.error(f"Import-Item analysieren fehlgeschlagen: {e}") def _detect_series_info(self, file_path: str) -> dict: """Extrahiert Serienname, Staffel, Episode. Versucht zuerst den Dateinamen, dann den uebergeordneten Ordnernamen als Fallback. Der Ordnername ist oft zuverlaessiger bei Release-Gruppen-Prefixes (z.B. 'tlr-24.s07e01.mkv' vs Ordner '24.S07E01.German.DL.1080p.Bluray.x264-TLR'). """ filename = os.path.basename(file_path) parent_dir = os.path.basename(os.path.dirname(file_path)) # Beide Quellen versuchen info_file = self._parse_name(filename) info_dir = self._parse_name(parent_dir) # Strategie: Ordnername bevorzugen bei Scene-Releases. # Scene-Ordner: "24.S07E01.German.DL.1080p-TLR" -> Serie="24" # Scene-Datei: "tlr-24.s07e01.1080p.mkv" -> Serie="tlr-24" # Ordnername hat Serienname vorne, Dateiname oft Release-Tag vorne # Ordnername hat S/E -> bevorzugen (hat meist korrekten Seriennamen) if info_dir.get("season") and info_dir.get("episode"): if info_dir.get("series"): return info_dir # Ordner hat S/E aber keinen Namen -> Dateiname nehmen if info_file.get("series"): info_dir["series"] = info_file["series"] return info_dir # Dateiname hat S/E if info_file.get("season") and info_file.get("episode"): # Ordner-Serienname als Fallback wenn Datei keinen hat if not info_file.get("series") and info_dir.get("series"): info_file["series"] = info_dir["series"] return info_file return info_file def _parse_name(self, name: str) -> dict: """Extrahiert Serienname, Staffel, Episode aus einem Namen""" result = {"series": "", "season": None, "episode": None} name_no_ext = os.path.splitext(name)[0] # S01E02 Format m = RE_SXXEXX.search(name) if m: result["season"] = int(m.group(1)) result["episode"] = int(m.group(2)) sm = RE_SERIES_FROM_NAME.match(name_no_ext) if sm: result["series"] = self._clean_name(sm.group(1)) return result # 1x02 Format m = RE_XXxXX.search(name) if m: result["season"] = int(m.group(1)) result["episode"] = int(m.group(2)) sm = RE_SERIES_FROM_XXx.match(name_no_ext) if sm: result["series"] = self._clean_name(sm.group(1)) return result return result @staticmethod def _clean_name(name: str) -> str: """Bereinigt Seriennamen: Punkte/Underscores durch Leerzeichen""" name = name.replace(".", " ").replace("_", " ") # Mehrfach-Leerzeichen reduzieren name = re.sub(r'\s+', ' ', name).strip() # Trailing Bindestriche entfernen name = name.rstrip(" -") return name def _build_target(self, series: str, season: Optional[int], episode: Optional[int], title: str, ext: str, lib_path: str, pattern: str, season_pattern: str) -> tuple[str, str]: """Baut Ziel-Ordner und Dateiname nach Pattern""" s = season or 1 e = episode or 0 # Season-Ordner season_dir = season_pattern.format(season=s) # Dateiname try: filename = pattern.format( series=series, season=s, episode=e, title=title or "Unbekannt", ext=ext ) except (KeyError, ValueError): filename = f"{series} - S{s:02d}E{e:02d} - {title or 'Unbekannt'}.{ext}" # Ungueltige Zeichen entfernen for ch in ['<', '>', ':', '"', '|', '?', '*']: filename = filename.replace(ch, '') series = series.replace(ch, '') target_dir = os.path.join(lib_path, series, season_dir) return target_dir, filename async def _get_episode_title(self, tvdb_id: int, season: int, episode: int) -> str: """Episodentitel aus TVDB-Cache oder API holen""" if not self._db_pool: return "" try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: # Zuerst Cache pruefen await cur.execute( "SELECT episode_name FROM tvdb_episode_cache " "WHERE series_tvdb_id = %s " "AND season_number = %s " "AND episode_number = %s", (tvdb_id, season, episode) ) row = await cur.fetchone() if row and row[0]: return row[0] except Exception: pass # Cache leer -> Episoden von TVDB laden episodes = await self.tvdb.fetch_episodes(tvdb_id) for ep in episodes: if ep["season_number"] == season and ep["episode_number"] == episode: return ep.get("episode_name", "") return "" async def execute_import(self, job_id: int) -> dict: """Fuehrt den Import aus (Kopieren/Verschieben)""" if not self._db_pool: return {"error": "Keine DB-Verbindung"} try: async with self._db_pool.acquire() as conn: async with conn.cursor(aiomysql.DictCursor) as cur: await cur.execute( "SELECT * FROM import_jobs WHERE id = %s", (job_id,) ) job = await cur.fetchone() if not job: return {"error": "Job nicht gefunden"} await cur.execute( "UPDATE import_jobs SET status = 'importing' " "WHERE id = %s", (job_id,) ) # Nur Items mit status matched oder conflict+overwrite await cur.execute( "SELECT * FROM import_items " "WHERE import_job_id = %s " "AND (status = 'matched' " " OR (status = 'conflict' " " AND user_action = 'overwrite'))", (job_id,) ) items = await cur.fetchall() done = 0 errors = 0 mode = job.get("mode", "copy") for item in items: ok = await self._process_item(item, mode) if ok: done += 1 else: errors += 1 # Progress updaten async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( "UPDATE import_jobs SET processed_files = %s " "WHERE id = %s", (done + errors, job_id) ) # Job abschliessen status = "done" if errors == 0 else "error" async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( "UPDATE import_jobs SET status = %s " "WHERE id = %s", (status, job_id) ) return {"done": done, "errors": errors} except Exception as e: logging.error(f"Import ausfuehren fehlgeschlagen: {e}") return {"error": str(e)} async def _process_item(self, item: dict, mode: str) -> bool: """Einzelnes Item importieren (kopieren/verschieben)""" src = item["source_file"] target_dir = item["target_path"] target_file = item["target_filename"] if not target_dir or not target_file: await self._update_item_status(item["id"], "error") return False target = os.path.join(target_dir, target_file) try: # Zielordner erstellen os.makedirs(target_dir, exist_ok=True) if mode == "move": shutil.move(src, target) else: shutil.copy2(src, target) logging.info( f"Import: {os.path.basename(src)} -> {target}" ) await self._update_item_status(item["id"], "done") return True except Exception as e: logging.error(f"Import fehlgeschlagen: {src}: {e}") await self._update_item_status(item["id"], "error") return False async def _update_item_status(self, item_id: int, status: str) -> None: if not self._db_pool: return try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( "UPDATE import_items SET status = %s " "WHERE id = %s", (status, item_id) ) except Exception: pass async def resolve_conflict(self, item_id: int, action: str) -> bool: """Konflikt loesen: overwrite, skip, rename""" if not self._db_pool or action not in ('overwrite', 'skip', 'rename'): return False try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: if action == 'skip': await cur.execute( "UPDATE import_items SET status = 'skipped', " "user_action = 'skip' WHERE id = %s", (item_id,) ) elif action == 'rename': # Dateiname mit Suffix versehen await cur.execute( "SELECT target_filename FROM import_items " "WHERE id = %s", (item_id,) ) row = await cur.fetchone() if row and row[0]: name, ext = os.path.splitext(row[0]) new_name = f"{name}_neu{ext}" await cur.execute( "UPDATE import_items SET " "target_filename = %s, " "status = 'matched', " "user_action = 'rename' " "WHERE id = %s", (new_name, item_id) ) else: # overwrite await cur.execute( "UPDATE import_items SET user_action = 'overwrite' " "WHERE id = %s", (item_id,) ) return True except Exception as e: logging.error(f"Konflikt loesen fehlgeschlagen: {e}") return False async def update_item(self, item_id: int, **kwargs) -> bool: """Manuelle Korrektur eines Items""" if not self._db_pool: return False allowed = { 'detected_series', 'detected_season', 'detected_episode', 'tvdb_series_id', 'tvdb_series_name', 'tvdb_episode_title', 'target_path', 'target_filename', 'status' } updates = [] params = [] for k, v in kwargs.items(): if k in allowed: updates.append(f"{k} = %s") params.append(v) if not updates: return False params.append(item_id) try: async with self._db_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( f"UPDATE import_items SET {', '.join(updates)} " f"WHERE id = %s", params ) return True except Exception as e: logging.error(f"Import-Item aktualisieren fehlgeschlagen: {e}") return False async def get_job_status(self, job_id: int) -> dict: """Status eines Import-Jobs mit allen Items""" if not self._db_pool: return {"error": "Keine DB-Verbindung"} try: async with self._db_pool.acquire() as conn: async with conn.cursor(aiomysql.DictCursor) as cur: await cur.execute( "SELECT * FROM import_jobs WHERE id = %s", (job_id,) ) job = await cur.fetchone() if not job: return {"error": "Job nicht gefunden"} await cur.execute( "SELECT * FROM import_items " "WHERE import_job_id = %s ORDER BY source_file", (job_id,) ) items = await cur.fetchall() return { "job": self._serialize(job), "items": [self._serialize(i) for i in items], } except Exception as e: return {"error": str(e)} @staticmethod def _serialize(row: dict) -> dict: """Dict JSON-kompatibel machen""" result = {} for k, v in row.items(): if hasattr(v, "isoformat"): result[k] = str(v) else: result[k] = v return result @staticmethod def _fmt_size(b: int) -> str: """Bytes menschenlesbar""" for u in ("B", "KiB", "MiB", "GiB"): if b < 1024: return f"{b:.1f} {u}" b /= 1024 return f"{b:.1f} TiB"