*
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 */

/**
 * \file    bankimport/class/bankstatement.class.php
 * \ingroup bankimport
 * \brief   Class for PDF bank statements from FinTS
 */

require_once DOL_DOCUMENT_ROOT.'/core/class/commonobject.class.php';

/**
 * Class BankImportStatement
 *
 * Represents a PDF bank statement imported via FinTS (or uploaded manually).
 * One row of the "bankimport_statement" table maps to one instance; the PDF
 * itself lives on disk under the directory returned by getStorageDir().
 */
class BankImportStatement extends CommonObject
{
	/**
	 * @var string ID to identify managed object
	 */
	public $element = 'bankstatement';

	/**
	 * @var string Name of table without prefix where object is stored
	 */
	public $table_element = 'bankimport_statement';

	/**
	 * @var int Entity
	 */
	public $entity;

	/**
	 * @var string IBAN
	 */
	public $iban;

	/**
	 * @var string Statement number
	 */
	public $statement_number;

	/**
	 * @var int Statement year
	 */
	public $statement_year;

	/**
	 * @var int Statement date (timestamp)
	 */
	public $statement_date;

	/**
	 * @var int Period from (timestamp)
	 */
	public $date_from;

	/**
	 * @var int Period to (timestamp)
	 */
	public $date_to;

	/**
	 * @var float Opening balance
	 */
	public $opening_balance;

	/**
	 * @var float Closing balance
	 */
	public $closing_balance;

	/**
	 * @var string Currency
	 */
	public $currency = 'EUR';

	/**
	 * @var string Filename
	 */
	public $filename;

	/**
	 * @var string Filepath
	 */
	public $filepath;

	/**
	 * @var int Filesize
	 */
	public $filesize;

	/**
	 * @var string Import batch key
	 */
	public $import_key;

	/**
	 * @var int Creation timestamp
	 */
	public $datec;

	/**
	 * @var int User who created
	 */
	public $fk_user_creat;

	/**
	 * @var string Private note
	 */
	public $note_private;

	/**
	 * @var string Public note
	 */
	public $note_public;

	/**
	 * Constructor
	 *
	 * @param DoliDB $db Database handler
	 */
	public function __construct($db)
	{
		global $conf;

		$this->db = $db;
		$this->entity = $conf->entity;
	}

	/**
	 * Create statement in database
	 *
	 * Runs inside its own transaction. Note fields are not written by this method.
	 *
	 * @param  User $user User that creates
	 * @return int        <0 if KO, Id of created object if OK
	 */
	public function create($user)
	{
		$now = dol_now();

		$this->db->begin();

		$sql = "INSERT INTO ".MAIN_DB_PREFIX."bankimport_statement (";
		$sql .= "entity, iban, statement_number, statement_year, statement_date,";
		$sql .= "date_from, date_to, opening_balance, closing_balance, currency,";
		$sql .= "filename, filepath, filesize, import_key, datec, fk_user_creat";
		$sql .= ") VALUES (";
		$sql .= ((int) $this->entity).",";
		$sql .= ($this->iban ? "'".$this->db->escape($this->iban)."'" : "NULL").",";
		$sql .= "'".$this->db->escape($this->statement_number)."',";
		$sql .= ((int) $this->statement_year).",";
		$sql .= ($this->statement_date ? "'".$this->db->idate($this->statement_date)."'" : "NULL").",";
		$sql .= ($this->date_from ? "'".$this->db->idate($this->date_from)."'" : "NULL").",";
		$sql .= ($this->date_to ? "'".$this->db->idate($this->date_to)."'" : "NULL").",";
		// A balance of 0 is a legitimate value, so only null / empty string map to NULL.
		$sql .= (($this->opening_balance !== null && $this->opening_balance !== '') ? ((float) $this->opening_balance) : "NULL").",";
		$sql .= (($this->closing_balance !== null && $this->closing_balance !== '') ? ((float) $this->closing_balance) : "NULL").",";
		$sql .= "'".$this->db->escape($this->currency)."',";
		$sql .= ($this->filename ? "'".$this->db->escape($this->filename)."'" : "NULL").",";
		$sql .= ($this->filepath ? "'".$this->db->escape($this->filepath)."'" : "NULL").",";
		$sql .= ($this->filesize ? ((int) $this->filesize) : "NULL").",";
		$sql .= ($this->import_key ? "'".$this->db->escape($this->import_key)."'" : "NULL").",";
		$sql .= "'".$this->db->idate($now)."',";
		$sql .= ((int) $user->id);
		$sql .= ")";

		dol_syslog(get_class($this)."::create", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			$this->db->rollback();
			return -1;
		}

		$this->id = $this->db->last_insert_id(MAIN_DB_PREFIX."bankimport_statement");
		$this->datec = $now;
		$this->fk_user_creat = $user->id;

		$this->db->commit();
		return $this->id;
	}

	/**
	 * Load statement from database
	 *
	 * @param  int $id Id of statement to load
	 * @return int     <0 if KO, 0 if not found, >0 if OK
	 */
	public function fetch($id)
	{
		$sql = "SELECT t.*";
		$sql .= " FROM ".MAIN_DB_PREFIX."bankimport_statement as t";
		$sql .= " WHERE t.rowid = ".((int) $id);

		dol_syslog(get_class($this)."::fetch", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			return -1;
		}

		$found = 0;
		if ($this->db->num_rows($resql)) {
			$this->hydrateFromRecord($this->db->fetch_object($resql));
			$found = 1;
		}
		$this->db->free($resql);

		return $found;
	}

	/**
	 * Copy the columns of one "bankimport_statement" row onto this object.
	 *
	 * Date columns are converted to timestamps with DoliDB::jdate(); the
	 * identifier, entity and year are cast to int to match their declared types.
	 *
	 * @param  object $obj Row object as returned by DoliDB::fetch_object()
	 * @return void
	 */
	private function hydrateFromRecord($obj)
	{
		$this->id = (int) $obj->rowid;
		$this->entity = (int) $obj->entity;
		$this->iban = $obj->iban;
		$this->statement_number = $obj->statement_number;
		$this->statement_year = (int) $obj->statement_year;
		$this->statement_date = $this->db->jdate($obj->statement_date);
		$this->date_from = $this->db->jdate($obj->date_from);
		$this->date_to = $this->db->jdate($obj->date_to);
		$this->opening_balance = $obj->opening_balance;
		$this->closing_balance = $obj->closing_balance;
		$this->currency = $obj->currency;
		$this->filename = $obj->filename;
		$this->filepath = $obj->filepath;
		$this->filesize = $obj->filesize;
		$this->import_key = $obj->import_key;
		$this->datec = $this->db->jdate($obj->datec);
		$this->fk_user_creat = $obj->fk_user_creat;
		$this->note_private = $obj->note_private;
		$this->note_public = $obj->note_public;
	}

	/**
	 * Check if statement already exists (by number, year, iban) in the current entity
	 *
	 * A query failure is recorded in $this->error but still reported as 0,
	 * because callers treat any non-zero return as "exists".
	 *
	 * @return int 0 if not exists, rowid if exists
	 */
	public function exists()
	{
		$sql = "SELECT rowid FROM ".MAIN_DB_PREFIX."bankimport_statement";
		$sql .= " WHERE statement_number = '".$this->db->escape($this->statement_number)."'";
		$sql .= " AND statement_year = ".((int) $this->statement_year);
		if (!empty($this->iban)) {
			$sql .= " AND iban = '".$this->db->escape($this->iban)."'";
		} else {
			// create() stores an empty IBAN as NULL, so "iban = ''" alone would never match.
			$sql .= " AND (iban IS NULL OR iban = '')";
		}
		$sql .= " AND entity = ".((int) $this->entity);

		dol_syslog(get_class($this)."::exists", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			return 0;
		}

		$rowid = 0;
		if ($this->db->num_rows($resql) > 0) {
			$obj = $this->db->fetch_object($resql);
			$rowid = (int) $obj->rowid;
		}
		$this->db->free($resql);

		return $rowid;
	}

	/**
	 * Fetch all statements of the current entity with filters
	 *
	 * Supported filter keys: 'iban' (substring match) and 'year' (exact match).
	 *
	 * @param  string    $sortfield Sort field
	 * @param  string    $sortorder Sort order (ASC/DESC)
	 * @param  int       $limit     Limit (0 = no limit)
	 * @param  int       $offset    Offset
	 * @param  array     $filter    Filters array
	 * @param  string    $mode      'list' returns array, 'count' returns count
	 * @return array|int            Array of statements, count or -1 on error
	 */
	public function fetchAll($sortfield = 'statement_year,statement_number', $sortorder = 'DESC', $limit = 0, $offset = 0, $filter = array(), $mode = 'list')
	{
		// FROM/WHERE part shared by the count query and the list query.
		$from = " FROM ".MAIN_DB_PREFIX."bankimport_statement as t";
		$from .= " WHERE t.entity = ".((int) $this->entity);

		// Apply filters
		if (!empty($filter['iban'])) {
			$from .= " AND t.iban LIKE '%".$this->db->escape($filter['iban'])."%'";
		}
		if (!empty($filter['year'])) {
			$from .= " AND t.statement_year = ".((int) $filter['year']);
		}

		// Count mode
		if ($mode == 'count') {
			dol_syslog(get_class($this)."::fetchAll count", LOG_DEBUG);
			$resqlcount = $this->db->query("SELECT COUNT(*) as total".$from);
			if (!$resqlcount) {
				$this->error = $this->db->lasterror();
				return -1;
			}
			$objcount = $this->db->fetch_object($resqlcount);
			$total = $objcount ? (int) $objcount->total : 0;
			$this->db->free($resqlcount);
			return $total;
		}

		// List mode: select full rows so each object is hydrated without an extra query.
		$sql = "SELECT t.*".$from;
		$sql .= $this->db->order($sortfield, $sortorder);
		if ($limit > 0) {
			$sql .= $this->db->plimit($limit, $offset);
		}

		dol_syslog(get_class($this)."::fetchAll", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			return -1;
		}

		$result = array();
		while ($obj = $this->db->fetch_object($resql)) {
			$statement = new BankImportStatement($this->db);
			$statement->hydrateFromRecord($obj);
			$result[] = $statement;
		}
		$this->db->free($resql);

		return $result;
	}

	/**
	 * Delete statement
	 *
	 * The database row is removed first; the PDF on disk is only removed once
	 * the delete has been committed, so a failed delete never leaves a row that
	 * points to a missing file.
	 *
	 * @param  User $user User that deletes
	 * @return int        <0 if KO, >0 if OK
	 */
	public function delete($user)
	{
		$this->db->begin();

		$sql = "DELETE FROM ".MAIN_DB_PREFIX."bankimport_statement";
		$sql .= " WHERE rowid = ".((int) $this->id);

		dol_syslog(get_class($this)."::delete", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			$this->db->rollback();
			return -1;
		}

		$this->db->commit();

		// Best effort: a file that cannot be removed is logged but does not fail the delete.
		if ($this->filepath && file_exists($this->filepath)) {
			if (!@unlink($this->filepath)) {
				dol_syslog(get_class($this)."::delete could not remove file ".$this->filepath, LOG_WARNING);
			}
		}

		return 1;
	}

	/**
	 * Get full path to PDF file
	 *
	 * @return string Full path, or empty string if no path is set or the file is missing
	 */
	public function getFilePath()
	{
		if ($this->filepath && file_exists($this->filepath)) {
			return $this->filepath;
		}
		return '';
	}

	/**
	 * Get storage directory for statements (created on demand)
	 *
	 * @return string Directory path
	 */
	public static function getStorageDir()
	{
		global $conf;

		$dir = $conf->bankimport->dir_output.'/statements';
		if (!is_dir($dir)) {
			dol_mkdir($dir);
		}
		return $dir;
	}

	/**
	 * Reduce a value to characters that are safe inside a file name.
	 *
	 * Anything other than ASCII letters, digits, underscore and dash is replaced
	 * by an underscore, so the value can never introduce a path separator.
	 *
	 * @param  mixed  $value Raw value (e.g. statement number)
	 * @return string        Sanitized value
	 */
	private static function sanitizeFilenamePart($value)
	{
		return (string) preg_replace('/[^A-Za-z0-9_-]/', '_', (string) $value);
	}

	/**
	 * Save PDF content to file
	 *
	 * Sets $filename, $filepath and (on success) $filesize.
	 *
	 * @param  string $pdfContent Binary PDF content
	 * @return int                <0 if KO, >0 if OK
	 */
	public function savePDF($pdfContent)
	{
		$dir = self::getStorageDir();

		// Generate filename. The IBAN is upper-cased first so lowercase letters
		// are kept instead of being stripped by the [^A-Z0-9] filter.
		$ibanPart = preg_replace('/[^A-Z0-9]/', '', strtoupper((string) $this->iban));
		$this->filename = sprintf(
			'statement_%s_%d_%s.pdf',
			$ibanPart,
			$this->statement_year,
			self::sanitizeFilenamePart($this->statement_number)
		);
		$this->filepath = $dir.'/'.$this->filename;

		// Write file
		$result = file_put_contents($this->filepath, $pdfContent);
		if ($result === false) {
			$this->error = 'Failed to write PDF file';
			return -1;
		}

		$this->filesize = strlen($pdfContent);
		return 1;
	}

	/**
	 * Save uploaded PDF file
	 *
	 * Validates the upload (real upload, max 10MB, MIME type application/pdf),
	 * then moves it into the storage directory under a generated name that does
	 * not overwrite an existing file.
	 *
	 * @param  array $fileInfo Element from $_FILES array
	 * @return int             <0 if KO, >0 if OK
	 */
	public function saveUploadedPDF($fileInfo)
	{
		// Validate upload
		if (empty($fileInfo['tmp_name']) || !is_uploaded_file($fileInfo['tmp_name'])) {
			$this->error = 'No file uploaded';
			return -1;
		}

		// Check file size (max 10MB)
		$size = isset($fileInfo['size']) ? $fileInfo['size'] : filesize($fileInfo['tmp_name']);
		if ($size > 10 * 1024 * 1024) {
			$this->error = 'File too large (max 10MB)';
			return -1;
		}

		// Check MIME type from the file content; an unavailable finfo database
		// is treated as "not a PDF". The handle is released when it goes out of scope.
		$finfo = finfo_open(FILEINFO_MIME_TYPE);
		$mimeType = $finfo ? finfo_file($finfo, $fileInfo['tmp_name']) : false;
		if ($mimeType !== 'application/pdf') {
			$this->error = 'Only PDF files are allowed';
			return -1;
		}

		$dir = self::getStorageDir();

		// Generate filename
		$ibanPart = !empty($this->iban) ? preg_replace('/[^A-Z0-9]/', '', strtoupper($this->iban)) : 'KONTO';
		$numberPart = str_pad(self::sanitizeFilenamePart($this->statement_number), 3, '0', STR_PAD_LEFT);

		$this->filename = sprintf('Kontoauszug_%s_%d_%s.pdf', $ibanPart, $this->statement_year, $numberPart);
		$this->filepath = $dir.'/'.$this->filename;

		// If the name is taken, add a timestamp; if that is taken too, add a counter.
		if (file_exists($this->filepath)) {
			$stamp = date('His');
			$attempt = 0;
			do {
				$suffix = ($attempt === 0) ? $stamp : $stamp.'-'.$attempt;
				$this->filename = sprintf('Kontoauszug_%s_%d_%s_%s.pdf', $ibanPart, $this->statement_year, $numberPart, $suffix);
				$this->filepath = $dir.'/'.$this->filename;
				$attempt++;
			} while (file_exists($this->filepath));
		}

		// Move uploaded file
		if (!move_uploaded_file($fileInfo['tmp_name'], $this->filepath)) {
			$this->error = 'Failed to save file';
			return -1;
		}

		$this->filesize = filesize($this->filepath);
		return 1;
	}

	/**
	 * Parse PDF bank statement metadata using pdfinfo and pdftotext
	 *
	 * Extracts: statement number, year, IBAN, date range, opening/closing balance,
	 * account number, bank name, statement date. When the external tools yield
	 * nothing (or exec() is unavailable), the file name is used as a fallback.
	 *
	 * @param  string      $filepath Path to PDF file
	 * @return array|false           Array with extracted data or false on failure
	 */
	public static function parsePdfMetadata($filepath)
	{
		if (!file_exists($filepath)) {
			return false;
		}

		$result = array(
			'statement_number' => '',
			'statement_year' => 0,
			'pdf_number' => '',          // Original statement number from PDF (e.g. "1" from Nr. 1/2025)
			'pdf_year' => 0,             // Original year from PDF
			'iban' => '',
			'date_from' => null,
			'date_to' => null,
			'opening_balance' => null,
			'closing_balance' => null,
			'statement_date' => null,
			'account_number' => '',
			'bank_name' => '',
			'author' => '',
		);

		$pdfinfo = array();
		$textlines = array();

		// exec() may be disabled by configuration; in that case only the filename fallback applies.
		if (function_exists('exec')) {
			$escapedPath = escapeshellarg($filepath);

			// 1. Extract metadata via pdfinfo
			exec("pdfinfo ".$escapedPath." 2>/dev/null", $pdfinfo);

			// 2. Extract text via pdftotext
			exec("pdftotext -layout ".$escapedPath." - 2>/dev/null", $textlines);
		}

		foreach ($pdfinfo as $line) {
			if (preg_match('/^Title:\s+(.+)$/', $line, $m)) {
				// Title format: "000000000000000000000013438147 001/2025" or "Kontoauszug 13438147"
				if (preg_match('/(\d+)\s+(\d+)\/(\d{4})/', $m[1], $tm)) {
					$result['account_number'] = ltrim($tm[1], '0');
					$result['pdf_number'] = (string) intval($tm[2]);
					$result['pdf_year'] = (int) $tm[3];
				}
			}
			if (preg_match('/^Author:\s+(.+)$/', $line, $m)) {
				$result['author'] = trim($m[1]);
			}
		}

		$text = implode("\n", $textlines);

		// Statement number from text (fallback if not in metadata)
		if (empty($result['pdf_number']) && preg_match('/Nr\.\s+(\d+)\/(\d{4})/', $text, $m)) {
			$result['pdf_number'] = (string) intval($m[1]);
			$result['pdf_year'] = (int) $m[2];
		}

		// IBAN: digit groups may be separated by spaces/tabs only, so the match
		// cannot run on into digits of the following line.
		if (preg_match('/IBAN:\s*([A-Z]{2}\d{2}[ \t]*[\d \t]+)/', $text, $m)) {
			$result['iban'] = preg_replace('/\s+/', ' ', trim($m[1]));
		}

		// Account number (fallback)
		if (empty($result['account_number']) && preg_match('/Kontonummer\s+(\d+)/', $text, $m)) {
			$result['account_number'] = $m[1];
		}

		// Date range from Kontoabschluss
		if (preg_match('/Kontoabschluss vom (\d{2}\.\d{2}\.\d{4}) bis (\d{2}\.\d{2}\.\d{4})/', $text, $m)) {
			$dateFrom = DateTime::createFromFormat('d.m.Y', $m[1]);
			$dateTo = DateTime::createFromFormat('d.m.Y', $m[2]);
			if ($dateFrom) {
				$result['date_from'] = $dateFrom->setTime(0, 0, 0)->getTimestamp();
			}
			if ($dateTo) {
				$result['date_to'] = $dateTo->setTime(0, 0, 0)->getTimestamp();
			}
		}

		// Statement date (erstellt am)
		if (preg_match('/erstellt am\s+(\d{2}\.\d{2}\.\d{4})/', $text, $m)) {
			$stmtDate = DateTime::createFromFormat('d.m.Y', $m[1]);
			if ($stmtDate) {
				$result['statement_date'] = $stmtDate->setTime(0, 0, 0)->getTimestamp();
			}
		}

		// Opening balance: "alter Kontostand [vom DD.MM.YYYY] X.XXX,XX H/S"
		$result['opening_balance'] = self::extractBalance($text, 'alter Kontostand');

		// Closing balance: "neuer Kontostand vom DD.MM.YYYY X.XXX,XX H/S"
		$result['closing_balance'] = self::extractBalance($text, 'neuer Kontostand');

		// Bank name (first line that contains "Bank" or known patterns)
		if (preg_match('/(?:VR\s*B\s*ank|Volksbank|Raiffeisenbank|Sparkasse)[^\n]*/i', $text, $m)) {
			$bankName = trim($m[0]);
			// Fix OCR artifacts: single chars separated by spaces ("V R B a n k" → "VRBank")
			// Strategy: collapse all single-space gaps between word chars that look like OCR splitting
			$bankName = preg_replace('/\b(\w) (\w) (\w) (\w)\b/', '$1$2$3$4', $bankName);
			$bankName = preg_replace('/\b(\w) (\w) (\w)\b/', '$1$2$3', $bankName);
			$bankName = preg_replace('/\b(\w) (\w)\b/', '$1$2', $bankName);
			// Fix common OCR pattern "VR B ank" → "VR Bank", "S chleswig" → "Schleswig"
			$bankName = preg_replace('/\bB ank\b/', 'Bank', $bankName);
			$bankName = preg_replace('/\bS (\w)/', 'S$1', $bankName);
			$bankName = preg_replace('/\bW (\w)/', 'W$1', $bankName);
			// Clean up multiple spaces and trim address parts after comma
			$bankName = preg_replace('/\s{2,}/', ' ', $bankName);
			$bankName = preg_replace('/,.*$/', '', $bankName);
			$result['bank_name'] = trim($bankName);
		}

		// Derive statement_number (=month) and statement_year from end date of period
		if ($result['date_to']) {
			$result['statement_number'] = (string) intval(date('m', $result['date_to']));
			$result['statement_year'] = (int) date('Y', $result['date_to']);
		} elseif ($result['date_from']) {
			$result['statement_number'] = (string) intval(date('m', $result['date_from']));
			$result['statement_year'] = (int) date('Y', $result['date_from']);
		} elseif (!empty($result['pdf_year'])) {
			// Fallback to PDF metadata if no date range
			$result['statement_number'] = $result['pdf_number'];
			$result['statement_year'] = $result['pdf_year'];
		}

		// Fallback: extract data from filename if PDF tools returned nothing
		// Supports patterns like: 13438147_2025_Nr.001_Kontoauszug_vom_2025.07.01_timestamp.pdf
		if (empty($result['statement_number']) && empty($result['iban'])) {
			$basename = basename($filepath);
			if (preg_match('/(\d+)_(\d{4})_Nr\.?(\d+)/', $basename, $fm)) {
				$result['account_number'] = ltrim($fm[1], '0');
				$result['pdf_number'] = (string) intval($fm[3]);
				$result['pdf_year'] = (int) $fm[2];
				$result['statement_number'] = $result['pdf_number'];
				$result['statement_year'] = $result['pdf_year'];
			}
			if (preg_match('/vom[_\s](\d{4})\.(\d{2})\.(\d{2})/', $basename, $dm)) {
				$stmtDate = DateTime::createFromFormat('Y-m-d', $dm[1].'-'.$dm[2].'-'.$dm[3]);
				if ($stmtDate) {
					$result['statement_date'] = $stmtDate->setTime(0, 0, 0)->getTimestamp();
					if (empty($result['statement_number'])) {
						$result['statement_number'] = (string) intval($dm[2]);
						$result['statement_year'] = (int) $dm[1];
					}
				}
			}
		}

		// Validate: at least statement number or IBAN must be present
		if (empty($result['statement_number']) && empty($result['iban'])) {
			return false;
		}

		return $result;
	}

	/**
	 * Find a balance line ("<label> [vom DD.MM.YYYY] X.XXX,XX H|S") in statement text.
	 *
	 * "H" leaves the amount positive, "S" makes it negative.
	 *
	 * @param  string     $text  Text extracted from the PDF
	 * @param  string     $label Literal label preceding the amount (no regex metacharacters)
	 * @return float|null        Signed amount, or null if the label was not found
	 */
	private static function extractBalance($text, $label)
	{
		$pattern = '/'.$label.'(?:\s+vom\s+\d{2}\.\d{2}\.\d{4})?\s+([\d.,]+)\s+(H|S)/';
		if (!preg_match($pattern, $text, $m)) {
			return null;
		}

		$amount = self::parseGermanAmount($m[1]);
		return ($m[2] === 'S') ? -$amount : $amount;
	}

	/**
	 * Parse a German formatted amount (e.g., "3.681,45" → 3681.45)
	 *
	 * @param  string $amount German formatted amount string
	 * @return float          Parsed amount
	 */
	private static function parseGermanAmount($amount)
	{
		$amount = str_replace('.', '', $amount);  // Remove thousands separator
		$amount = str_replace(',', '.', $amount); // Convert decimal separator
		return (float) $amount;
	}

	/**
	 * Generate a clean filename for a PDF statement
	 *
	 * Format: <bank>_<account>_<year>_<number padded to 3 digits>.pdf, with
	 * 'Bank', 'Konto', the current year and '000' as defaults for missing parts.
	 *
	 * @param  array  $parsed Parsed metadata from parsePdfMetadata()
	 * @return string         Generated filename
	 */
	public static function generateFilename($parsed)
	{
		$bank = 'Bank';
		if (!empty($parsed['bank_name'])) {
			// Shorten bank name - drop the legal form and everything after it
			$name = preg_replace('/\s+(eG|AG|e\.G\.).*$/', '', $parsed['bank_name']);

			// The /u modifier makes the umlauts in the class match as characters,
			// not as loose bytes, so other accented letters are replaced as a whole.
			$clean = preg_replace('/[^a-zA-Z0-9äöüÄÖÜß-]/u', '_', $name);
			if ($clean === null) {
				// Input is not valid UTF-8: fall back to an ASCII-only whitelist.
				$clean = preg_replace('/[^a-zA-Z0-9-]/', '_', $name);
			}
			$clean = trim(preg_replace('/_+/', '_', (string) $clean), '_');

			if ($clean !== '') {
				$bank = $clean;
			}
		}

		$account = !empty($parsed['account_number']) ? $parsed['account_number'] : 'Konto';
		$year = !empty($parsed['statement_year']) ? $parsed['statement_year'] : date('Y');
		$nr = !empty($parsed['statement_number']) ? str_pad($parsed['statement_number'], 3, '0', STR_PAD_LEFT) : '000';

		return sprintf('%s_%s_%d_%s.pdf', $bank, $account, $year, $nr);
	}

	/**
	 * Get next available statement number for a year (highest stored number + 1)
	 *
	 * @param  int    $year Year
	 * @return string       Next statement number ('1' if none stored or on query error)
	 */
	public function getNextStatementNumber($year)
	{
		// NOTE(review): CAST(... AS UNSIGNED) is MySQL/MariaDB syntax - confirm if other engines must be supported.
		$sql = "SELECT MAX(CAST(statement_number AS UNSIGNED)) as maxnum";
		$sql .= " FROM ".MAIN_DB_PREFIX."bankimport_statement";
		$sql .= " WHERE statement_year = ".((int) $year);
		$sql .= " AND entity = ".((int) $this->entity);

		$resql = $this->db->query($sql);
		if (!$resql) {
			return '1';
		}

		$obj = $this->db->fetch_object($resql);
		$nextNum = ($obj && $obj->maxnum !== null) ? ((int) $obj->maxnum + 1) : 1;
		$this->db->free($resql);

		return (string) $nextNum;
	}

	/**
	 * Get the end date (date_to) of the most recent statement
	 *
	 * @return int|null Timestamp of latest date_to, or null if none
	 */
	public function getLatestStatementEndDate()
	{
		$sql = "SELECT MAX(date_to) as last_date";
		$sql .= " FROM ".MAIN_DB_PREFIX."bankimport_statement";
		$sql .= " WHERE entity = ".((int) $this->entity);

		$resql = $this->db->query($sql);
		if (!$resql) {
			return null;
		}

		$obj = $this->db->fetch_object($resql);
		$latest = ($obj && $obj->last_date) ? $this->db->jdate($obj->last_date) : null;
		$this->db->free($resql);

		return $latest;
	}

	/**
	 * Get list of years that have stored statements
	 *
	 * @return array Array of years (descending), keyed by year (int) with the year as string value
	 */
	public function getAvailableYears()
	{
		$sql = "SELECT DISTINCT statement_year";
		$sql .= " FROM ".MAIN_DB_PREFIX."bankimport_statement";
		$sql .= " WHERE entity = ".((int) $this->entity);
		$sql .= " ORDER BY statement_year DESC";

		$result = array();
		$resql = $this->db->query($sql);
		if ($resql) {
			while ($obj = $this->db->fetch_object($resql)) {
				$result[(int) $obj->statement_year] = (string) $obj->statement_year;
			}
			$this->db->free($resql);
		}
		return $result;
	}

	/**
	 * Link transactions to this statement based on date range and IBAN
	 *
	 * Updates all transactions that fall within the statement's date range
	 * and match the IBAN, setting their fk_statement to this statement's ID.
	 * Transactions that already have a statement are left untouched.
	 *
	 * @return int Number of linked transactions (0 if id or date range is missing), or -1 on error
	 */
	public function linkTransactions()
	{
		if (empty($this->id) || empty($this->date_from) || empty($this->date_to)) {
			return 0;
		}

		$sql = "UPDATE ".MAIN_DB_PREFIX."bankimport_transaction SET";
		$sql .= " fk_statement = ".((int) $this->id);
		$sql .= " WHERE entity = ".((int) $this->entity);
		$sql .= " AND date_trans >= '".$this->db->idate($this->date_from)."'";
		$sql .= " AND date_trans <= '".$this->db->idate($this->date_to)."'";
		$sql .= " AND fk_statement IS NULL"; // Don't overwrite existing links

		// Match by IBAN if available (compared without spaces on both sides)
		if (!empty($this->iban)) {
			$ibanClean = preg_replace('/\s+/', '', $this->iban);
			$sql .= " AND REPLACE(iban, ' ', '') = '".$this->db->escape($ibanClean)."'";
		}

		dol_syslog(get_class($this)."::linkTransactions", LOG_DEBUG);
		$resql = $this->db->query($sql);
		if (!$resql) {
			$this->error = $this->db->lasterror();
			return -1;
		}

		return $this->db->affected_rows($resql);
	}
}