importzugferd/class/zugferdparser.class.php

645 lines
27 KiB
PHP

<?php
/* Copyright (C) 2026 ZUGFeRD Import Module
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*/
/**
* \file class/zugferdparser.class.php
* \ingroup importzugferd
* \brief Parser for ZUGFeRD/Factur-X XML invoices
*/
/**
* Class ZugferdParser
* Parses ZUGFeRD XML from PDF attachments
*/
class ZugferdParser
{
    /** XPath namespace URIs registered for ZUGFeRD 1.0 (root element CrossIndustryDocument). */
    const NS_V1_RSM = 'urn:ferd:CrossIndustryDocument:invoice:1p0';
    const NS_V1_RAM = 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12';
    const NS_V1_UDT = 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15';

    /** XPath namespace URIs registered for ZUGFeRD 2.x / Factur-X (root element CrossIndustryInvoice). */
    const NS_V2_RSM = 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';
    const NS_V2_RAM = 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';
    const NS_V2_QDT = 'urn:un:unece:uncefact:data:standard:QualifiedDataType:100';
    const NS_V2_UDT = 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100';

    /**
     * @var DoliDB Database handler (only stored by this class, never queried here)
     */
    public $db;

    /**
     * @var string Message of the last error; reset at the start of extractFromPdf() and parse()
     */
    public $error = '';

    /**
     * @var array Error messages
     */
    public $errors = array();

    /**
     * @var string Raw invoice XML, set by extractFromPdf() or parse()
     */
    public $xml_content = '';

    /**
     * @var SimpleXMLElement|false|null Parsed XML (false after a failed parse, null before parse())
     */
    public $xml;

    /**
     * @var array Parsed invoice data; empty until parse() succeeds
     */
    public $invoice_data = array();

    /**
     * @var array Namespaces declared in the parsed document (prefix => URI)
     */
    private $namespaces = array();

    /**
     * Constructor
     *
     * @param DoliDB $db Database handler
     */
    public function __construct($db)
    {
        $this->db = $db;
    }

    /**
     * Extract the embedded invoice XML from a PDF file.
     *
     * The PDF bytes are scanned for an embedded stream first; if that finds
     * nothing, the pdfdetach command line tool is tried. On success the XML is
     * stored in $this->xml_content.
     *
     * @param string $pdf_path Path to PDF file
     * @return int 1 if OK, -1 if error (message in $this->error)
     */
    public function extractFromPdf($pdf_path)
    {
        $this->error = '';

        if (!file_exists($pdf_path)) {
            $this->error = 'File not found: ' . $pdf_path;
            return -1;
        }

        // Directories and unreadable files are rejected before file_get_contents() can warn.
        $pdf_content = false;
        if (is_file($pdf_path) && is_readable($pdf_path)) {
            $pdf_content = file_get_contents($pdf_path);
        }
        if ($pdf_content === false) {
            $this->error = 'Cannot read PDF file';
            return -1;
        }

        $xml = $this->extractXmlFromPdfContent($pdf_content);
        if (empty($xml)) {
            $xml = $this->extractXmlUsingPdfdetach($pdf_path);
        }
        if (empty($xml)) {
            $this->error = 'No ZUGFeRD/Factur-X XML found in PDF';
            return -1;
        }

        $this->xml_content = $xml;
        return 1;
    }

    /**
     * Extract XML from PDF content by scanning every "stream ... endstream" section.
     *
     * The sections are located with strpos() instead of a lazy regular
     * expression, so large binary files cannot run into PCRE backtracking or
     * JIT stack limits.
     *
     * @param string $content PDF binary content
     * @return string|null XML content or null
     */
    private function extractXmlFromPdfContent($content)
    {
        $offset = 0;
        while (($keyword = strpos($content, 'stream', $offset)) !== false) {
            $offset = $keyword + 6;

            // "endstream" contains "stream" too; an unpaired closing keyword is not a stream start.
            if ($keyword >= 3 && substr($content, $keyword - 3, 3) === 'end') {
                continue;
            }

            $end = strpos($content, 'endstream', $offset);
            if ($end === false) {
                break;
            }

            $raw = (string) substr($content, $offset, $end - $offset);
            $offset = $end + 9;

            $xml = $this->findInvoiceXml($this->decodePdfStream($raw));
            if ($xml !== null) {
                return $xml;
            }
        }

        return null;
    }

    /**
     * Decode the bytes found between "stream" and "endstream".
     *
     * Whitespace-valued bytes at the end of compressed data are part of the
     * data, so trimming all whitespace (the historical behaviour) can corrupt
     * it. Several delimitations are therefore tried: only the end-of-line
     * marker after "stream" removed, additionally one end-of-line marker before
     * "endstream" removed, and finally the fully trimmed bytes.
     *
     * @param string $raw Bytes between the two keywords
     * @return string Decompressed data, or the trimmed input if nothing could be decompressed
     */
    private function decodePdfStream($raw)
    {
        $exact = $raw;
        if (strncmp($exact, "\r\n", 2) === 0) {
            $exact = (string) substr($exact, 2);
        } elseif ($exact !== '' && ($exact[0] === "\n" || $exact[0] === "\r")) {
            $exact = (string) substr($exact, 1);
        }

        $withoutEol = $exact;
        if (substr($withoutEol, -2) === "\r\n") {
            $withoutEol = (string) substr($withoutEol, 0, -2);
        } elseif (substr($withoutEol, -1) === "\n" || substr($withoutEol, -1) === "\r") {
            $withoutEol = (string) substr($withoutEol, 0, -1);
        }

        $trimmed = trim($raw);

        $candidates = array();
        foreach (array($exact, $withoutEol, $trimmed) as $candidate) {
            if ($candidate !== '' && !in_array($candidate, $candidates, true)) {
                $candidates[] = $candidate;
            }
        }

        // zlib-wrapped data first, raw deflate second.
        // "@" is deliberate: most streams in a PDF are not compressed text and failing is expected.
        foreach ($candidates as $candidate) {
            $decoded = @gzuncompress($candidate);
            if ($decoded !== false) {
                return $decoded;
            }
        }
        foreach ($candidates as $candidate) {
            $decoded = @gzinflate($candidate);
            if ($decoded !== false) {
                return $decoded;
            }
        }

        return $trimmed;
    }

    /**
     * Cut the invoice XML document out of a decoded stream.
     *
     * @param string $text Decoded stream content
     * @return string|null Text from "<?xml" up to and including the closing root tag, or null
     */
    private function findInvoiceXml($text)
    {
        $start = strpos($text, '<?xml');
        if ($start === false || !$this->looksLikeInvoiceXml($text)) {
            return null;
        }

        $xml = substr($text, $start);

        // The namespace prefix is optional so documents using a default namespace are found too.
        $closingTag = '/<\/(?:[a-z_][\w.\-]*:)?CrossIndustry(?:Document|Invoice)\s*>/i';
        if (preg_match($closingTag, $xml, $endMatch, PREG_OFFSET_CAPTURE)) {
            return substr($xml, 0, $endMatch[0][1] + strlen($endMatch[0][0]));
        }

        return null;
    }

    /**
     * Tell whether a text mentions one of the two known root element names.
     *
     * @param mixed $text Text to inspect
     * @return bool
     */
    private function looksLikeInvoiceXml($text)
    {
        return is_string($text)
            && (strpos($text, 'CrossIndustryDocument') !== false
                || strpos($text, 'CrossIndustryInvoice') !== false);
    }

    /**
     * Extract XML using pdfdetach command line tool
     *
     * Attachment 1 is tried first and accepted only if it mentions a known root
     * element. Otherwise the attachment list is read and the first attachment
     * whose list line matches ZUGFeRD / factur-x / xrechnung is returned.
     *
     * @param string $pdf_path Path to PDF
     * @return string|null XML content or null
     */
    private function extractXmlUsingPdfdetach($pdf_path)
    {
        if (!function_exists('exec')) {
            return null;
        }

        // tempnam() creates the file itself, so the name is not guessable the way uniqid() names are.
        $tmp_file = tempnam(sys_get_temp_dir(), 'zugferd_');
        if ($tmp_file === false) {
            return null;
        }

        try {
            $xml = $this->detachAttachment($pdf_path, 1, $tmp_file);
            if ($this->looksLikeInvoiceXml($xml)) {
                return $xml;
            }

            $list_output = array();
            $return_code = 1;
            exec('pdfdetach -list ' . escapeshellarg($pdf_path) . ' 2>&1', $list_output, $return_code);

            foreach ($list_output as $line) {
                if (!preg_match('/(ZUGFeRD|factur-x|xrechnung)/i', $line)) {
                    continue;
                }
                if (!preg_match('/(\d+):/', $line, $matches)) {
                    continue;
                }
                $xml = $this->detachAttachment($pdf_path, (int) $matches[1], $tmp_file);
                if ($xml !== null) {
                    return $xml;
                }
            }

            return null;
        } finally {
            // Runs on every exit path, so the temporary file is never left behind.
            if (file_exists($tmp_file)) {
                @unlink($tmp_file);
            }
        }
    }

    /**
     * Save one PDF attachment with pdfdetach and return its content.
     *
     * @param string $pdf_path Path to PDF
     * @param int    $index    Attachment number as used by "pdfdetach -save"
     * @param string $tmp_file Existing temporary file used as output target
     * @return string|null Attachment content, or null if nothing was extracted
     */
    private function detachAttachment($pdf_path, $index, $tmp_file)
    {
        // Empty the file first so content of an earlier attempt can never be returned.
        file_put_contents($tmp_file, '');

        $output = array();
        $return_code = 1;
        $cmd = 'pdfdetach -save ' . (int) $index . ' -o ' . escapeshellarg($tmp_file) . ' ' . escapeshellarg($pdf_path) . ' 2>&1';
        exec($cmd, $output, $return_code);

        if ($return_code !== 0 || !is_file($tmp_file)) {
            return null;
        }

        $content = file_get_contents($tmp_file);
        if ($content === false || $content === '') {
            return null;
        }

        return $content;
    }

    /**
     * Parse the XML content
     *
     * Resets $this->error and $this->invoice_data first, so a failed call never
     * leaves the data of a previous invoice behind.
     *
     * @param string $xml_content Optional XML content, uses $this->xml_content if not provided
     * @return int 1 if OK, -1 if error
     */
    public function parse($xml_content = null)
    {
        if ($xml_content !== null) {
            $this->xml_content = $xml_content;
        }

        $this->error = '';
        $this->invoice_data = array();

        if (empty($this->xml_content)) {
            $this->error = 'No XML content to parse';
            return -1;
        }

        // Collect libxml errors privately and restore the caller's setting afterwards.
        $previousSetting = libxml_use_internal_errors(true);
        $alreadyBuffered = count(libxml_get_errors());

        // ltrim(): an XML declaration must be the very first thing in the document.
        // LIBXML_NONET: the invoice comes from outside, never fetch remote resources for it.
        $this->xml = simplexml_load_string(ltrim($this->xml_content), 'SimpleXMLElement', LIBXML_NONET);

        $parseErrors = array_slice(libxml_get_errors(), $alreadyBuffered);
        if (!$previousSetting) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousSetting);

        if ($this->xml === false) {
            $message = isset($parseErrors[0]) ? trim($parseErrors[0]->message) : 'Unknown error';
            $this->error = 'XML parse error: ' . $message;
            return -1;
        }

        // Get namespaces
        $this->namespaces = $this->xml->getNamespaces(true);

        // The root element name decides the version; the substring heuristics
        // are only consulted when the root is neither of the two known names.
        $root = $this->xml->getName();
        if ($root === 'CrossIndustryDocument') {
            return $this->parseZugferdV1();
        }
        if ($root === 'CrossIndustryInvoice') {
            return $this->parseZugferdV2();
        }
        if ($this->isZugferdV1()) {
            return $this->parseZugferdV1();
        }
        if ($this->isZugferdV2()) {
            return $this->parseZugferdV2();
        }

        $this->error = 'Unknown ZUGFeRD/Factur-X format';
        return -1;
    }

    /**
     * Check if ZUGFeRD v1 format (substring heuristic on the raw XML)
     *
     * @return bool
     */
    private function isZugferdV1()
    {
        return strpos($this->xml_content, 'CrossIndustryDocument') !== false;
    }

    /**
     * Check if ZUGFeRD v2 / Factur-X format (substring heuristic on the raw XML)
     *
     * @return bool
     */
    private function isZugferdV2()
    {
        return strpos($this->xml_content, 'CrossIndustryInvoice') !== false;
    }

    /**
     * Parse ZUGFeRD v1 format
     *
     * @return int 1 if OK, -1 if error
     */
    private function parseZugferdV1()
    {
        $this->xml->registerXPathNamespace('rsm', self::NS_V1_RSM);
        $this->xml->registerXPathNamespace('ram', self::NS_V1_RAM);
        $this->xml->registerXPathNamespace('udt', self::NS_V1_UDT);

        $settlement = '//ram:ApplicableSupplyChainTradeSettlement';

        // Header keys are always present, even when the header element is missing.
        $data = $this->parseDocumentHeader('//rsm:HeaderExchangedDocument');

        // Seller (supplier); v1 has always reported the "VA" registration only.
        $data['seller'] = $this->parseSeller('//ram:SellerTradeParty', false);

        // Buyer (the receiving company)
        $data['buyer'] = array(
            'id' => $this->getXpathValue('//ram:BuyerTradeParty/ram:ID'),
            'reference' => $this->getXpathValue('//ram:ApplicableSupplyChainTradeAgreement/ram:BuyerReference'),
            'name' => $this->getXpathValue('//ram:BuyerTradeParty/ram:Name'),
        );

        // Totals, anchored to the header settlement: line items carry an element of
        // the same name, which an unanchored path could pick up instead.
        $data['totals'] = $this->parseTotals(
            $settlement . '/ram:SpecifiedTradeSettlementMonetarySummation',
            $settlement . '/ram:InvoiceCurrencyCode'
        );

        // Due date
        $data['due_date'] = $this->parseDate(
            $this->getXpathValue('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString')
        );

        // Line items
        $data['lines'] = $this->parseLineItems(self::NS_V1_RAM, array(
            'agreement' => 'SpecifiedSupplyChainTradeAgreement',
            'delivery' => 'SpecifiedSupplyChainTradeDelivery',
            'settlement' => 'SpecifiedSupplyChainTradeSettlement',
            'summation' => 'SpecifiedTradeSettlementMonetarySummation',
            'tax_percent' => 'ApplicablePercent',
        ));

        $this->invoice_data = $data;
        return 1;
    }

    /**
     * Parse ZUGFeRD v2 / Factur-X format
     *
     * @return int 1 if OK, -1 if error
     */
    private function parseZugferdV2()
    {
        $this->xml->registerXPathNamespace('rsm', self::NS_V2_RSM);
        $this->xml->registerXPathNamespace('ram', self::NS_V2_RAM);
        $this->xml->registerXPathNamespace('qdt', self::NS_V2_QDT);
        $this->xml->registerXPathNamespace('udt', self::NS_V2_UDT);

        $agreement = '//ram:ApplicableHeaderTradeAgreement';
        $settlement = '//ram:ApplicableHeaderTradeSettlement';

        // Header information
        $data = $this->parseDocumentHeader('//rsm:ExchangedDocument');

        // Seller (supplier). The "VA" registration is preferred; the first registration of
        // any scheme stays as fallback so sellers that only list another scheme still
        // yield the value they did before.
        $data['seller'] = $this->parseSeller($agreement . '/ram:SellerTradeParty', true);

        // Buyer (the receiving company)
        $data['buyer'] = array(
            'id' => $this->getXpathValue($agreement . '/ram:BuyerTradeParty/ram:ID'),
            'reference' => $this->getXpathValue($agreement . '/ram:BuyerReference'),
            'name' => $this->getXpathValue($agreement . '/ram:BuyerTradeParty/ram:Name'),
        );

        // Totals
        $data['totals'] = $this->parseTotals(
            $settlement . '/ram:SpecifiedTradeSettlementHeaderMonetarySummation',
            $settlement . '/ram:InvoiceCurrencyCode'
        );

        // Due date
        $data['due_date'] = $this->parseDate(
            $this->getXpathValue('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString')
        );

        // Line items
        $data['lines'] = $this->parseLineItems(self::NS_V2_RAM, array(
            'agreement' => 'SpecifiedLineTradeAgreement',
            'delivery' => 'SpecifiedLineTradeDelivery',
            'settlement' => 'SpecifiedLineTradeSettlement',
            'summation' => 'SpecifiedTradeSettlementLineMonetarySummation',
            'tax_percent' => 'RateApplicablePercent',
        ));

        $this->invoice_data = $data;
        return 1;
    }

    /**
     * Read number, type code, name and issue date of the document header.
     *
     * @param string $base Absolute XPath of the header element
     * @return array Keys invoice_number, invoice_type, invoice_name, invoice_date
     */
    private function parseDocumentHeader($base)
    {
        return array(
            'invoice_number' => $this->getXpathValue($base . '/ram:ID'),
            'invoice_type' => $this->getXpathValue($base . '/ram:TypeCode'),
            'invoice_name' => $this->getXpathValue($base . '/ram:Name'),
            'invoice_date' => $this->parseDate($this->getXpathValue($base . '/ram:IssueDateTime/udt:DateTimeString')),
        );
    }

    /**
     * Read the seller party.
     *
     * @param string $party       Absolute XPath of the SellerTradeParty element
     * @param bool   $vatFallback Use the first tax registration when none has schemeID "VA"
     * @return array Keys name, global_id, vat_id, address (street, postcode, city, country)
     */
    private function parseSeller($party, $vatFallback)
    {
        $registration = $party . '/ram:SpecifiedTaxRegistration/ram:ID';

        $vatId = $this->getXpathValue($registration . '[@schemeID="VA"]');
        if ($vatId === '' && $vatFallback) {
            $vatId = $this->getXpathValue($registration);
        }

        $address = $party . '/ram:PostalTradeAddress';

        return array(
            'name' => $this->getXpathValue($party . '/ram:Name'),
            'global_id' => $this->getXpathValue($party . '/ram:GlobalID'),
            'vat_id' => $vatId,
            'address' => array(
                'street' => $this->getXpathValue($address . '/ram:LineOne'),
                'postcode' => $this->getXpathValue($address . '/ram:PostcodeCode'),
                'city' => $this->getXpathValue($address . '/ram:CityName'),
                'country' => $this->getXpathValue($address . '/ram:CountryID'),
            ),
        );
    }

    /**
     * Read the document totals.
     *
     * @param string $summation Absolute XPath of the header monetary summation element
     * @param string $currency  Absolute XPath of the invoice currency code element
     * @return array Keys net, tax, gross (float) and currency (string)
     */
    private function parseTotals($summation, $currency)
    {
        return array(
            'net' => (float) $this->getXpathValue($summation . '/ram:LineTotalAmount'),
            'tax' => (float) $this->getXpathValue($summation . '/ram:TaxTotalAmount'),
            'gross' => (float) $this->getXpathValue($summation . '/ram:GrandTotalAmount'),
            'currency' => $this->getXpathValue($currency),
        );
    }

    /**
     * Read all invoice line items.
     *
     * Both format versions share the line structure and differ only in a few
     * element names, which are passed in through $names.
     *
     * @param string $ramNamespace Namespace URI bound to the "ram" prefix
     * @param array  $names        Element names (without prefix) under the keys
     *                             agreement, delivery, settlement, summation, tax_percent
     * @return array List of line item arrays
     */
    private function parseLineItems($ramNamespace, array $names)
    {
        $items = array();

        $lineNodes = $this->xml->xpath('//ram:IncludedSupplyChainTradeLineItem');
        if (empty($lineNodes)) {
            // xpath() yields an empty array for "no match" and a non-array on failure.
            return $items;
        }

        $agreement = 'ram:' . $names['agreement'];
        $netPrice = $agreement . '/ram:NetPriceProductTradePrice';
        $billedQuantity = 'ram:' . $names['delivery'] . '/ram:BilledQuantity';
        $settlement = 'ram:' . $names['settlement'];
        $product = 'ram:SpecifiedTradeProduct';

        foreach ($lineNodes as $line) {
            // Elements returned by xpath() need the prefix registered again.
            $line->registerXPathNamespace('ram', $ramNamespace);

            // Price and the quantity that price refers to
            $chargeAmount = (float) $this->getNodeValue($line->xpath($netPrice . '/ram:ChargeAmount'));
            $basisNodes = $line->xpath($netPrice . '/ram:BasisQuantity');
            $basisQuantity = (float) $this->getNodeValue($basisNodes);
            $basisQuantityUnit = (string) $this->getNodeAttribute($basisNodes, 'unitCode');

            // Real unit price: with a BasisQuantity of e.g. 100 the price is for 100 units.
            $unitPrice = $chargeAmount;
            if ($basisQuantity > 0 && $basisQuantity != 1) {
                $unitPrice = $chargeAmount / $basisQuantity;
            }

            list($copperSurcharge, $copperSurchargeBasisQty) = $this->findCopperSurcharge($line, $ramNamespace, $agreement, $basisQuantity);

            // Copper surcharge per single unit
            $copperSurchargePerUnit = null;
            if ($copperSurcharge !== null && $copperSurcharge > 0) {
                $copperSurchargePerUnit = $copperSurcharge;
                if ($copperSurchargeBasisQty > 0 && $copperSurchargeBasisQty != 1) {
                    $copperSurchargePerUnit = $copperSurcharge / $copperSurchargeBasisQty;
                }
            }

            $billedNodes = $line->xpath($billedQuantity);

            $items[] = array(
                'line_id' => (string) $this->getNodeValue($line->xpath('ram:AssociatedDocumentLineDocument/ram:LineID')),
                'product' => array(
                    'seller_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:SellerAssignedID')),
                    'buyer_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:BuyerAssignedID')),
                    'global_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:GlobalID')),
                    'name' => (string) $this->getNodeValue($line->xpath($product . '/ram:Name')),
                    'description' => (string) $this->getNodeValue($line->xpath($product . '/ram:Description')),
                ),
                'quantity' => (float) $this->getNodeValue($billedNodes),
                'unit_code' => (string) $this->getNodeAttribute($billedNodes, 'unitCode'),
                'unit_price' => $unitPrice,
                'unit_price_raw' => $chargeAmount,
                'basis_quantity' => $basisQuantity ?: 1,
                'basis_quantity_unit' => $basisQuantityUnit,
                'line_total' => (float) $this->getNodeValue($line->xpath($settlement . '/ram:' . $names['summation'] . '/ram:LineTotalAmount')),
                'tax_percent' => (float) $this->getNodeValue($line->xpath($settlement . '/ram:ApplicableTradeTax/ram:' . $names['tax_percent'])),
                // Copper surcharge data
                'copper_surcharge' => $copperSurcharge,
                'copper_surcharge_basis_qty' => $copperSurchargeBasisQty,
                'copper_surcharge_per_unit' => $copperSurchargePerUnit,
            );
        }

        return $items;
    }

    /**
     * Find the copper surcharge (Kupferzuschlag) of one line item.
     *
     * The allowance/charge entries of the gross price are checked first (by
     * their Reason text), then the product characteristics (by their
     * Description text). The first entry mentioning copper wins.
     *
     * @param SimpleXMLElement $line          Line item element
     * @param string           $ramNamespace  Namespace URI bound to the "ram" prefix
     * @param string           $agreement     Relative XPath of the line trade agreement element
     * @param float            $basisQuantity Basis quantity of the net price (0.0 if absent)
     * @return array Two entries: surcharge amount and its basis quantity, both null if none found
     */
    private function findCopperSurcharge($line, $ramNamespace, $agreement, $basisQuantity)
    {
        $charges = $line->xpath($agreement . '/ram:GrossPriceProductTradePrice/ram:AppliedTradeAllowanceCharge');
        foreach ($charges ?: array() as $charge) {
            $charge->registerXPathNamespace('ram', $ramNamespace);
            if ($this->mentionsCopper($this->getNodeValue($charge->xpath('ram:Reason')))) {
                return array(
                    (float) $this->getNodeValue($charge->xpath('ram:ActualAmount')),
                    (float) $this->getNodeValue($charge->xpath('ram:BasisQuantity')),
                );
            }
        }

        $characteristics = $line->xpath('ram:SpecifiedTradeProduct/ram:ApplicableProductCharacteristic');
        foreach ($characteristics ?: array() as $characteristic) {
            $characteristic->registerXPathNamespace('ram', $ramNamespace);
            if ($this->mentionsCopper($this->getNodeValue($characteristic->xpath('ram:Description')))) {
                return array(
                    (float) $this->getNodeValue($characteristic->xpath('ram:Value')),
                    // A characteristic carries no quantity of its own; the price basis is used.
                    $basisQuantity ?: 1,
                );
            }
        }

        return array(null, null);
    }

    /**
     * Tell whether a text contains one of the copper surcharge keywords (case-insensitive).
     *
     * @param string $text Reason or description text
     * @return bool
     */
    private function mentionsCopper($text)
    {
        return stripos($text, 'Kupfer') !== false
            || stripos($text, 'copper') !== false
            || stripos($text, 'Metall') !== false;
    }

    /**
     * Get the trimmed text of the first node matching an XPath on the document.
     *
     * @param string $xpath XPath expression
     * @return string Node text, or '' when nothing matches
     */
    private function getXpathValue($xpath)
    {
        $result = $this->xml->xpath($xpath);
        if (!empty($result)) {
            return trim((string) $result[0]);
        }
        return '';
    }

    /**
     * Get the trimmed text of the first node of an XPath result.
     *
     * @param array $nodes XPath result array
     * @return string Node text, or '' for an empty result
     */
    private function getNodeValue($nodes)
    {
        if (!empty($nodes) && isset($nodes[0])) {
            return trim((string) $nodes[0]);
        }
        return '';
    }

    /**
     * Get an attribute of the first node of an XPath result.
     *
     * @param array  $nodes XPath result array
     * @param string $attr  Attribute name
     * @return string Attribute value, or '' when node or attribute is missing
     */
    private function getNodeAttribute($nodes, $attr)
    {
        if (!empty($nodes) && isset($nodes[0])) {
            $attributes = $nodes[0]->attributes();
            if (isset($attributes[$attr])) {
                return (string) $attributes[$attr];
            }
        }
        return '';
    }

    /**
     * Parse date string in format YYYYMMDD (optionally followed by HHMM or HHMMSS) or ISO
     *
     * @param string $dateStr Date string
     * @return string Date in Y-m-d format, '' for empty input, the trimmed input if the format is unknown
     */
    private function parseDate($dateStr)
    {
        if (empty($dateStr)) {
            return '';
        }

        $dateStr = trim((string) $dateStr);

        // Format: YYYYMMDD, a trailing time part is ignored
        if (preg_match('/^(\d{4})(\d{2})(\d{2})(?:\d{4}(?:\d{2})?)?$/', $dateStr, $matches)) {
            return $matches[1] . '-' . $matches[2] . '-' . $matches[3];
        }

        // Format: YYYY-MM-DD or ISO
        if (preg_match('/^(\d{4})-(\d{2})-(\d{2})/', $dateStr, $matches)) {
            return $matches[1] . '-' . $matches[2] . '-' . $matches[3];
        }

        return $dateStr;
    }

    /**
     * Get file hash for duplicate detection
     *
     * @param string $file_path Path to file
     * @return string SHA256 hash, '' if the path is not a readable regular file
     */
    public function getFileHash($file_path)
    {
        if (!is_file($file_path) || !is_readable($file_path)) {
            return '';
        }

        $hash = hash_file('sha256', $file_path);

        // hash_file() reports failure with false; callers are promised a string.
        return $hash === false ? '' : $hash;
    }

    /**
     * Get invoice data
     *
     * @return array
     */
    public function getInvoiceData()
    {
        return $this->invoice_data;
    }

    /**
     * Get XML content
     *
     * @return string
     */
    public function getXmlContent()
    {
        return $this->xml_content;
    }
}