db = $db; }

    /**
     * Extract the embedded invoice XML from a PDF file.
     *
     * A scan of the raw PDF bytes is tried first; if that yields nothing the
     * pdfdetach command line tool is used. On success the XML is stored in
     * $this->xml_content, on failure $this->error holds the reason.
     *
     * @param string $pdf_path Path to PDF file
     * @return int 1 if OK, -1 if error
     */
    public function extractFromPdf($pdf_path)
    {
        if (!file_exists($pdf_path)) {
            $this->error = 'File not found: ' . $pdf_path;
            return -1;
        }

        $pdf_content = file_get_contents($pdf_path);
        if ($pdf_content === false) {
            $this->error = 'Cannot read PDF file';
            return -1;
        }

        // Cheap in-memory scan first, external tool only as a fallback.
        $xml = $this->extractXmlFromPdfContent($pdf_content);
        if (empty($xml)) {
            $xml = $this->extractXmlUsingPdfdetach($pdf_path);
        }

        if (empty($xml)) {
            $this->error = 'No ZUGFeRD/Factur-X XML found in PDF';
            return -1;
        }

        $this->xml_content = $xml;
        return 1;
    }

    /**
     * Extract XML from PDF content by searching the bytes for the invoice root element.
     *
     * Only XML that appears as plain text in the given bytes can be found this
     * way; anything else is left to extractXmlUsingPdfdetach().
     *
     * NOTE(review): the reviewed copy of this method was truncated between the
     * "typically starts with" comment and the end-tag match. The start pattern
     * below is a reconstruction - confirm it against the original patterns.
     *
     * @param string $content PDF binary content
     * @return string|null XML content or null
     */
    private function extractXmlFromPdfContent($content)
    {
        // Root element, optionally preceded by an XML declaration.
        $startPattern = '/(?:<\?xml[^>]*\?>\s*)?<rsm:CrossIndustry(?:Document|Invoice)\b/i';
        if (!preg_match($startPattern, $content, $startMatch, PREG_OFFSET_CAPTURE)) {
            return null;
        }
        $start = $startMatch[0][1];

        // Cut right after the closing root tag; search only from the start offset.
        $endPattern = '/<\/rsm:CrossIndustry(?:Document|Invoice)>/i';
        if (!preg_match($endPattern, $content, $endMatch, PREG_OFFSET_CAPTURE, $start)) {
            return null;
        }
        $end = $endMatch[0][1] + strlen($endMatch[0][0]);

        return substr($content, $start, $end - $start);
    }

    /**
     * Extract XML using the pdfdetach command line tool.
     *
     * The first attachment is tried and accepted only if it contains
     * 'CrossIndustryDocument' or 'CrossIndustryInvoice'. Otherwise the
     * attachment list is scanned for a name matching ZUGFeRD, factur-x or
     * xrechnung and the first such attachment that can be saved is returned.
     *
     * @param string $pdf_path Path to PDF
     * @return string|null XML content or null
     */
    private function extractXmlUsingPdfdetach($pdf_path)
    {
        // exec() may be disabled by configuration; treat that as "tool unavailable".
        if (!function_exists('exec')) {
            return null;
        }

        // tempnam() creates the file atomically instead of relying on a guessable name.
        $tmp_file = tempnam(sys_get_temp_dir(), 'zugferd_');
        if ($tmp_file === false) {
            return null;
        }

        try {
            $xml = $this->saveAttachmentWithPdfdetach($pdf_path, 1, $tmp_file);
            if ($xml !== null
                && (strpos($xml, 'CrossIndustryDocument') !== false || strpos($xml, 'CrossIndustryInvoice') !== false)
            ) {
                return $xml;
            }

            // Fall back to picking the attachment by name.
            $list_output = array();
            $return_code = 0;
            exec('pdfdetach -list ' . escapeshellarg($pdf_path) . ' 2>&1', $list_output, $return_code);

            foreach ($list_output as $line) {
                if (!preg_match('/(ZUGFeRD|factur-x|xrechnung)/i', $line)) {
                    continue;
                }
                // Listing lines carry the attachment index as "<number>:" at the start.
                if (!preg_match('/^\s*(\d+):/', $line, $matches)) {
                    continue;
                }
                $xml = $this->saveAttachmentWithPdfdetach($pdf_path, (int) $matches[1], $tmp_file);
                if ($xml !== null) {
                    return $xml;
                }
            }

            return null;
        } finally {
            // Always remove the temp file, whichever path was taken.
            if (file_exists($tmp_file)) {
                unlink($tmp_file);
            }
        }
    }

    /**
     * Save one PDF attachment with pdfdetach and return its content.
     *
     * @param string $pdf_path Path to PDF
     * @param int    $index    1-based attachment number passed to "pdfdetach -save"
     * @param string $tmp_file Existing temp file that pdfdetach writes to
     * @return string|null Attachment content, or null if the save failed or produced nothing
     */
    private function saveAttachmentWithPdfdetach($pdf_path, $index, $tmp_file)
    {
        // Empty the file first so content from an earlier attempt is never returned.
        file_put_contents($tmp_file, '');

        // exec() appends to an existing array, so start from a fresh one each time.
        $output = array();
        $return_code = 0;
        $cmd = 'pdfdetach -save ' . (int) $index . ' -o ' . escapeshellarg($tmp_file) . ' ' . escapeshellarg($pdf_path) . ' 2>&1';
        exec($cmd, $output, $return_code);

        if ($return_code !== 0 || !is_file($tmp_file)) {
            return null;
        }

        $xml = file_get_contents($tmp_file);
        if ($xml === false || $xml === '') {
            return null;
        }

        return $xml;
    }

    /**
     * Parse the XML content into $this->invoice_data.
     *
     * @param string $xml_content Optional XML content, uses $this->xml_content if not provided
     * @return int 1 if OK, -1 if error (reason in $this->error)
     */
    public function parse($xml_content = null)
    {
        if ($xml_content !== null) {
            $this->xml_content = $xml_content;
        }

        if (empty($this->xml_content)) {
            $this->error = 'No XML content to parse';
            return -1;
        }

        // Collect libxml errors ourselves, but restore the caller's setting afterwards.
        $previousSetting = libxml_use_internal_errors(true);
        // Drop stale entries so $errors[0] below belongs to this parse.
        libxml_clear_errors();

        // LIBXML_NONET: the XML comes from an external document, never fetch anything for it.
        $this->xml = simplexml_load_string($this->xml_content, 'SimpleXMLElement', LIBXML_NONET);

        if ($this->xml === false) {
            $errors = libxml_get_errors();
            $this->error = 'XML parse error: ' . (isset($errors[0]) ? trim($errors[0]->message) : 'Unknown error');
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
            return -1;
        }

        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        $this->namespaces = $this->xml->getNamespaces(true);

        // Dispatch on the root element name found in the raw XML.
        if ($this->isZugferdV1()) {
            return $this->parseZugferdV1();
        }
        if ($this->isZugferdV2()) {
            return $this->parseZugferdV2();
        }

        $this->error = 'Unknown ZUGFeRD/Factur-X format';
        return -1;
    }

    /**
     * Check if ZUGFeRD v1 format (XML contains 'CrossIndustryDocument').
     *
     * @return bool
     */
    private function isZugferdV1()
    {
        return strpos($this->xml_content, 'CrossIndustryDocument') !== false;
    }

    /**
     * Check if ZUGFeRD v2 / Factur-X format (XML contains 'CrossIndustryInvoice').
     *
     * @return bool
     */
    private function isZugferdV2()
    {
        return strpos($this->xml_content, 'CrossIndustryInvoice') !== false;
    }

    /**
     * Parse ZUGFeRD v1 format and store the result in $this->invoice_data.
     *
     * @return int 1 if OK, -1 if error
     */
    private function parseZugferdV1()
    {
        $ramNs = 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12';

        $this->xml->registerXPathNamespace('rsm', 'urn:ferd:CrossIndustryDocument:invoice:1p0');
        $this->xml->registerXPathNamespace('ram', $ramNs);
        $this->xml->registerXPathNamespace('udt', 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15');

        $data = array();

        // Header information (keys are only set when the header element exists)
        $header = $this->xml->xpath('//rsm:HeaderExchangedDocument');
        if (!empty($header)) {
            $data['invoice_number'] = $this->getXpathValue('//rsm:HeaderExchangedDocument/ram:ID');
            $data['invoice_type'] = $this->getXpathValue('//rsm:HeaderExchangedDocument/ram:TypeCode');
            $data['invoice_name'] = $this->getXpathValue('//rsm:HeaderExchangedDocument/ram:Name');
            $dateStr = $this->getXpathValue('//rsm:HeaderExchangedDocument/ram:IssueDateTime/udt:DateTimeString');
            $data['invoice_date'] = $this->parseDate($dateStr);
        }

        // Seller (supplier)
        $seller = '//ram:SellerTradeParty';
        $data['seller'] = array(
            'name' => $this->getXpathValue($seller . '/ram:Name'),
            'global_id' => $this->getXpathValue($seller . '/ram:GlobalID'),
            'vat_id' => $this->getXpathValue($seller . '/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]'),
            'address' => array(
                'street' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:LineOne'),
                'postcode' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:PostcodeCode'),
                'city' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:CityName'),
                'country' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:CountryID'),
            )
        );

        // Buyer (us)
        $data['buyer'] = array(
            'id' => $this->getXpathValue('//ram:BuyerTradeParty/ram:ID'),
            'reference' => $this->getXpathValue('//ram:ApplicableSupplyChainTradeAgreement/ram:BuyerReference'),
            'name' => $this->getXpathValue('//ram:BuyerTradeParty/ram:Name'),
        );

        // Totals
        $summation = '//ram:SpecifiedTradeSettlementMonetarySummation';
        $data['totals'] = array(
            'net' => (float) $this->getXpathValue($summation . '/ram:LineTotalAmount'),
            'tax' => (float) $this->getXpathValue($summation . '/ram:TaxTotalAmount'),
            'gross' => (float) $this->getXpathValue($summation . '/ram:GrandTotalAmount'),
            'currency' => $this->getXpathValue('//ram:ApplicableSupplyChainTradeSettlement/ram:InvoiceCurrencyCode'),
        );

        // Due date
        $dueDateStr = $this->getXpathValue('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString');
        $data['due_date'] = $this->parseDate($dueDateStr);

        // Line items: element names that differ from v2 are passed to the shared helper
        $names = array(
            'agreement' => 'SpecifiedSupplyChainTradeAgreement',
            'delivery' => 'SpecifiedSupplyChainTradeDelivery',
            'settlement' => 'SpecifiedSupplyChainTradeSettlement',
            'summation' => 'SpecifiedTradeSettlementMonetarySummation',
            'tax_percent' => 'ApplicablePercent',
        );
        $data['lines'] = array();
        // xpath() can return a non-array on failure; iterate over nothing in that case.
        $lines = $this->xml->xpath('//ram:IncludedSupplyChainTradeLineItem') ?: array();
        foreach ($lines as $line) {
            $data['lines'][] = $this->parseLineItem($line, $ramNs, $names);
        }

        $this->invoice_data = $data;
        return 1;
    }

    /**
     * Parse ZUGFeRD v2 / Factur-X format and store the result in $this->invoice_data.
     *
     * @return int 1 if OK, -1 if error
     */
    private function parseZugferdV2()
    {
        $ramNs = 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';

        $this->xml->registerXPathNamespace('rsm', 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100');
        $this->xml->registerXPathNamespace('ram', $ramNs);
        $this->xml->registerXPathNamespace('qdt', 'urn:un:unece:uncefact:data:standard:QualifiedDataType:100');
        $this->xml->registerXPathNamespace('udt', 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100');

        $data = array();

        // Header information
        $data['invoice_number'] = $this->getXpathValue('//rsm:ExchangedDocument/ram:ID');
        $data['invoice_type'] = $this->getXpathValue('//rsm:ExchangedDocument/ram:TypeCode');
        $data['invoice_name'] = $this->getXpathValue('//rsm:ExchangedDocument/ram:Name');
        $dateStr = $this->getXpathValue('//rsm:ExchangedDocument/ram:IssueDateTime/udt:DateTimeString');
        $data['invoice_date'] = $this->parseDate($dateStr);

        // Seller (supplier)
        $seller = '//ram:ApplicableHeaderTradeAgreement/ram:SellerTradeParty';

        // Prefer the registration tagged schemeID="VA", as the v1 parser does. Fall back to
        // the first registration ID so documents without a VA entry keep yielding a value.
        $vatId = $this->getXpathValue($seller . '/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]');
        if ($vatId === '') {
            $vatId = $this->getXpathValue($seller . '/ram:SpecifiedTaxRegistration/ram:ID');
        }

        $data['seller'] = array(
            'name' => $this->getXpathValue($seller . '/ram:Name'),
            'global_id' => $this->getXpathValue($seller . '/ram:GlobalID'),
            'vat_id' => $vatId,
            'address' => array(
                'street' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:LineOne'),
                'postcode' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:PostcodeCode'),
                'city' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:CityName'),
                'country' => $this->getXpathValue($seller . '/ram:PostalTradeAddress/ram:CountryID'),
            )
        );

        // Buyer (us)
        $data['buyer'] = array(
            'id' => $this->getXpathValue('//ram:ApplicableHeaderTradeAgreement/ram:BuyerTradeParty/ram:ID'),
            'reference' => $this->getXpathValue('//ram:ApplicableHeaderTradeAgreement/ram:BuyerReference'),
            'name' => $this->getXpathValue('//ram:ApplicableHeaderTradeAgreement/ram:BuyerTradeParty/ram:Name'),
        );

        // Totals
        $summation = '//ram:ApplicableHeaderTradeSettlement/ram:SpecifiedTradeSettlementHeaderMonetarySummation';
        $data['totals'] = array(
            'net' => (float) $this->getXpathValue($summation . '/ram:LineTotalAmount'),
            'tax' => (float) $this->getXpathValue($summation . '/ram:TaxTotalAmount'),
            'gross' => (float) $this->getXpathValue($summation . '/ram:GrandTotalAmount'),
            'currency' => $this->getXpathValue('//ram:ApplicableHeaderTradeSettlement/ram:InvoiceCurrencyCode'),
        );

        // Due date
        $dueDateStr = $this->getXpathValue('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString');
        $data['due_date'] = $this->parseDate($dueDateStr);

        // Line items: element names that differ from v1 are passed to the shared helper
        $names = array(
            'agreement' => 'SpecifiedLineTradeAgreement',
            'delivery' => 'SpecifiedLineTradeDelivery',
            'settlement' => 'SpecifiedLineTradeSettlement',
            'summation' => 'SpecifiedTradeSettlementLineMonetarySummation',
            'tax_percent' => 'RateApplicablePercent',
        );
        $data['lines'] = array();
        // xpath() can return a non-array on failure; iterate over nothing in that case.
        $lines = $this->xml->xpath('//ram:IncludedSupplyChainTradeLineItem') ?: array();
        foreach ($lines as $line) {
            $data['lines'][] = $this->parseLineItem($line, $ramNs, $names);
        }

        $this->invoice_data = $data;
        return 1;
    }

    /**
     * Build the data array for one IncludedSupplyChainTradeLineItem element.
     *
     * Shared by the v1 and v2 parsers, which differ only in the 'ram' namespace
     * URI and in a handful of element names.
     *
     * @param SimpleXMLElement $line   Line item element
     * @param string           $ram_ns Namespace URI to bind to the 'ram' prefix
     * @param array            $names  Element names without prefix, keys: agreement, delivery,
     *                                 settlement, summation, tax_percent
     * @return array Line data (line_id, product, quantity, prices, totals, copper surcharge)
     */
    private function parseLineItem($line, $ram_ns, array $names)
    {
        // Each element returned by xpath() needs its own prefix registration.
        $line->registerXPathNamespace('ram', $ram_ns);

        $agreement = 'ram:' . $names['agreement'];
        $delivery = 'ram:' . $names['delivery'];
        $settlement = 'ram:' . $names['settlement'];
        $product = 'ram:SpecifiedTradeProduct';

        // Price and the quantity that price refers to
        $netPrice = $agreement . '/ram:NetPriceProductTradePrice';
        $chargeAmount = (float) $this->getNodeValue($line->xpath($netPrice . '/ram:ChargeAmount'));
        $basisNodes = $line->xpath($netPrice . '/ram:BasisQuantity');
        $basisQuantity = (float) $this->getNodeValue($basisNodes);
        $basisQuantityUnit = (string) $this->getNodeAttribute($basisNodes, 'unitCode');

        // If BasisQuantity is e.g. 100, the charge amount is the price of 100 units.
        // A missing/zero basis is treated as "price per single unit".
        $unitPrice = $basisQuantity > 0 ? $chargeAmount / $basisQuantity : $chargeAmount;

        // Copper surcharge, first from the allowance/charge entries of the gross price
        $copperSurcharge = null;
        $copperSurchargeBasisQty = null;
        $charges = $line->xpath($agreement . '/ram:GrossPriceProductTradePrice/ram:AppliedTradeAllowanceCharge') ?: array();
        foreach ($charges as $charge) {
            $charge->registerXPathNamespace('ram', $ram_ns);
            $reason = (string) $this->getNodeValue($charge->xpath('ram:Reason'));
            if ($this->mentionsCopper($reason)) {
                $copperSurcharge = (float) $this->getNodeValue($charge->xpath('ram:ActualAmount'));
                $copperSurchargeBasisQty = (float) $this->getNodeValue($charge->xpath('ram:BasisQuantity'));
                break;
            }
        }

        // ... otherwise from the product characteristics
        if ($copperSurcharge === null) {
            $characteristics = $line->xpath($product . '/ram:ApplicableProductCharacteristic') ?: array();
            foreach ($characteristics as $char) {
                $char->registerXPathNamespace('ram', $ram_ns);
                $desc = (string) $this->getNodeValue($char->xpath('ram:Description'));
                if ($this->mentionsCopper($desc)) {
                    $copperSurcharge = (float) $this->getNodeValue($char->xpath('ram:Value'));
                    // No own basis quantity here; reuse the one of the price.
                    $copperSurchargeBasisQty = $basisQuantity ?: 1;
                    break;
                }
            }
        }

        // Surcharge per single unit; only computed for a positive surcharge
        $copperSurchargePerUnit = null;
        if ($copperSurcharge !== null && $copperSurcharge > 0) {
            $copperSurchargePerUnit = $copperSurchargeBasisQty > 0
                ? $copperSurcharge / $copperSurchargeBasisQty
                : $copperSurcharge;
        }

        $billedNodes = $line->xpath($delivery . '/ram:BilledQuantity');

        return array(
            'line_id' => (string) $this->getNodeValue($line->xpath('ram:AssociatedDocumentLineDocument/ram:LineID')),
            'product' => array(
                'seller_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:SellerAssignedID')),
                'buyer_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:BuyerAssignedID')),
                'global_id' => (string) $this->getNodeValue($line->xpath($product . '/ram:GlobalID')),
                'name' => (string) $this->getNodeValue($line->xpath($product . '/ram:Name')),
                'description' => (string) $this->getNodeValue($line->xpath($product . '/ram:Description')),
            ),
            'quantity' => (float) $this->getNodeValue($billedNodes),
            'unit_code' => (string) $this->getNodeAttribute($billedNodes, 'unitCode'),
            'unit_price' => $unitPrice,
            'unit_price_raw' => $chargeAmount,
            'basis_quantity' => $basisQuantity ?: 1,
            'basis_quantity_unit' => $basisQuantityUnit,
            'line_total' => (float) $this->getNodeValue($line->xpath($settlement . '/ram:' . $names['summation'] . '/ram:LineTotalAmount')),
            'tax_percent' => (float) $this->getNodeValue($line->xpath($settlement . '/ram:ApplicableTradeTax/ram:' . $names['tax_percent'])),
            // Copper surcharge data
            'copper_surcharge' => $copperSurcharge,
            'copper_surcharge_basis_qty' => $copperSurchargeBasisQty,
            'copper_surcharge_per_unit' => $copperSurchargePerUnit,
        );
    }

    /**
     * Tell whether a text contains 'Kupfer', 'copper' or 'Metall' (case-insensitive).
     *
     * @param string $text Reason or description text
     * @return bool
     */
    private function mentionsCopper($text)
    {
        foreach (array('Kupfer', 'copper', 'Metall') as $keyword) {
            if (stripos($text, $keyword) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Get the trimmed text of the first node matching an XPath on the document.
     *
     * @param string $xpath XPath expression
     * @return string Node text, or '' if nothing matched
     */
    private function getXpathValue($xpath)
    {
        $result = $this->xml->xpath($xpath);
        if (!empty($result)) {
            return trim((string) $result[0]);
        }
        return '';
    }

    /**
     * Get the trimmed text of the first node of an XPath result.
     *
     * @param array $nodes XPath result array
     * @return string Node text, or '' if the result is empty
     */
    private function getNodeValue($nodes)
    {
        if (!empty($nodes) && isset($nodes[0])) {
            return trim((string) $nodes[0]);
        }
        return '';
    }

    /**
     * Get an attribute of the first node of an XPath result.
     *
     * @param array  $nodes XPath result array
     * @param string $attr  Attribute name
     * @return string Attribute value, or '' if node or attribute is missing
     */
    private function getNodeAttribute($nodes, $attr)
    {
        if (!empty($nodes) && isset($nodes[0])) {
            $attributes = $nodes[0]->attributes();
            if (isset($attributes[$attr])) {
                return (string) $attributes[$attr];
            }
        }
        return '';
    }

    /**
     * Parse date string in format YYYYMMDD or ISO.
     *
     * @param string $dateStr Date string
     * @return string Date in Y-m-d format; '' for empty input; the input unchanged
     *                if neither format matches
     */
    private function parseDate($dateStr)
    {
        if (empty($dateStr)) {
            return '';
        }

        // YYYYMMDD, or anything starting with YYYY-MM-DD (e.g. an ISO timestamp)
        if (preg_match('/^(\d{4})(\d{2})(\d{2})$/', $dateStr, $matches)
            || preg_match('/^(\d{4})-(\d{2})-(\d{2})/', $dateStr, $matches)
        ) {
            return $matches[1] . '-' . $matches[2] . '-' . $matches[3];
        }

        return $dateStr;
    }

    /**
     * Get file hash for duplicate detection.
     *
     * @param string $file_path Path to file
     * @return string SHA256 hash, or '' if the path is not a regular file or cannot be hashed
     */
    public function getFileHash($file_path)
    {
        // is_file() rather than file_exists(): a directory cannot be hashed.
        if (!is_file($file_path)) {
            return '';
        }

        $hash = hash_file('sha256', $file_path);

        // hash_file() returns false on failure; keep the documented string return.
        return $hash === false ? '' : $hash;
    }

    /**
     * Get invoice data as stored by the last successful parse.
     *
     * @return array
     */
    public function getInvoiceData()
    {
        return $this->invoice_data;
    }

    /**
     * Get XML content as stored by extractFromPdf() or parse().
     *
     * @return string
     */
    public function getXmlContent()
    {
        return $this->xml_content;
    }
}