importzugferd/class/datanormparser.class.php

1006 lines
34 KiB
PHP

<?php
/* Copyright (C) 2026 Eduard Wisch <data@data-it-solution.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*/
/**
* \file class/datanormparser.class.php
* \ingroup importzugferd
* \brief Parser for Datanorm 4.0 and 5.0 catalog files
*/
/**
* Class DatanormParser
* Parses Datanorm catalog files (Version 4.0 and 5.0)
*
* Datanorm Price Unit (PE) Codes:
* 0 or empty = per 1 piece
* 1 = per 10 pieces
* 2 = per 100 pieces
* 3 = per 1000 pieces
*
* The price in Datanorm is given for the quantity specified by the PE code.
* To get the unit price: divide price by PE quantity.
*/
class DatanormParser
{
/**
 * Lookup table from Datanorm price unit (PE) code to the quantity the
 * price refers to: codes 0, 1, 2, 3 stand for 1, 10, 100, 1000 units.
 */
const PRICE_UNIT_CODES = array(0 => 1, 1 => 10, 2 => 100, 3 => 1000);
/**
 * Translate a Datanorm PE code into the quantity it represents.
 *
 * The argument is cast to int before the lookup; any code that has no
 * entry in PRICE_UNIT_CODES yields 1.
 *
 * @param int|string $peCode The PE code from Datanorm (0, 1, 2, or 3)
 * @return int The actual quantity (1, 10, 100, or 1000)
 */
public static function convertPriceUnitCode($peCode)
{
    $index = (int)$peCode;
    if (isset(self::PRICE_UNIT_CODES[$index])) {
        return self::PRICE_UNIT_CODES[$index];
    }
    return 1;
}
/**
 * @var string Datanorm version of the most recently detected input:
 *             '4.0', '5.0', or '' while nothing has been detected yet
 */
public $version = '';
/**
 * @var array Parsed articles keyed by article number. Filled in
 *            non-streaming mode; in streaming mode Datanorm 4 articles
 *            are buffered in $batchArticles instead.
 */
public $articles = array();
/**
 * @var array Price records keyed by article number. Each entry holds
 *            'price' and 'price_type'; entries read from the multi-article
 *            DATPREIS layout also carry 'metal_surcharge' and
 *            'datpreis_pe_code'.
 */
public $prices = array();
/**
 * @var array Product groups from DATANORM.WRG, as group code => group name
 */
public $groups = array();
/**
 * @var string Message of the most recent error, '' when none was recorded
 */
public $error = '';
/**
 * @var array Error messages.
 *            NOTE(review): no method visible in this file writes to this
 *            property - confirm whether external code relies on it.
 */
public $errors = array();
/**
 * @var callable|null Callback that receives each batch of articles in
 *                    streaming mode; null while streaming is disabled
 */
protected $batchCallback = null;
/**
 * @var int Number of articles collected before a batch is handed to the callback
 */
protected $batchSize = 1000;
/**
 * @var array Articles of the batch currently being collected, keyed by article number
 */
protected $batchArticles = array();
/**
 * @var bool Whether to use streaming mode (for large files)
 */
protected $streamingMode = false;
/**
 * Switch the parser into streaming mode.
 *
 * While streaming is active, articles are buffered and handed to the
 * callback in batches rather than being kept in $articles. Any batch
 * that was being collected before this call is discarded.
 *
 * @param callable $callback Function to call with batch of articles
 * @param int $batchSize Number of articles per batch
 */
public function enableStreaming($callback, $batchSize = 1000)
{
    // Begin with an empty buffer so the first batch contains only new articles.
    $this->batchArticles = array();
    $this->batchSize = $batchSize;
    $this->batchCallback = $callback;
    $this->streamingMode = true;
}
/**
 * Leave streaming mode.
 *
 * Drops the callback and empties the batch buffer; articles still waiting
 * in the buffer are discarded, not delivered.
 */
public function disableStreaming()
{
    $this->batchArticles = array();
    $this->batchCallback = null;
    $this->streamingMode = false;
}
/**
 * Store a parsed article.
 *
 * Non-streaming mode (or streaming without a callback): the article is
 * kept in $articles under its article number.
 *
 * Streaming mode: the article is appended to the current batch. A full
 * batch is flushed only when the NEXT distinct article arrives, never
 * right after the article that filled it. The most recently added article
 * therefore stays reachable through the batch buffer, so follow-up records
 * for it (B records carrying matchcode, EAN, long text) can still be
 * applied before it is handed to the callback. The caller must flush the
 * final batch once the input is exhausted.
 *
 * @param array $article Article data; must contain 'article_number'
 */
protected function addArticle($article)
{
    $number = $article['article_number'];

    if (!$this->streamingMode || !$this->batchCallback) {
        $this->articles[$number] = $article;
        return;
    }

    // Re-adding a number already in the batch overwrites in place and does
    // not grow the batch, so only a new number can trigger a flush.
    if (!isset($this->batchArticles[$number]) && count($this->batchArticles) >= $this->batchSize) {
        $this->flushBatch();
    }
    $this->batchArticles[$number] = $article;
}
/**
 * Hand the collected batch to the streaming callback and start a new one.
 *
 * Does nothing when the batch is empty or no callback is set. Before the
 * hand-over, every buffered article that has an entry in $prices receives
 * that entry's 'price' (and 'metal_surcharge' when it is non-empty); the
 * consumed price entry is then removed from $prices to release memory.
 */
protected function flushBatch()
{
    if (empty($this->batchArticles) || !$this->batchCallback) {
        return;
    }

    foreach (array_keys($this->batchArticles) as $number) {
        if (!isset($this->prices[$number])) {
            continue;
        }
        $priceData = $this->prices[$number];
        $this->batchArticles[$number]['price'] = $priceData['price'];
        if (!empty($priceData['metal_surcharge'])) {
            $this->batchArticles[$number]['metal_surcharge'] = $priceData['metal_surcharge'];
        }
        // The price has been applied; drop it so $prices does not keep growing.
        unset($this->prices[$number]);
    }

    call_user_func($this->batchCallback, $this->batchArticles);
    $this->batchArticles = array();
}
/**
 * Entry point: parse either a directory of Datanorm files or a single file.
 *
 * Directories are delegated to parseDirectory(), everything else to
 * parseFile().
 *
 * @param string $path Path to file or directory
 * @return int Number of articles parsed, -1 on error
 */
public function parse($path)
{
    return is_dir($path)
        ? $this->parseDirectory($path)
        : $this->parseFile($path);
}
/**
 * Parse all Datanorm files in a directory.
 *
 * File names are matched case-insensitively:
 *  - DATPREIS.nnn  price files (loaded up front, non-streaming mode only)
 *  - DATANORM.nnn  article files
 *  - DATANORM.WRG  product groups
 *  - DATANORM.RAB  discount groups
 *  - *.xml         Datanorm 5.0 candidates (checked with isDatanorm5File())
 *
 * Group and discount files are only read when at least one DATANORM.nnn
 * article file is present. Files that fail to parse contribute nothing to
 * the returned total.
 *
 * @param string $dir Directory path
 * @return int Number of articles parsed across all files
 */
public function parseDirectory($dir)
{
    $totalArticles = 0;

    // glob() returns false on error; normalise so the loop below is safe.
    $allFiles = glob($dir . '/*') ?: array();

    // Classify every entry once, comparing upper-cased names so that
    // lower-case files on case-sensitive file systems are found as well.
    $priceFiles = array();
    $datanormFiles = array();
    $wrgFiles = array();
    $rabFiles = array();
    $xmlFiles = array();
    foreach ($allFiles as $file) {
        $basename = strtoupper(basename($file));
        if (preg_match('/^DATPREIS\.\d{3}$/', $basename)) {
            $priceFiles[] = $file;
        } elseif (preg_match('/^DATANORM\.\d{3}$/', $basename)) {
            $datanormFiles[] = $file;
        } elseif ($basename === 'DATANORM.WRG') {
            $wrgFiles[] = $file;
        } elseif ($basename === 'DATANORM.RAB') {
            $rabFiles[] = $file;
        } elseif (substr($basename, -4) === '.XML' && is_file($file)) {
            $xmlFiles[] = $file;
        }
    }

    // For non-streaming mode, load prices first into memory.
    // In streaming mode the price files are not read here.
    if (!$this->streamingMode && !empty($priceFiles)) {
        $this->version = '4.0';
        foreach ($priceFiles as $file) {
            $this->parseDatapreis4File($file);
        }
    }

    // Datanorm 4.0 article files plus their companion files
    if (!empty($datanormFiles)) {
        $this->version = '4.0';
        foreach ($datanormFiles as $file) {
            $count = $this->parseDatanorm4File($file);
            // A negative count signals a read error for that file; skip it.
            if ($count > 0) {
                $totalArticles += $count;
            }
        }
        foreach ($wrgFiles as $file) {
            $this->parseDatanorm4Groups($file);
        }
        foreach ($rabFiles as $file) {
            $this->parseDatanorm4Discounts($file);
        }
    }

    // Merge prices into articles (non-streaming mode only).
    // In streaming mode, prices are merged in flushBatch().
    if (!$this->streamingMode && !empty($this->prices)) {
        $this->mergePricesIntoArticles();
    }

    // Datanorm 5.0 files (XML)
    foreach ($xmlFiles as $file) {
        if ($this->isDatanorm5File($file)) {
            $this->version = '5.0';
            $count = $this->parseDatanorm5File($file);
            if ($count > 0) {
                $totalArticles += $count;
            }
        }
    }

    return $totalArticles;
}
/**
 * Parse a single file, auto-detecting the format.
 *
 * The first 1000 bytes are inspected: an XML declaration or a DATANORM
 * root tag (matched case-insensitively) selects the Datanorm 5.0 parser;
 * anything else is treated as Datanorm 4.0.
 *
 * @param string $file File path
 * @return int Number of articles parsed, -1 on error ($error holds the reason)
 */
public function parseFile($file)
{
    if (!file_exists($file)) {
        $this->error = 'File not found: ' . $file;
        return -1;
    }

    $content = file_get_contents($file, false, null, 0, 1000);
    if ($content === false) {
        // The file exists but cannot be read (permissions, directory, ...).
        $this->error = 'Cannot read file: ' . $file;
        return -1;
    }

    // Check if XML (Datanorm 5.0)
    if (strpos($content, '<?xml') !== false || stripos($content, '<DATANORM') !== false) {
        $this->version = '5.0';
        return $this->parseDatanorm5File($file);
    }

    // Assume Datanorm 4.0
    $this->version = '4.0';
    return $this->parseDatanorm4File($file);
}
/**
 * Parse a Datanorm 4.0 article file line by line.
 *
 * Lines that are not valid UTF-8 are converted from ISO-8859-1. The first
 * character selects the record type:
 *  - 'A' article master record; becomes the "current" article
 *  - 'B' additional data / long text for the current article
 *  - 'P' price record
 * Other record types are ignored.
 *
 * When an A record is rejected, the current article is cleared, so the B
 * records that follow it are dropped instead of being attached to the
 * previously accepted article.
 *
 * After the last line the pending batch is flushed (streaming mode) or
 * the collected prices are merged into $articles (non-streaming mode).
 *
 * @param string $file File path
 * @return int Number of articles parsed, -1 if the file cannot be opened
 */
protected function parseDatanorm4File($file)
{
    $handle = fopen($file, 'r');
    if ($handle === false) {
        $this->error = 'Cannot read file: ' . $file;
        return -1;
    }

    $count = 0;
    $currentArticle = null;

    while (($line = fgets($handle)) !== false) {
        $line = rtrim($line, "\r\n");

        // Convert encoding if needed; only ISO-8859-1 is assumed here.
        if (!mb_check_encoding($line, 'UTF-8')) {
            $line = mb_convert_encoding($line, 'UTF-8', 'ISO-8859-1');
        }

        if (strlen($line) < 2) {
            continue;
        }

        switch (substr($line, 0, 1)) {
            case 'A':
                // Article master record
                $article = $this->parseDatanorm4TypeA($line);
                if ($article) {
                    $this->addArticle($article);
                    $currentArticle = $article['article_number'];
                    $count++;
                } else {
                    // Following B records belong to the rejected article.
                    $currentArticle = null;
                }
                break;

            case 'B':
                // Article info/long text; strict null check keeps number "0" usable
                if ($currentArticle !== null) {
                    $this->parseDatanorm4TypeB($line, $currentArticle);
                }
                break;

            case 'P':
                // Price record
                $this->parseDatanorm4TypeP($line);
                break;
        }
    }

    fclose($handle);

    if ($this->streamingMode) {
        // Deliver whatever is still buffered
        $this->flushBatch();
    } else {
        // Merge prices into articles (only in non-streaming mode)
        $this->mergePricesIntoArticles();
    }

    return $count;
}
/**
 * Parse a Datanorm 4.0 Type A record (article master).
 *
 * Lines containing a semicolon are delegated to
 * parseDatanorm4TypeASemicolon(), which applies its own minimum-field
 * check. The 50-character minimum below applies to fixed-width lines only,
 * because delimited records with short texts are legitimately shorter.
 *
 * Fixed-width fields are cut by CHARACTER position with mb_substr(). The
 * caller passes the line as UTF-8, where a non-ASCII character occupies
 * more than one byte, so byte offsets would shift every following field.
 *
 * @param string $line Record line (UTF-8)
 * @return array|null Article data, or null if the line is too short or has no article number
 */
protected function parseDatanorm4TypeA($line)
{
    // Semicolon-separated layout (newer files)
    if (strpos($line, ';') !== false) {
        return $this->parseDatanorm4TypeASemicolon($line);
    }

    // Fixed-width layout: minimum length check in characters
    $length = mb_strlen($line, 'UTF-8');
    if ($length < 50) {
        return null;
    }

    // Cut one trimmed field by 0-based character offset and width.
    $field = static function ($offset, $width) use ($line) {
        return trim(mb_substr($line, $offset, $width, 'UTF-8'));
    };

    // PE code is at position 112-116 and is a CODE (0=1, 1=10, 2=100, 3=1000)
    $peCode = (int)$field(111, 5);
    $priceUnit = self::convertPriceUnitCode($peCode);

    $article = array(
        'article_number' => $field(1, 15), // Pos 2-16: article number
        'action_code' => 'N', // Fixed-width format has no action code
        'matchcode' => $field(16, 12), // Pos 17-28: matchcode
        'short_text1' => $field(28, 40), // Pos 29-68: short text 1
        'short_text2' => $field(68, 40), // Pos 69-108: short text 2
        'unit_code' => $field(108, 3), // Pos 109-111: unit of measure
        'price_unit' => $priceUnit, // Converted from PE code
        'price_unit_code' => $peCode, // Original PE code
        'discount_group' => $field(116, 4), // Pos 117-120: discount group
        'product_group' => $field(120, 7), // Pos 121-127: product group
        'manufacturer_ref' => $field(127, 15), // Pos 128-142: manufacturer article number
        'manufacturer_name' => $field(142, 20), // Pos 143-162: manufacturer name
        'ean' => '',
        'long_text' => '',
        'price' => 0,
    );

    // EAN if available (extended format)
    if ($length >= 175) {
        $article['ean'] = $field(162, 13);
    }

    if (empty($article['article_number'])) {
        return null;
    }

    return $article;
}
/**
 * Parse a semicolon-separated Datanorm 4.0 Type A record.
 *
 * Two layouts are told apart by the second field: when it is at most two
 * characters long (after trimming) it is taken to be an action code and
 * the Sonepar layout applies, otherwise the standard layout.
 *
 *   Sonepar : A;N;ArtNr;TextKz;Kurztext1;Kurztext2;PreisKz;PE;ME;Preis;RabGrp;WG;...
 *   Standard: A;ArtNr;Matchcode;Kurztext1;Kurztext2;ME;PE;RabGrp;WG;...
 *
 * In both layouts PE is a code that is converted to a quantity with
 * convertPriceUnitCode(). The 'Preis' field of the Sonepar layout is not
 * read here; 'price' always starts at 0.
 *
 * @param string $line Record line
 * @return array|null Article data, or null with fewer than 6 fields or an empty article number
 */
protected function parseDatanorm4TypeASemicolon($line)
{
    $parts = explode(';', $line);
    if (count($parts) < 6) {
        return null;
    }

    // Trimmed field by index; missing fields read as ''.
    $at = static function ($index) use ($parts) {
        return trim($parts[$index] ?? '');
    };

    $hasActionCode = $at(0) === 'A' && strlen($at(1)) <= 2;

    if ($hasActionCode) {
        // Sonepar layout with action code (N=New, L=Delete, A=Update); PE at index 7
        $peCode = (int)$at(7);
        $article = array(
            'article_number' => $at(2),
            'action_code' => strtoupper($at(1)),
            'matchcode' => '', // filled later from the B record
            'short_text1' => $at(4),
            'short_text2' => $at(5),
            'unit_code' => $at(8), // unit of measure
            'price_unit' => self::convertPriceUnitCode($peCode),
            'price_unit_code' => $peCode, // original PE code for reference
            'discount_group' => $at(10),
            'product_group' => $at(11),
            'price_type' => $at(6), // price indicator (1=gross, 2=net)
            'manufacturer_ref' => '',
            'manufacturer_name' => '',
            'ean' => '',
            'long_text' => '',
            'price' => 0,
        );
    } else {
        // Standard layout; PE at index 6
        $peCode = (int)$at(6);
        $article = array(
            'article_number' => $at(1),
            'action_code' => 'N', // standard layout carries no action code
            'matchcode' => $at(2),
            'short_text1' => $at(3),
            'short_text2' => $at(4),
            'unit_code' => $at(5),
            'price_unit' => self::convertPriceUnitCode($peCode),
            'price_unit_code' => $peCode,
            'discount_group' => $at(7),
            'product_group' => $at(8),
            'manufacturer_ref' => $at(14),
            'manufacturer_name' => $at(15),
            'ean' => $at(16),
            'long_text' => '',
            'price' => 0,
        );
    }

    return empty($article['article_number']) ? null : $article;
}
/**
 * Return a reference to a stored article so the caller can modify it in place.
 *
 * Looks in the current batch when streaming mode is on, otherwise in
 * $articles. When the article is not present, a reference to a local
 * null is returned.
 *
 * @param string $articleNumber Article number
 * @return array|null Reference to article or null
 */
protected function &getArticleRef($articleNumber)
{
    $missing = null;

    if ($this->streamingMode) {
        if (!isset($this->batchArticles[$articleNumber])) {
            return $missing;
        }
        return $this->batchArticles[$articleNumber];
    }

    if (!isset($this->articles[$articleNumber])) {
        return $missing;
    }
    return $this->articles[$articleNumber];
}
/**
 * Parse a Datanorm 4.0 Type B record and apply it to an already stored article.
 *
 * Three layouts are handled:
 *  - Sonepar (semicolons, second field at most 2 characters):
 *    B;N;ArtNr;Matchcode; ; ;;;;EAN; ; ;0;VPE;;;
 *    Applied only when the record's article number equals $articleNumber.
 *    Fills matchcode and EAN if still empty and stores the packaging
 *    quantity as 'vpe'. The price unit taken from the A record is not
 *    overridden.
 *  - Standard (semicolons): field 2 is appended to 'long_text'.
 *  - Fixed width: everything from character 17 on is appended to
 *    'long_text'. The offset is counted in characters because the line is
 *    UTF-8 by the time it reaches this method.
 *
 * Does nothing when the article is not (or no longer) stored.
 *
 * @param string $line Record line (UTF-8)
 * @param string $articleNumber Current article number
 */
protected function parseDatanorm4TypeB($line, $articleNumber)
{
    $article = &$this->getArticleRef($articleNumber);
    if ($article === null) {
        return;
    }

    if (strpos($line, ';') === false) {
        // Fixed-width layout
        $text = trim(mb_substr($line, 16, null, 'UTF-8'));
    } else {
        $parts = explode(';', $line);

        if (isset($parts[1]) && strlen(trim($parts[1])) <= 2) {
            // Sonepar layout: verify the record really belongs to this article
            $bArticleNumber = trim($parts[2] ?? '');
            if ($bArticleNumber !== $articleNumber) {
                return;
            }

            // Matchcode is at index 3
            $matchcode = trim($parts[3] ?? '');
            if (!empty($matchcode) && empty($article['matchcode'])) {
                $article['matchcode'] = $matchcode;
            }

            // Field positions vary, so take the first 13-digit field as EAN
            if (empty($article['ean'])) {
                foreach ($parts as $part) {
                    $part = trim($part);
                    if (preg_match('/^\d{13}$/', $part)) {
                        $article['ean'] = $part;
                        break;
                    }
                }
            }

            // VPE (packaging quantity): first value > 1 within indexes 12-15.
            // Informational only; stored separately from price_unit.
            $lastIndex = min(15, count($parts) - 1);
            for ($i = 12; $i <= $lastIndex; $i++) {
                $vpe = (int)trim($parts[$i] ?? '0');
                if ($vpe > 1) {
                    $article['vpe'] = $vpe;
                    break;
                }
            }
            return;
        }

        // Standard layout: text at index 2
        $text = trim($parts[2] ?? '');
    }

    // Append as a new line of the long text
    if (!empty($text)) {
        if (!empty($article['long_text'])) {
            $article['long_text'] .= "\n";
        }
        $article['long_text'] .= $text;
    }
}
/**
 * Parse a Datanorm 4.0 Type P record (price) found inside an article file.
 *
 * Semicolon layout read here: P;ArtNr;PriceType;Price
 * Fixed-width layout: article number in characters 2-16, price type in
 * character 17, price in characters 18-29. Offsets are counted in
 * characters because the line is UTF-8 by the time it reaches this method.
 *
 * A record is stored in $prices only when the article number is non-empty
 * and the price is greater than zero; a later record for the same article
 * replaces the earlier one.
 *
 * NOTE(review): parseDatapreis4File() in this class additionally
 * understands a "P;A;ArtNr;..." multi-article layout with prices in
 * cents. This method does not; confirm whether inline P records can use
 * that layout.
 *
 * @param string $line Record line (UTF-8)
 */
protected function parseDatanorm4TypeP($line)
{
    if (strpos($line, ';') !== false) {
        $parts = explode(';', $line);
        $articleNumber = trim($parts[1] ?? '');
        $priceType = trim($parts[2] ?? '');
        $price = $this->parsePrice(trim($parts[3] ?? '0'));
    } else {
        $articleNumber = trim(mb_substr($line, 1, 15, 'UTF-8'));
        $priceType = trim(mb_substr($line, 16, 1, 'UTF-8'));
        $price = $this->parsePrice(trim(mb_substr($line, 17, 12, 'UTF-8')));
    }

    if (!empty($articleNumber) && $price > 0) {
        $this->prices[$articleNumber] = array(
            'price' => $price,
            'price_type' => $priceType,
        );
    }
}
/**
 * Read the product group file (DATANORM.WRG) into $groups.
 *
 * The whole file is loaded and converted from ISO-8859-1 when it is not
 * valid UTF-8. Lines shorter than 10 bytes are skipped. A line with
 * semicolons contributes field 0 as code and field 1 as name; any other
 * line is split after the first 7 bytes. Lines whose code is empty are
 * ignored. Returns silently when the file cannot be read.
 *
 * @param string $file File path
 */
protected function parseDatanorm4Groups($file)
{
    $raw = file_get_contents($file);
    if ($raw === false) {
        return;
    }

    $text = mb_check_encoding($raw, 'UTF-8')
        ? $raw
        : mb_convert_encoding($raw, 'UTF-8', 'ISO-8859-1');

    foreach (explode("\n", $text) as $row) {
        $row = rtrim($row, "\r\n");
        if (strlen($row) < 10) {
            continue;
        }

        if (strpos($row, ';') === false) {
            // Fixed-width: 7-byte code followed by the name
            $code = trim(substr($row, 0, 7));
            $name = trim(substr($row, 7));
        } else {
            $fields = explode(';', $row);
            $code = trim($fields[0] ?? '');
            $name = trim($fields[1] ?? '');
        }

        if (empty($code)) {
            continue;
        }
        $this->groups[$code] = $name;
    }
}
/**
 * Parse Datanorm 4.0 discount groups file (DATANORM.RAB).
 *
 * Currently a placeholder: the body is empty, so the file is neither
 * opened nor read and no parser state changes. parseDirectory() calls it
 * for every DATANORM.RAB file it finds.
 *
 * @param string $file File path (unused for now)
 */
protected function parseDatanorm4Discounts($file)
{
// Discount parsing - can be extended if needed
}
/**
 * Parse a DATPREIS.xxx price file line by line into $prices.
 *
 * Lines that are not valid UTF-8 are converted from ISO-8859-1; lines
 * shorter than 10 bytes are skipped. Supported layouts:
 *
 *  - "P;A;..." (semicolons, several articles per line). Each entry has 9
 *    fields: ArtNr;PriceType;Price;PE;Surcharge;x;x;x;x. Price and
 *    surcharge are given in cents. The surcharge is stored as
 *    'metal_surcharge', the PE code as 'datpreis_pe_code'.
 *  - "P;ArtNr;PriceType;Price" or "0;..." (semicolons, one article).
 *  - Fixed width with record type 'P' or '0': article number in
 *    characters 2-16, price type in character 17, price in characters
 *    18-29 (counted in characters, as the line is UTF-8 here).
 *
 * In the two single-article layouts a price without ',' or '.' is taken
 * to be in cents; otherwise it is parsed with parsePrice().
 *
 * Entries with an empty article number or a price that is not greater
 * than zero are ignored. If the file cannot be opened, $error is set and
 * nothing is parsed.
 *
 * @param string $file File path
 */
protected function parseDatapreis4File($file)
{
    $handle = fopen($file, 'r');
    if ($handle === false) {
        // Same message as parseDatanorm4File() so callers can inspect $error
        $this->error = 'Cannot read file: ' . $file;
        return;
    }

    // Single-article layouts: digits only means cents, otherwise a decimal string.
    $toPrice = function ($raw) {
        if (strpos($raw, ',') === false && strpos($raw, '.') === false) {
            return (float)$raw / 100;
        }
        return $this->parsePrice($raw);
    };

    while (($line = fgets($handle)) !== false) {
        $line = rtrim($line, "\r\n");

        // Convert encoding if needed
        if (!mb_check_encoding($line, 'UTF-8')) {
            $line = mb_convert_encoding($line, 'UTF-8', 'ISO-8859-1');
        }

        if (strlen($line) < 10) {
            continue;
        }

        if (strpos($line, ';') !== false) {
            $parts = explode(';', $line);
            $recordType = trim($parts[0] ?? '');

            if ($recordType === 'P' && isset($parts[1]) && trim($parts[1]) === 'A') {
                // Multi-article layout; an entry needs at least ArtNr;PriceType;Price
                $end = count($parts) - 2;
                for ($i = 2; $i < $end; $i += 9) {
                    $articleNumber = trim($parts[$i] ?? '');
                    $priceType = trim($parts[$i + 1] ?? '');
                    $priceRaw = trim($parts[$i + 2] ?? '0');
                    $datpreisPeCode = (int)trim($parts[$i + 3] ?? '0');
                    $metalSurchargeRaw = trim($parts[$i + 4] ?? '0');

                    // Values are in cents, convert to currency units
                    $price = (float)$priceRaw / 100;
                    $metalSurcharge = (float)$metalSurchargeRaw / 100;

                    if (!empty($articleNumber) && $price > 0) {
                        $this->prices[$articleNumber] = array(
                            'price' => $price,
                            'price_type' => $priceType,
                            'metal_surcharge' => $metalSurcharge,
                            'datpreis_pe_code' => $datpreisPeCode,
                        );
                    }
                }
                continue;
            }

            if ($recordType !== 'P' && $recordType !== '0') {
                continue;
            }

            // Simple layout: P;ArtNr;PriceType;Price
            $articleNumber = trim($parts[1] ?? '');
            $priceType = trim($parts[2] ?? '');
            $price = $toPrice(trim($parts[3] ?? '0'));
        } else {
            // Fixed-width layout
            $recordType = mb_substr($line, 0, 1, 'UTF-8');
            if ($recordType !== 'P' && $recordType !== '0') {
                continue;
            }

            $articleNumber = trim(mb_substr($line, 1, 15, 'UTF-8'));
            $priceType = trim(mb_substr($line, 16, 1, 'UTF-8'));
            $price = $toPrice(trim(mb_substr($line, 17, 12, 'UTF-8')));
        }

        if (!empty($articleNumber) && $price > 0) {
            $this->prices[$articleNumber] = array(
                'price' => $price,
                'price_type' => $priceType,
            );
        }
    }

    fclose($handle);
}
/**
 * Copy collected prices onto the matching entries of $articles.
 *
 * For every price record whose article number exists in $articles, the
 * article's 'price' is overwritten, and 'metal_surcharge' is set when the
 * record carries a non-empty one. The price is copied as-is, without any
 * conversion by price unit. $prices itself is left untouched.
 */
protected function mergePricesIntoArticles()
{
    foreach ($this->prices as $number => $record) {
        if (!isset($this->articles[$number])) {
            continue;
        }

        $this->articles[$number]['price'] = $record['price'];

        if (empty($record['metal_surcharge'])) {
            continue;
        }
        $this->articles[$number]['metal_surcharge'] = $record['metal_surcharge'];
    }
}
/**
 * Check whether a file looks like a Datanorm 5.0 (XML) file.
 *
 * Only the first 2000 bytes are inspected. The file qualifies when they
 * contain an opening DATANORM tag in any letter case. An unreadable file
 * is reported as "not Datanorm 5".
 *
 * @param string $file File path
 * @return bool
 */
protected function isDatanorm5File($file)
{
    $head = file_get_contents($file, false, null, 0, 2000);
    if ($head === false) {
        return false;
    }
    return stripos($head, '<DATANORM') !== false;
}
/**
 * Parse a Datanorm 5.0 file (XML format).
 *
 * Article nodes are looked up as //Artikel, then //Article, then
 * //article; the first query that yields nodes wins. Every article with a
 * non-empty article number is stored through addArticle(), so streaming
 * mode is honoured: the callback receives the articles in batches and the
 * last partial batch is flushed before returning.
 *
 * libxml's internal error handling is enabled only for the duration of
 * the call; the previous setting is restored afterwards.
 *
 * @param string $file File path
 * @return int Number of articles parsed, -1 if the XML cannot be loaded
 */
protected function parseDatanorm5File($file)
{
    $previousErrorMode = libxml_use_internal_errors(true);
    $xml = simplexml_load_file($file);

    if ($xml === false) {
        $errors = libxml_get_errors();
        $this->error = 'XML parse error: ' . ($errors[0]->message ?? 'Unknown error');
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
        return -1;
    }

    // Discard buffered warnings and give libxml back to the caller unchanged
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // Find article nodes (various possible node names)
    $articleNodes = $xml->xpath('//Artikel') ?: $xml->xpath('//Article') ?: $xml->xpath('//article') ?: array();

    $count = 0;
    foreach ($articleNodes as $node) {
        $article = $this->parseDatanorm5Article($node);
        if ($article) {
            $this->addArticle($article);
            $count++;
        }
    }

    // Deliver the remaining buffered articles in streaming mode
    if ($this->streamingMode) {
        $this->flushBatch();
    }

    return $count;
}
/**
 * Build an article array from one Datanorm 5.0 article node.
 *
 * Each field is read through getXmlValue() with a list of candidate
 * element/attribute names; the first one present on the node is used.
 * 'price_unit' is the integer value of the price unit field, or 1 when
 * that value is missing or evaluates to 0. 'price' is run through
 * parsePrice().
 *
 * @param SimpleXMLElement $node Article XML node
 * @return array|null Article data, or null when no article number is found
 */
protected function parseDatanorm5Article($node)
{
    // First matching candidate name wins; '' when none is present.
    $pick = function (array $candidates) use ($node) {
        return $this->getXmlValue($node, $candidates);
    };

    $number = $pick(array('Artikelnummer', 'ArticleNumber', 'ArtNr', 'artNr'));
    $matchcode = $pick(array('Matchcode', 'matchcode'));
    $shortText1 = $pick(array('Kurztext1', 'Kurztext', 'ShortText1', 'ShortText', 'Bezeichnung', 'Name'));
    $shortText2 = $pick(array('Kurztext2', 'ShortText2'));
    $longText = $pick(array('Langtext', 'LongText', 'Beschreibung', 'Description'));
    $unitCode = $pick(array('Mengeneinheit', 'Unit', 'ME'));

    $priceUnit = (int)$pick(array('Preiseinheit', 'PriceUnit', 'PE'));
    if ($priceUnit === 0) {
        $priceUnit = 1;
    }

    $price = $this->parsePrice($pick(array('Preis', 'Price', 'Listenpreis', 'ListPrice')));

    $article = array(
        'article_number' => $number,
        'matchcode' => $matchcode,
        'short_text1' => $shortText1,
        'short_text2' => $shortText2,
        'long_text' => $longText,
        'unit_code' => $unitCode,
        'price_unit' => $priceUnit,
        'price' => $price,
        'discount_group' => $pick(array('Rabattgruppe', 'DiscountGroup', 'RG')),
        'product_group' => $pick(array('Warengruppe', 'ProductGroup', 'WG')),
        'manufacturer_ref' => $pick(array('HerstellerArtNr', 'ManufacturerArticleNumber')),
        'manufacturer_name' => $pick(array('Hersteller', 'Manufacturer')),
        'ean' => $pick(array('EAN', 'GTIN', 'Barcode')),
    );

    return empty($article['article_number']) ? null : $article;
}
/**
 * Read a value from an XML node, trying several candidate names in order.
 *
 * For each name the child element of that name is checked first, then the
 * attribute of that name; the first hit is returned trimmed. Names later
 * in the list are only consulted when earlier ones are absent.
 *
 * @param SimpleXMLElement $node XML node
 * @param array $names Possible element names
 * @return string Value or empty string
 */
protected function getXmlValue($node, $names)
{
    foreach ($names as $candidate) {
        if (isset($node->$candidate)) {
            // Child element
            $value = $node->$candidate;
        } elseif (isset($node[$candidate])) {
            // Attribute
            $value = $node[$candidate];
        } else {
            continue;
        }
        return trim((string)$value);
    }

    return '';
}
/**
 * Parse a price string to float.
 *
 * Everything except digits, ',', '.' and '-' is stripped first (currency
 * symbols, whitespace). The decimal separator is then determined by
 * position:
 *  - comma only, or comma after the last dot (German style "1.234,56"):
 *    dots are grouping separators, the comma is the decimal separator;
 *  - dot after the last comma (English style "1,234.56"): commas are
 *    grouping separators;
 *  - dot only or no separator: used as is.
 * This also covers negative values and grouped values that do not have
 * exactly two decimals ("-1.234,56", "1.234,5").
 *
 * @param string $priceStr Price string
 * @return float Price value, 0.0 for an empty input
 */
protected function parsePrice($priceStr)
{
    if (empty($priceStr)) {
        return 0.0;
    }

    // Remove currency symbols and whitespace
    $priceStr = preg_replace('/[^\d,.\-]/', '', (string)$priceStr);

    $lastComma = strrpos($priceStr, ',');
    $lastDot = strrpos($priceStr, '.');

    if ($lastComma !== false && ($lastDot === false || $lastComma > $lastDot)) {
        // Comma is the decimal separator; any dots are thousands separators.
        $priceStr = str_replace(array('.', ','), array('', '.'), $priceStr);
    } elseif ($lastComma !== false) {
        // Dot is the decimal separator; commas are thousands separators.
        $priceStr = str_replace(',', '', $priceStr);
    }

    return (float)$priceStr;
}
/**
 * Convert a Datanorm unit code to its UN/ECE unit code.
 *
 * The input is trimmed and upper-cased before the lookup. Codes that are
 * not listed fall back to 'C62' (piece).
 *
 * @param string $datanormUnit Datanorm unit code
 * @return string UN/ECE unit code
 */
public static function convertUnitCode($datanormUnit)
{
    // UN/ECE code => Datanorm codes that map to it
    $aliasesByCode = array(
        'C62' => array('ST', 'STK', 'PCE'), // piece
        'MTR' => array('M', 'MTR'), // metre
        'CMT' => array('CM'), // centimetre
        'MMT' => array('MM'), // millimetre
        'LTR' => array('L', 'LTR'), // litre
        'KGM' => array('KG'), // kilogram
        'GRM' => array('G'), // gram
        'MTK' => array('M2'), // square metre
        'MTQ' => array('M3'), // cubic metre
        'PK' => array('PAK', 'PAC'), // package
        'SET' => array('SET'), // set
        'RL' => array('ROL', 'RLL'), // roll
        'BE' => array('BDL'), // bundle
        'CT' => array('KRT', 'CTN'), // carton
    );

    $wanted = strtoupper(trim($datanormUnit));
    foreach ($aliasesByCode as $code => $aliases) {
        if (in_array($wanted, $aliases, true)) {
            return $code;
        }
    }

    return 'C62'; // Default to piece
}
/**
 * Return every article currently held in $articles.
 *
 * @return array Articles keyed by article number
 */
public function getArticles()
{
    $all = $this->articles;
    return $all;
}
/**
 * Look up a single article in $articles by its article number.
 *
 * @param string $articleNumber Article number to find
 * @return array|null Article data or null
 */
public function findArticle($articleNumber)
{
    if (isset($this->articles[$articleNumber])) {
        return $this->articles[$articleNumber];
    }
    return null;
}
/**
 * Search the articles held in $articles by text.
 *
 * The search text is matched case-insensitively as a substring of the
 * article number, matchcode, both short texts, EAN and manufacturer
 * reference (joined with spaces). Case folding is multibyte-aware, so
 * umlauts and other non-ASCII letters match regardless of case. Fields an
 * article does not carry are treated as empty. An empty search text
 * matches every article.
 *
 * @param string $searchText Search text
 * @param int $limit Maximum results; values below 1 yield no results
 * @return array Matching articles, in storage order
 */
public function searchArticles($searchText, $limit = 50)
{
    $results = array();
    if ($limit < 1) {
        return $results;
    }

    $needle = mb_strtolower((string)$searchText, 'UTF-8');
    $fields = array('article_number', 'matchcode', 'short_text1', 'short_text2', 'ean', 'manufacturer_ref');

    foreach ($this->articles as $article) {
        $values = array();
        foreach ($fields as $field) {
            $values[] = $article[$field] ?? '';
        }
        $haystack = mb_strtolower(implode(' ', $values), 'UTF-8');

        if ($needle === '' || mb_strpos($haystack, $needle, 0, 'UTF-8') !== false) {
            $results[] = $article;
            if (count($results) >= $limit) {
                break;
            }
        }
    }

    return $results;
}
}