909 lines
29 KiB
PHP
909 lines
29 KiB
PHP
<?php
|
|
/* Copyright (C) 2026 Eduard Wisch <data@data-it-solution.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 */
|
|
|
|
/**
 * \file    class/datanormparser.class.php
 * \ingroup importzugferd
 * \brief   Parser for Datanorm 4.0 and 5.0 catalog files
 */
|
|
|
|
/**
 * Class DatanormParser
 *
 * Parses Datanorm catalog files: version 4.0 (line-based, fixed-width or
 * semicolon-separated) and version 5.0 (XML).
 *
 * Two operating modes are supported:
 *  - default mode: every article is collected in $articles;
 *  - streaming mode (see enableStreaming()): articles are handed to a callback
 *    in batches and are NOT kept in $articles.
 */
class DatanormParser
{
	/**
	 * @var string Detected Datanorm version ('4.0' or '5.0'), empty until a parse call sets it
	 */
	public $version = '';

	/**
	 * @var array Parsed articles keyed by article number (filled in non-streaming mode only)
	 */
	public $articles = array();

	/**
	 * @var array Parsed price information keyed by article number; each entry holds 'price' and 'price_type'
	 */
	public $prices = array();

	/**
	 * @var array Product groups: group code => group name
	 */
	public $groups = array();

	/**
	 * @var string Most recent error message
	 */
	public $error = '';

	/**
	 * @var array All error messages recorded so far
	 */
	public $errors = array();

	/**
	 * @var callable|null Callback receiving each batch of articles in streaming mode
	 */
	protected $batchCallback = null;

	/**
	 * @var int Maximum number of articles per batch
	 */
	protected $batchSize = 1000;

	/**
	 * @var array Articles of the batch currently being built, keyed by article number
	 */
	protected $batchArticles = array();

	/**
	 * @var bool Whether streaming mode is active
	 */
	protected $streamingMode = false;

	/**
	 * Enable streaming mode for large files.
	 * In streaming mode, articles are passed to the callback in batches instead
	 * of being accumulated in $articles.
	 *
	 * @param callable $callback  Function called with one array argument: the batch of articles
	 * @param int      $batchSize Maximum number of articles per batch (values below 1 are raised to 1)
	 * @return void
	 */
	public function enableStreaming($callback, $batchSize = 1000)
	{
		$this->streamingMode = true;
		$this->batchCallback = $callback;
		// A batch size below 1 cannot hold an article; clamp it.
		$this->batchSize = max(1, (int) $batchSize);
		$this->batchArticles = array();
	}

	/**
	 * Disable streaming mode and drop any batch that has not been flushed yet.
	 *
	 * @return void
	 */
	public function disableStreaming()
	{
		$this->streamingMode = false;
		$this->batchCallback = null;
		$this->batchArticles = array();
	}

	/**
	 * Remember an error: sets $error and appends to $errors.
	 *
	 * @param string $message Error message
	 * @return void
	 */
	protected function recordError($message)
	{
		$this->error = $message;
		$this->errors[] = $message;
	}

	/**
	 * Return the text as UTF-8. Text that is not valid UTF-8 is treated as ISO-8859-1.
	 *
	 * @param string $text Raw text read from a Datanorm 4 file
	 * @return string UTF-8 text
	 */
	protected function toUtf8($text)
	{
		if (mb_check_encoding($text, 'UTF-8')) {
			return $text;
		}
		return mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');
	}

	/**
	 * Extract and trim one field of a fixed-width record.
	 * Offsets are counted in characters, not bytes: records are converted to
	 * UTF-8 before they are cut, so a byte-wise substr() would shift every
	 * field that follows a multi-byte character.
	 *
	 * @param string   $line   UTF-8 record line
	 * @param int      $start  Zero-based character offset
	 * @param int|null $length Field length in characters, null for "up to end of line"
	 * @return string Trimmed field content
	 */
	protected function fixedField($line, $start, $length = null)
	{
		return trim(mb_substr($line, $start, $length, 'UTF-8'));
	}

	/**
	 * Add an article to the current batch (streaming mode) or to $articles.
	 *
	 * @param array $article Article data, must contain 'article_number'
	 * @return void
	 */
	protected function addArticle($article)
	{
		$number = $article['article_number'];

		if ($this->streamingMode && $this->batchCallback) {
			// Flush BEFORE adding the new article when the batch is full. The most
			// recently added article therefore stays in the batch while its
			// following B/P records are read, and is only handed over afterwards.
			if (!isset($this->batchArticles[$number]) && count($this->batchArticles) >= $this->batchSize) {
				$this->flushBatch();
			}
			$this->batchArticles[$number] = $article;
		} else {
			$this->articles[$number] = $article;
		}
	}

	/**
	 * Hand the current batch to the callback and start a new, empty batch.
	 * Prices already known for the batch articles are merged in first and then
	 * removed from $prices.
	 *
	 * @return void
	 */
	protected function flushBatch()
	{
		if (empty($this->batchArticles) || !$this->batchCallback) {
			return;
		}

		foreach ($this->batchArticles as $artNum => &$article) {
			if (isset($this->prices[$artNum])) {
				$article['price'] = $this->prices[$artNum]['price'];
				unset($this->prices[$artNum]); // Free memory
			}
		}
		unset($article); // Break the reference left behind by the loop

		call_user_func($this->batchCallback, $this->batchArticles);
		$this->batchArticles = array();
	}

	/**
	 * Parse a Datanorm file or directory.
	 *
	 * @param string $path Path to file or directory
	 * @return int Number of articles parsed, -1 on error
	 */
	public function parse($path)
	{
		return is_dir($path) ? $this->parseDirectory($path) : $this->parseFile($path);
	}

	/**
	 * Parse all Datanorm files in a directory.
	 *
	 * Handles DATPREIS.nnn (prices, non-streaming mode only), DATANORM.nnn
	 * (articles), DATANORM.WRG (product groups), DATANORM.RAB (discounts) and
	 * *.xml files that look like Datanorm 5.0. Files that fail to parse do not
	 * abort the run; their messages are recorded in $error / $errors.
	 *
	 * @param string $dir Directory path
	 * @return int Number of articles parsed, -1 if $dir is not a directory
	 */
	public function parseDirectory($dir)
	{
		if (!is_dir($dir)) {
			$this->recordError('Directory not found: ' . $dir);
			return -1;
		}

		$totalArticles = 0;

		// Non-streaming mode: load prices first so they can be merged afterwards.
		// In streaming mode the price files are skipped here; prices are only
		// merged from P records seen before a batch is flushed.
		if (!$this->streamingMode) {
			// glob() may return false on error; normalise to an empty list.
			$priceFiles = glob($dir . '/DATPREIS.*') ?: array();
			if (!empty($priceFiles)) {
				$this->version = '4.0';
				foreach ($priceFiles as $file) {
					$ext = strtoupper(pathinfo($file, PATHINFO_EXTENSION));
					if (preg_match('/^\d{3}$/', $ext)) {
						$this->parseDatapreis4File($file);
					}
				}
			}
		}

		// Datanorm 4.0 files (DATANORM.xxx)
		$files = glob($dir . '/DATANORM.*') ?: array();
		if (!empty($files)) {
			$this->version = '4.0';
			foreach ($files as $file) {
				$ext = strtoupper(pathinfo($file, PATHINFO_EXTENSION));
				if (preg_match('/^\d{3}$/', $ext)) {
					// Main article file (DATANORM.001, etc.)
					$count = $this->parseDatanorm4File($file);
					if ($count > 0) {
						$totalArticles += $count;
					}
				} elseif ($ext === 'WRG') {
					// Product groups file
					$this->parseDatanorm4Groups($file);
				} elseif ($ext === 'RAB') {
					// Discount groups file
					$this->parseDatanorm4Discounts($file);
				}
			}
		}

		// Merge prices into articles (non-streaming mode only);
		// in streaming mode prices are merged in flushBatch().
		if (!$this->streamingMode && !empty($this->prices)) {
			$this->mergePricesIntoArticles();
		}

		// Datanorm 5.0 files (*.xml)
		$xmlFiles = glob($dir . '/*.xml') ?: array();
		foreach ($xmlFiles as $file) {
			if (!$this->isDatanorm5File($file)) {
				continue;
			}
			$this->version = '5.0';
			$count = $this->parseDatanorm5File($file);
			if ($count > 0) {
				$totalArticles += $count;
			}
		}

		return $totalArticles;
	}

	/**
	 * Parse a single file, auto-detecting the format from its first 1000 bytes:
	 * anything containing '<?xml' or '<DATANORM' is parsed as Datanorm 5.0,
	 * everything else as Datanorm 4.0.
	 *
	 * @param string $file File path
	 * @return int Number of articles parsed, -1 on error
	 */
	public function parseFile($file)
	{
		if (!file_exists($file)) {
			$this->recordError('File not found: ' . $file);
			return -1;
		}

		$head = file_get_contents($file, false, null, 0, 1000);
		if ($head === false) {
			$this->recordError('Cannot read file: ' . $file);
			return -1;
		}

		if (strpos($head, '<?xml') !== false || strpos($head, '<DATANORM') !== false) {
			$this->version = '5.0';
			return $this->parseDatanorm5File($file);
		}

		$this->version = '4.0';
		return $this->parseDatanorm4File($file);
	}

	/**
	 * Parse a Datanorm 4.0 article file line by line.
	 *
	 * Record types handled: 'A' (article master), 'B' (additional text for the
	 * preceding article) and 'P' (price). Other record types are ignored.
	 * At the end the remaining batch is flushed (streaming mode) or prices are
	 * merged into $articles (non-streaming mode).
	 *
	 * @param string $file File path
	 * @return int Number of articles parsed, -1 if the file cannot be opened
	 */
	protected function parseDatanorm4File($file)
	{
		$handle = fopen($file, 'r');
		if ($handle === false) {
			$this->recordError('Cannot read file: ' . $file);
			return -1;
		}

		$count = 0;
		$currentArticle = null;

		while (($line = fgets($handle)) !== false) {
			// Datanorm 4 files are often ISO-8859-1; normalise each line to UTF-8.
			$line = $this->toUtf8(rtrim($line, "\r\n"));

			if (strlen($line) < 2) {
				continue;
			}

			switch (substr($line, 0, 1)) {
				case 'A':
					// Article master record
					$article = $this->parseDatanorm4TypeA($line);
					if ($article) {
						$this->addArticle($article);
						$currentArticle = $article['article_number'];
						$count++;
					} else {
						// The record was rejected: its B records must not be
						// attached to the previously accepted article.
						$currentArticle = null;
					}
					break;

				case 'B':
					// Article info / long text
					if ($currentArticle) {
						$this->parseDatanorm4TypeB($line, $currentArticle);
					}
					break;

				case 'P':
					// Price record
					$this->parseDatanorm4TypeP($line);
					break;
			}
		}

		fclose($handle);

		if ($this->streamingMode) {
			// Hand over whatever is left in the last, partially filled batch.
			$this->flushBatch();
		} else {
			$this->mergePricesIntoArticles();
		}

		return $count;
	}

	/**
	 * Parse a Datanorm 4.0 type A record (article master).
	 *
	 * Lines containing a semicolon are delegated to
	 * parseDatanorm4TypeASemicolon(); all other lines are read as fixed-width
	 * records and must be at least 50 characters long.
	 *
	 * @param string $line Record line (UTF-8)
	 * @return array|null Article data, null if the record is unusable
	 */
	protected function parseDatanorm4TypeA($line)
	{
		// Semicolon-separated variant. The minimum-length check below only makes
		// sense for fixed-width records, so it must not reject short CSV lines.
		if (strpos($line, ';') !== false) {
			return $this->parseDatanorm4TypeASemicolon($line);
		}

		$length = mb_strlen($line, 'UTF-8');
		if ($length < 50) {
			return null;
		}

		// Fixed-width layout (positions in comments are 1-based)
		$article = array(
			'article_number'    => $this->fixedField($line, 1, 15),   // Pos 2-16: article number
			'matchcode'         => $this->fixedField($line, 16, 12),  // Pos 17-28: matchcode
			'short_text1'       => $this->fixedField($line, 28, 40),  // Pos 29-68: short text 1
			'short_text2'       => $this->fixedField($line, 68, 40),  // Pos 69-108: short text 2
			'unit_code'         => $this->fixedField($line, 108, 3),  // Pos 109-111: unit of measure
			'price_unit'        => (int) $this->fixedField($line, 111, 5), // Pos 112-116: price unit
			'discount_group'    => $this->fixedField($line, 116, 4),  // Pos 117-120: discount group
			'product_group'     => $this->fixedField($line, 120, 7),  // Pos 121-127: product group
			'manufacturer_ref'  => $this->fixedField($line, 127, 15), // Pos 128-142: manufacturer article number
			'manufacturer_name' => $this->fixedField($line, 142, 20), // Pos 143-162: manufacturer name
			'ean'               => '',
			'long_text'         => '',
			'price'             => 0,
		);

		// EAN if available (extended format)
		if ($length >= 175) {
			$article['ean'] = $this->fixedField($line, 162, 13);
		}

		if (empty($article['article_number'])) {
			return null;
		}

		// Default price unit to 1 if not set
		if ($article['price_unit'] <= 0) {
			$article['price_unit'] = 1;
		}

		return $article;
	}

	/**
	 * Parse a Datanorm 4.0 type A record in semicolon-separated form.
	 *
	 * Two layouts are recognised:
	 *  - Sonepar:  A;N;ArtNr;WG;ShortText1;ShortText2;PE;ME;METext;RabGrp;PriceGrp;WG2;...
	 *    (detected by a second field of at most two characters)
	 *  - Standard: A;ArtNr;Matchcode;ShortText1;ShortText2;ME;PE;RabGrp;WG;...
	 *
	 * @param string $line Record line
	 * @return array|null Article data, null if the record has fewer than 6 fields or no article number
	 */
	protected function parseDatanorm4TypeASemicolon($line)
	{
		$parts = explode(';', $line);

		if (count($parts) < 6) {
			return null;
		}

		$isSonepar = trim($parts[0]) === 'A' && strlen(trim($parts[1])) <= 2;

		if ($isSonepar) {
			// Unit: prefer the unit text (field 8); fall back to the unit code
			// (field 7) when the text is missing OR empty.
			$unit = trim($parts[8] ?? '');
			if ($unit === '') {
				$unit = trim($parts[7] ?? '');
			}

			$article = array(
				'article_number'    => trim($parts[2] ?? ''),
				'matchcode'         => '', // Filled from the B record
				'short_text1'       => trim($parts[4] ?? ''),
				'short_text2'       => trim($parts[5] ?? ''),
				'unit_code'         => $unit,
				'price_unit'        => (int) trim($parts[6] ?? '1'),
				'discount_group'    => trim($parts[9] ?? ''),
				'product_group'     => trim($parts[3] ?? ''),
				'manufacturer_ref'  => '',
				'manufacturer_name' => '',
				'ean'               => '',
				'long_text'         => '',
				'price'             => 0,
			);
		} else {
			$article = array(
				'article_number'    => trim($parts[1] ?? ''),
				'matchcode'         => trim($parts[2] ?? ''),
				'short_text1'       => trim($parts[3] ?? ''),
				'short_text2'       => trim($parts[4] ?? ''),
				'unit_code'         => trim($parts[5] ?? ''),
				'price_unit'        => (int) trim($parts[6] ?? '1'),
				'discount_group'    => trim($parts[7] ?? ''),
				'product_group'     => trim($parts[8] ?? ''),
				'manufacturer_ref'  => trim($parts[14] ?? ''),
				'manufacturer_name' => trim($parts[15] ?? ''),
				'ean'               => trim($parts[16] ?? ''),
				'long_text'         => '',
				'price'             => 0,
			);
		}

		if (empty($article['article_number'])) {
			return null;
		}

		if ($article['price_unit'] <= 0) {
			$article['price_unit'] = 1;
		}

		return $article;
	}

	/**
	 * Get a reference to a stored article so it can be modified in place.
	 * Looks in the current batch (streaming mode) or in $articles.
	 *
	 * @param string $articleNumber Article number
	 * @return array|null Reference to the article, or a reference to null if it is not stored
	 */
	protected function &getArticleRef($articleNumber)
	{
		// Returning by reference requires a variable even for the "not found" case.
		$null = null;

		if ($this->streamingMode) {
			if (isset($this->batchArticles[$articleNumber])) {
				return $this->batchArticles[$articleNumber];
			}
		} elseif (isset($this->articles[$articleNumber])) {
			return $this->articles[$articleNumber];
		}

		return $null;
	}

	/**
	 * Parse a Datanorm 4.0 type B record (article info / long text) and apply it
	 * to the stored article.
	 *
	 *  - Sonepar CSV (B;N;ArtNr;Matchcode;...): sets the matchcode if still empty
	 *    and the record's article number matches.
	 *  - Standard CSV: appends field 2 to the long text.
	 *  - Fixed-width: appends everything from character 17 on to the long text.
	 *
	 * @param string $line          Record line (UTF-8)
	 * @param string $articleNumber Article number of the preceding A record
	 * @return void
	 */
	protected function parseDatanorm4TypeB($line, $articleNumber)
	{
		$article = &$this->getArticleRef($articleNumber);
		if ($article === null) {
			return;
		}

		if (strpos($line, ';') === false) {
			$text = $this->fixedField($line, 16);
		} else {
			$parts = explode(';', $line);

			if (isset($parts[1]) && strlen(trim($parts[1])) <= 2) {
				// Sonepar layout: verify the article number, then take the matchcode.
				if (trim($parts[2] ?? '') === $articleNumber) {
					$matchcode = trim($parts[3] ?? '');
					if (!empty($matchcode) && empty($article['matchcode'])) {
						$article['matchcode'] = $matchcode;
					}
				}
				return;
			}

			$text = trim($parts[2] ?? '');
		}

		if (empty($text)) {
			return;
		}

		// Separate consecutive text records with a line break.
		if (!empty($article['long_text'])) {
			$article['long_text'] .= "\n";
		}
		$article['long_text'] .= $text;
	}

	/**
	 * Store a price for an article. Entries with an empty article number or a
	 * non-positive price are ignored; a later price for the same article
	 * overwrites the earlier one.
	 *
	 * @param string $articleNumber Article number
	 * @param string $priceType     Price type indicator as found in the record
	 * @param float  $price         Price value
	 * @return void
	 */
	protected function storePrice($articleNumber, $priceType, $price)
	{
		if (!empty($articleNumber) && $price > 0) {
			$this->prices[$articleNumber] = array(
				'price'      => $price,
				'price_type' => $priceType,
			);
		}
	}

	/**
	 * Parse a Datanorm 4.0 type P record (price) found inside an article file.
	 *
	 * @param string $line Record line (UTF-8)
	 * @return void
	 */
	protected function parseDatanorm4TypeP($line)
	{
		if (strpos($line, ';') !== false) {
			$parts = explode(';', $line);
			$articleNumber = trim($parts[1] ?? '');
			$priceType = trim($parts[2] ?? '');
			$price = $this->parsePrice(trim($parts[3] ?? '0'));
		} else {
			$articleNumber = $this->fixedField($line, 1, 15);
			$priceType = $this->fixedField($line, 16, 1);
			$price = $this->parsePrice($this->fixedField($line, 17, 12));
		}

		$this->storePrice($articleNumber, $priceType, $price);
	}

	/**
	 * Parse a Datanorm 4.0 product groups file (DATANORM.WRG) into $groups.
	 * Lines shorter than 10 bytes are skipped.
	 *
	 * @param string $file File path
	 * @return void
	 */
	protected function parseDatanorm4Groups($file)
	{
		$content = file_get_contents($file);
		if ($content === false) {
			$this->recordError('Cannot read file: ' . $file);
			return;
		}

		$content = $this->toUtf8($content);

		foreach (explode("\n", $content) as $line) {
			$line = rtrim($line, "\r\n");
			if (strlen($line) < 10) {
				continue;
			}

			if (strpos($line, ';') !== false) {
				$parts = explode(';', $line);
				$code = trim($parts[0] ?? '');
				$name = trim($parts[1] ?? '');
			} else {
				// Fixed-width: 7 characters of group code followed by the name
				$code = $this->fixedField($line, 0, 7);
				$name = $this->fixedField($line, 7);
			}

			if (!empty($code)) {
				$this->groups[$code] = $name;
			}
		}
	}

	/**
	 * Parse a Datanorm 4.0 discount groups file (DATANORM.RAB).
	 * Not implemented: the file is currently ignored.
	 *
	 * @param string $file File path
	 * @return void
	 */
	protected function parseDatanorm4Discounts($file)
	{
		// Discount parsing - can be extended if needed
	}

	/**
	 * Convert a raw DATPREIS amount to a float.
	 * An amount without ',' or '.' is taken as cents and divided by 100;
	 * anything else is passed to parsePrice().
	 *
	 * @param string $raw Raw amount field
	 * @return float Price value
	 */
	protected function parseDatapreisAmount($raw)
	{
		if (strpos($raw, ',') === false && strpos($raw, '.') === false) {
			return (float) $raw / 100;
		}
		return $this->parsePrice($raw);
	}

	/**
	 * Parse a DATPREIS.xxx price file line by line into $prices.
	 *
	 * Supported record shapes (lines shorter than 10 bytes are skipped):
	 *  - P;A;ArtNr;PriceInd;Price;PE;x;x;x;x;x;ArtNr2;...  several articles per
	 *    line, 9 fields each, price always in cents
	 *  - P;ArtNr;PriceInd;Price  (record type 'P' or '0')
	 *  - fixed-width record of type 'P' or '0'
	 *
	 * @param string $file File path
	 * @return void
	 */
	protected function parseDatapreis4File($file)
	{
		$handle = fopen($file, 'r');
		if ($handle === false) {
			$this->recordError('Cannot read file: ' . $file);
			return;
		}

		while (($line = fgets($handle)) !== false) {
			$line = $this->toUtf8(rtrim($line, "\r\n"));

			if (strlen($line) < 10) {
				continue;
			}

			if (strpos($line, ';') === false) {
				// Fixed-width format
				$recordType = substr($line, 0, 1);
				if ($recordType === 'P' || $recordType === '0') {
					$this->storePrice(
						$this->fixedField($line, 1, 15),
						$this->fixedField($line, 16, 1),
						$this->parseDatapreisAmount($this->fixedField($line, 17, 12))
					);
				}
				continue;
			}

			$parts = explode(';', $line);
			$recordType = trim($parts[0]);

			if ($recordType === 'P' && isset($parts[1]) && trim($parts[1]) === 'A') {
				// Multiple entries per line, 9 fields each, starting after "P;A".
				// The loop bound guarantees that $i + 2 is a valid index.
				$fieldCount = count($parts);
				for ($i = 2; $i < $fieldCount - 2; $i += 9) {
					$this->storePrice(
						trim($parts[$i]),
						trim($parts[$i + 1]),
						(float) trim($parts[$i + 2]) / 100 // Price is given in cents
					);
				}
			} elseif ($recordType === 'P' || $recordType === '0') {
				// Simple format: P;ArtNr;PriceInd;Price
				$this->storePrice(
					trim($parts[1] ?? ''),
					trim($parts[2] ?? ''),
					$this->parseDatapreisAmount(trim($parts[3] ?? '0'))
				);
			}
		}

		fclose($handle);
	}

	/**
	 * Copy every known price into the matching entry of $articles.
	 * Prices without a matching article are left untouched in $prices.
	 *
	 * @return void
	 */
	protected function mergePricesIntoArticles()
	{
		foreach ($this->prices as $articleNumber => $priceData) {
			if (isset($this->articles[$articleNumber])) {
				$this->articles[$articleNumber]['price'] = $priceData['price'];
			}
		}
	}

	/**
	 * Check whether a file looks like Datanorm 5.0: its first 2000 bytes must
	 * contain a '<DATANORM' tag (matched case-insensitively).
	 *
	 * @param string $file File path
	 * @return bool
	 */
	protected function isDatanorm5File($file)
	{
		$head = file_get_contents($file, false, null, 0, 2000);
		if ($head === false) {
			return false;
		}
		return stripos($head, '<DATANORM') !== false;
	}

	/**
	 * Parse a Datanorm 5.0 (XML) file.
	 *
	 * Article nodes are looked up under the names 'Artikel', 'Article' or
	 * 'article' (first name that yields nodes wins). Articles go through
	 * addArticle(), so streaming mode is honoured; the final batch is flushed
	 * before returning.
	 *
	 * @param string $file File path
	 * @return int Number of articles parsed, -1 if the XML cannot be loaded
	 */
	protected function parseDatanorm5File($file)
	{
		// Collect libxml errors instead of emitting warnings, and restore the
		// caller's setting afterwards: this switch is process-global.
		$previousSetting = libxml_use_internal_errors(true);
		// LIBXML_NONET: never fetch anything over the network while loading.
		$xml = simplexml_load_file($file, 'SimpleXMLElement', LIBXML_NONET);

		if ($xml === false) {
			$xmlErrors = libxml_get_errors();
			$message = isset($xmlErrors[0]) ? trim($xmlErrors[0]->message) : 'Unknown error';
			libxml_clear_errors();
			libxml_use_internal_errors($previousSetting);
			$this->recordError('XML parse error: ' . $message);
			return -1;
		}

		libxml_clear_errors();
		libxml_use_internal_errors($previousSetting);

		$articleNodes = $xml->xpath('//Artikel') ?: $xml->xpath('//Article') ?: $xml->xpath('//article') ?: array();

		$count = 0;
		foreach ($articleNodes as $node) {
			$article = $this->parseDatanorm5Article($node);
			if ($article) {
				$this->addArticle($article);
				$count++;
			}
		}

		if ($this->streamingMode) {
			$this->flushBatch();
		}

		return $count;
	}

	/**
	 * Build an article array from a Datanorm 5.0 article node, trying several
	 * German/English element or attribute names per field.
	 *
	 * @param SimpleXMLElement $node Article XML node
	 * @return array|null Article data, null if no article number was found
	 */
	protected function parseDatanorm5Article($node)
	{
		$priceUnit = (int) $this->getXmlValue($node, array('Preiseinheit', 'PriceUnit', 'PE'));

		$article = array(
			'article_number'    => $this->getXmlValue($node, array('Artikelnummer', 'ArticleNumber', 'ArtNr', 'artNr')),
			'matchcode'         => $this->getXmlValue($node, array('Matchcode', 'matchcode')),
			'short_text1'       => $this->getXmlValue($node, array('Kurztext1', 'Kurztext', 'ShortText1', 'ShortText', 'Bezeichnung', 'Name')),
			'short_text2'       => $this->getXmlValue($node, array('Kurztext2', 'ShortText2')),
			'long_text'         => $this->getXmlValue($node, array('Langtext', 'LongText', 'Beschreibung', 'Description')),
			'unit_code'         => $this->getXmlValue($node, array('Mengeneinheit', 'Unit', 'ME')),
			// Same rule as the Datanorm 4 parsers: anything below 1 means "1".
			'price_unit'        => $priceUnit > 0 ? $priceUnit : 1,
			'price'             => $this->parsePrice($this->getXmlValue($node, array('Preis', 'Price', 'Listenpreis', 'ListPrice'))),
			'discount_group'    => $this->getXmlValue($node, array('Rabattgruppe', 'DiscountGroup', 'RG')),
			'product_group'     => $this->getXmlValue($node, array('Warengruppe', 'ProductGroup', 'WG')),
			'manufacturer_ref'  => $this->getXmlValue($node, array('HerstellerArtNr', 'ManufacturerArticleNumber')),
			'manufacturer_name' => $this->getXmlValue($node, array('Hersteller', 'Manufacturer')),
			'ean'               => $this->getXmlValue($node, array('EAN', 'GTIN', 'Barcode')),
		);

		if (empty($article['article_number'])) {
			return null;
		}

		return $article;
	}

	/**
	 * Get a value from an XML node, trying several names in order. For each
	 * name a child element is tried first, then an attribute.
	 *
	 * @param SimpleXMLElement $node  XML node
	 * @param array            $names Candidate element/attribute names
	 * @return string Trimmed value of the first match, or empty string
	 */
	protected function getXmlValue($node, $names)
	{
		foreach ($names as $name) {
			if (isset($node->$name)) {
				return trim((string) $node->$name);
			}
			if (isset($node[$name])) {
				return trim((string) $node[$name]);
			}
		}
		return '';
	}

	/**
	 * Parse a price string to float.
	 *
	 * Everything except digits, ',', '.' and '-' is stripped first. If both
	 * separators occur, the one appearing last is the decimal separator and the
	 * other one is dropped as grouping ("1.234,56" and "1,234.56" both give
	 * 1234.56). A lone comma is treated as decimal separator; a lone dot is
	 * left as is.
	 *
	 * @param string $priceStr Price string
	 * @return float Price value, 0.0 for empty input
	 */
	protected function parsePrice($priceStr)
	{
		if (empty($priceStr)) {
			return 0.0;
		}

		$clean = preg_replace('/[^\d,.\-]/', '', (string) $priceStr);
		if ($clean === null || $clean === '') {
			return 0.0;
		}

		$lastComma = strrpos($clean, ',');
		$lastDot = strrpos($clean, '.');

		if ($lastComma !== false && $lastDot !== false) {
			if ($lastComma > $lastDot) {
				// German style: dots group thousands, comma is the decimal mark
				$clean = str_replace(',', '.', str_replace('.', '', $clean));
			} else {
				// English style: commas group thousands, dot is the decimal mark
				$clean = str_replace(',', '', $clean);
			}
		} elseif ($lastComma !== false) {
			// Simple comma as decimal separator
			$clean = str_replace(',', '.', $clean);
		}

		return (float) $clean;
	}

	/**
	 * Convert a Datanorm unit code to a UN/ECE unit code.
	 * The lookup is case-insensitive; unknown codes map to 'C62' (piece).
	 *
	 * @param string $datanormUnit Datanorm unit code
	 * @return string UN/ECE unit code
	 */
	public static function convertUnitCode($datanormUnit)
	{
		$mapping = array(
			'ST'  => 'C62', // Piece
			'STK' => 'C62', // Piece
			'PCE' => 'C62', // Piece
			'M'   => 'MTR', // Metre
			'MTR' => 'MTR', // Metre
			'CM'  => 'CMT', // Centimetre
			'MM'  => 'MMT', // Millimetre
			'L'   => 'LTR', // Litre
			'LTR' => 'LTR', // Litre
			'KG'  => 'KGM', // Kilogram
			'G'   => 'GRM', // Gram
			'M2'  => 'MTK', // Square metre
			'M3'  => 'MTQ', // Cubic metre
			'PAK' => 'PK',  // Pack
			'PAC' => 'PK',  // Package
			'SET' => 'SET', // Set
			'ROL' => 'RL',  // Roll
			'RLL' => 'RL',  // Roll
			'BDL' => 'BE',  // Bundle
			'KRT' => 'CT',  // Carton
			'CTN' => 'CT',  // Carton
		);

		$unit = strtoupper(trim($datanormUnit));
		return $mapping[$unit] ?? 'C62'; // Default to piece
	}

	/**
	 * Get all parsed articles (empty in streaming mode).
	 *
	 * @return array Articles keyed by article number
	 */
	public function getArticles()
	{
		return $this->articles;
	}

	/**
	 * Find an article by its number.
	 *
	 * @param string $articleNumber Article number to find
	 * @return array|null Article data or null
	 */
	public function findArticle($articleNumber)
	{
		return $this->articles[$articleNumber] ?? null;
	}

	/**
	 * Case-insensitive substring search over article number, matchcode, both
	 * short texts, EAN and manufacturer reference. An empty search text matches
	 * every article.
	 *
	 * @param string $searchText Search text
	 * @param int    $limit      Maximum results
	 * @return array Matching articles (list)
	 */
	public function searchArticles($searchText, $limit = 50)
	{
		$results = array();
		// mb_strtolower: strtolower() does not fold non-ASCII letters such as umlauts.
		$needle = mb_strtolower((string) $searchText, 'UTF-8');

		foreach ($this->articles as $article) {
			$haystack = mb_strtolower(
				$article['article_number'] . ' ' .
				$article['matchcode'] . ' ' .
				$article['short_text1'] . ' ' .
				$article['short_text2'] . ' ' .
				$article['ean'] . ' ' .
				$article['manufacturer_ref'],
				'UTF-8'
			);

			// Guard the empty needle explicitly: strpos() treats it differently
			// across PHP versions.
			if ($needle !== '' && strpos($haystack, $needle) === false) {
				continue;
			}

			$results[] = $article;
			if (count($results) >= $limit) {
				break;
			}
		}

		return $results;
	}
}
|