1006 lines
34 KiB
PHP
1006 lines
34 KiB
PHP
<?php
|
|
/* Copyright (C) 2026 Eduard Wisch <data@data-it-solution.de>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*/
|
|
|
|
/**
|
|
* \file class/datanormparser.class.php
|
|
* \ingroup importzugferd
|
|
* \brief Parser for Datanorm 4.0 and 5.0 catalog files
|
|
*/
|
|
|
|
/**
|
|
* Class DatanormParser
|
|
* Parses Datanorm catalog files (Version 4.0 and 5.0)
|
|
*
|
|
* Datanorm Price Unit (PE) Codes:
|
|
* 0 or empty = per 1 piece
|
|
* 1 = per 10 pieces
|
|
* 2 = per 100 pieces
|
|
* 3 = per 1000 pieces
|
|
*
|
|
* The price in Datanorm is given for the quantity specified by the PE code.
|
|
* To get the unit price: divide price by PE quantity.
|
|
*/
|
|
class DatanormParser
|
|
{
|
|
/**
 * Lookup table from Datanorm price unit (PE) code to piece count.
 *
 * Keys are the PE codes 0-3; each value is the number of pieces
 * the quoted price refers to.
 */
const PRICE_UNIT_CODES = [
    0 => 1,
    1 => 10,
    2 => 100,
    3 => 1000,
];
|
|
|
|
/**
 * Translate a Datanorm PE code into the quantity the price refers to.
 *
 * The code is cast to int first, so an empty string counts as code 0.
 * Codes that are not listed in PRICE_UNIT_CODES fall back to 1.
 *
 * @param int|string $peCode PE code as read from the catalog (0, 1, 2 or 3)
 * @return int Quantity (1, 10, 100 or 1000)
 */
public static function convertPriceUnitCode($peCode)
{
    $key = (int) $peCode;

    if (array_key_exists($key, self::PRICE_UNIT_CODES)) {
        return self::PRICE_UNIT_CODES[$key];
    }

    return 1;
}
|
|
|
|
/**
 * @var string Datanorm version set by the parse methods ('4.0' or '5.0'), empty before parsing
 */
public $version = '';

/**
 * @var array Parsed articles, keyed by article number
 */
public $articles = [];

/**
 * @var array Price records, keyed by article number
 */
public $prices = [];

/**
 * @var array Product group names, keyed by group code
 */
public $groups = [];

/**
 * @var string Most recent error message
 */
public $error = '';

/**
 * @var array Collected error messages
 */
public $errors = [];

/**
 * @var callable|null Receiver for article batches in streaming mode
 */
protected $batchCallback = null;

/**
 * @var int Number of articles collected before a batch is handed to the callback
 */
protected $batchSize = 1000;

/**
 * @var array Articles of the batch currently being collected, keyed by article number
 */
protected $batchArticles = [];

/**
 * @var bool True while streaming mode is enabled
 */
protected $streamingMode = false;
|
|
|
|
/**
 * Switch the parser into streaming mode.
 *
 * While streaming is active, articles are collected in a batch and handed
 * to the callback instead of being kept in $articles. Any batch that was
 * being collected before this call is discarded.
 *
 * @param callable $callback  Receives one batch (array of articles keyed by article number)
 * @param int      $batchSize Number of articles per batch
 */
public function enableStreaming($callback, $batchSize = 1000)
{
    $this->batchArticles = array();
    $this->batchSize = $batchSize;
    $this->batchCallback = $callback;
    $this->streamingMode = true;
}
|
|
|
|
/**
 * Leave streaming mode.
 *
 * Clears the callback and drops the batch currently being collected
 * without passing it to the callback.
 */
public function disableStreaming()
{
    $this->batchArticles = array();
    $this->batchCallback = null;
    $this->streamingMode = false;
}
|
|
|
|
/**
 * Store a parsed article.
 *
 * Without streaming (or without a callback) the article goes into
 * $articles. In streaming mode it is added to the current batch.
 *
 * A full batch is flushed BEFORE the next new article is added, not right
 * after the article that filled it. The most recently added article
 * therefore stays reachable through getArticleRef(), so B records that
 * follow its A record can still attach matchcode, EAN and long text.
 * The parse methods flush the remaining batch at the end of the file.
 *
 * @param array $article Article data; must contain the key 'article_number'
 */
protected function addArticle($article)
{
    $number = $article['article_number'];

    if (!$this->streamingMode || !$this->batchCallback) {
        $this->articles[$number] = $article;
        return;
    }

    // Re-adding a number that is already in the batch only overwrites the
    // entry and does not grow the batch, so no flush is needed for it.
    $batchIsFull = count($this->batchArticles) >= $this->batchSize;
    if ($batchIsFull && !isset($this->batchArticles[$number])) {
        $this->flushBatch();
    }

    $this->batchArticles[$number] = $article;
}
|
|
|
|
/**
 * Hand the collected batch to the streaming callback and start a new one.
 *
 * Does nothing when the batch is empty or no callback is set. Before the
 * callback runs, every article with an entry in $prices receives that
 * price (and the metal surcharge, if it is non-empty); the used price
 * entry is then removed from $prices.
 */
protected function flushBatch()
{
    if (empty($this->batchArticles) || !$this->batchCallback) {
        return;
    }

    foreach (array_keys($this->batchArticles) as $number) {
        if (!isset($this->prices[$number])) {
            continue;
        }

        $this->batchArticles[$number]['price'] = $this->prices[$number]['price'];
        if (!empty($this->prices[$number]['metal_surcharge'])) {
            $this->batchArticles[$number]['metal_surcharge'] = $this->prices[$number]['metal_surcharge'];
        }

        // The price has been consumed; drop it to release memory.
        unset($this->prices[$number]);
    }

    call_user_func($this->batchCallback, $this->batchArticles);
    $this->batchArticles = array();
}
|
|
|
|
/**
 * Entry point: parse either a single catalog file or a whole directory.
 *
 * Directories are delegated to parseDirectory(), everything else to
 * parseFile().
 *
 * @param string $path Path to a file or a directory
 * @return int Number of articles parsed, -1 on error
 */
public function parse($path)
{
    return is_dir($path)
        ? $this->parseDirectory($path)
        : $this->parseFile($path);
}
|
|
|
|
/**
 * Parse all Datanorm files found in a directory.
 *
 * File names are matched case-insensitively:
 *  - DATPREIS.nnn  price files (read up front, non-streaming mode only)
 *  - DATANORM.nnn  article files (Datanorm 4.0)
 *  - DATANORM.WRG  product groups, DATANORM.RAB discounts (only read when
 *                  at least one DATANORM.nnn file exists)
 *  - *.xml         checked with isDatanorm5File() and parsed as Datanorm 5.0
 *
 * Files that fail to parse do not abort the run; only positive article
 * counts are added to the total.
 *
 * @param string $dir Directory path
 * @return int Number of articles parsed
 */
public function parseDirectory($dir)
{
    $totalArticles = 0;

    // glob() returns false on error (on some systems also for "no match");
    // normalise that to an empty list so the loops below stay safe.
    $allFiles = glob($dir . '/*');
    if ($allFiles === false) {
        $allFiles = array();
    }

    // Classify every entry once, comparing upper-cased base names so that
    // lower- and mixed-case file names are found on case-sensitive systems.
    $priceFiles = array();
    $datanormFiles = array();
    $wrgFiles = array();
    $rabFiles = array();
    $xmlFiles = array();
    foreach ($allFiles as $file) {
        $basename = strtoupper(basename($file));
        if (preg_match('/^DATPREIS\.\d{3}$/', $basename)) {
            $priceFiles[] = $file;
        } elseif (preg_match('/^DATANORM\.\d{3}$/', $basename)) {
            $datanormFiles[] = $file;
        } elseif ($basename === 'DATANORM.WRG') {
            $wrgFiles[] = $file;
        } elseif ($basename === 'DATANORM.RAB') {
            $rabFiles[] = $file;
        } elseif (strlen($basename) > 4 && substr($basename, -4) === '.XML') {
            $xmlFiles[] = $file;
        }
    }

    // Non-streaming mode: load prices into memory before the articles.
    // In streaming mode the DATPREIS files are not read here.
    if (!$this->streamingMode && !empty($priceFiles)) {
        $this->version = '4.0';
        foreach ($priceFiles as $file) {
            $this->parseDatapreis4File($file);
        }
    }

    // Datanorm 4.0 article files plus their group/discount companions
    if (!empty($datanormFiles)) {
        $this->version = '4.0';
        foreach ($datanormFiles as $file) {
            $count = $this->parseDatanorm4File($file);
            if ($count > 0) {
                $totalArticles += $count;
            }
        }
        foreach ($wrgFiles as $file) {
            $this->parseDatanorm4Groups($file);
        }
        foreach ($rabFiles as $file) {
            $this->parseDatanorm4Discounts($file);
        }
    }

    // Merge prices into articles (non-streaming mode only);
    // in streaming mode prices are merged in flushBatch().
    if (!$this->streamingMode && !empty($this->prices)) {
        $this->mergePricesIntoArticles();
    }

    // Datanorm 5.0 catalogs (XML)
    foreach ($xmlFiles as $file) {
        if (!$this->isDatanorm5File($file)) {
            continue;
        }
        $this->version = '5.0';
        $count = $this->parseDatanorm5File($file);
        if ($count > 0) {
            $totalArticles += $count;
        }
    }

    return $totalArticles;
}
|
|
|
|
/**
 * Parse one catalog file, choosing the format from its first bytes.
 *
 * The first 1000 bytes are inspected: if they contain '<?xml' or
 * '<DATANORM' the file is parsed as Datanorm 5.0 (XML), otherwise as
 * Datanorm 4.0. $version is set before the format-specific parser runs.
 *
 * @param string $file File path
 * @return int Number of articles parsed, -1 on error
 */
public function parseFile($file)
{
    if (!file_exists($file)) {
        $this->error = 'File not found: ' . $file;
        return -1;
    }

    $head = file_get_contents($file, false, null, 0, 1000);
    $looksLikeXml = strpos($head, '<?xml') !== false || strpos($head, '<DATANORM') !== false;

    if (!$looksLikeXml) {
        // Anything that is not XML is treated as Datanorm 4.0
        $this->version = '4.0';
        return $this->parseDatanorm4File($file);
    }

    $this->version = '5.0';
    return $this->parseDatanorm5File($file);
}
|
|
|
|
/**
 * Parse one Datanorm 4.0 file line by line.
 *
 * Each line is converted from ISO-8859-1 to UTF-8 when it is not valid
 * UTF-8 already. The first character selects the record type:
 *  - A: article master record, stored via addArticle()
 *  - B: additional data for the most recently parsed article
 *  - P: price record
 * Other record types and lines shorter than two bytes are ignored.
 *
 * After the last line the pending batch is flushed (streaming mode) or the
 * collected prices are merged into the articles (non-streaming mode).
 *
 * @param string $file File path
 * @return int Number of article records parsed, -1 if the file cannot be opened
 */
protected function parseDatanorm4File($file)
{
    $stream = fopen($file, 'r');
    if ($stream === false) {
        $this->error = 'Cannot read file: ' . $file;
        return -1;
    }

    $articleCount = 0;
    $lastArticleNumber = null;

    while (($rawLine = fgets($stream)) !== false) {
        $record = rtrim($rawLine, "\r\n");

        // Datanorm 4 files are often ISO-8859-1 or CP850 encoded
        if (!mb_check_encoding($record, 'UTF-8')) {
            $record = mb_convert_encoding($record, 'UTF-8', 'ISO-8859-1');
        }

        if (strlen($record) < 2) {
            continue;
        }

        $type = substr($record, 0, 1);

        if ($type === 'A') {
            $parsed = $this->parseDatanorm4TypeA($record);
            if ($parsed) {
                $this->addArticle($parsed);
                $lastArticleNumber = $parsed['article_number'];
                $articleCount++;
            }
        } elseif ($type === 'B') {
            // B records belong to the article of the preceding A record
            if ($lastArticleNumber) {
                $this->parseDatanorm4TypeB($record, $lastArticleNumber);
            }
        } elseif ($type === 'P') {
            $this->parseDatanorm4TypeP($record);
        }
    }

    fclose($stream);

    if ($this->streamingMode) {
        $this->flushBatch();
    } else {
        $this->mergePricesIntoArticles();
    }

    return $articleCount;
}
|
|
|
|
/**
 * Parse a Datanorm 4.0 type A record (article master).
 *
 * Lines containing a semicolon are delegated to
 * parseDatanorm4TypeASemicolon(). All other lines are read as fixed-width
 * records with these 1-based column positions:
 *   2-16 article number, 17-28 matchcode, 29-68 short text 1,
 *   69-108 short text 2, 109-111 unit, 112-116 PE code,
 *   117-120 discount group, 121-127 product group,
 *   128-142 manufacturer article number, 143-162 manufacturer name,
 *   163-175 EAN (only when the line is at least 175 characters long).
 *
 * Columns are counted in characters, not bytes: the caller converts lines
 * to UTF-8, where an umlaut occupies two bytes, so byte offsets would shift
 * every field behind the first non-ASCII character. Lines that are not
 * valid UTF-8 are read byte-wise.
 *
 * @param string $line Record line
 * @return array|null Article data, or null if the line is too short or has no article number
 */
protected function parseDatanorm4TypeA($line)
{
    // Minimum length check
    if (strlen($line) < 50) {
        return null;
    }

    // Newer catalogs deliver type A records semicolon-separated
    if (strpos($line, ';') !== false) {
        return $this->parseDatanorm4TypeASemicolon($line);
    }

    $encoding = mb_check_encoding($line, 'UTF-8') ? 'UTF-8' : '8bit';
    $field = function ($offset, $length) use ($line, $encoding) {
        return trim(mb_substr($line, $offset, $length, $encoding));
    };

    $articleNumber = $field(1, 15);
    if (empty($articleNumber)) {
        return null;
    }

    // The PE column holds a CODE (0=1, 1=10, 2=100, 3=1000), not a quantity
    $peCode = (int) $field(111, 5);

    return array(
        'article_number' => $articleNumber,
        'action_code' => 'N', // Fixed-width format has no action code
        'matchcode' => $field(16, 12),
        'short_text1' => $field(28, 40),
        'short_text2' => $field(68, 40),
        'unit_code' => $field(108, 3),
        'price_unit' => self::convertPriceUnitCode($peCode),
        'price_unit_code' => $peCode,
        'discount_group' => $field(116, 4),
        'product_group' => $field(120, 7),
        'manufacturer_ref' => $field(127, 15),
        'manufacturer_name' => $field(142, 20),
        // EAN is only present in the extended record layout
        'ean' => mb_strlen($line, $encoding) >= 175 ? $field(162, 13) : '',
        'long_text' => '',
        'price' => 0,
    );
}
|
|
|
|
/**
 * Parse a semicolon-separated Datanorm 4.0 type A record.
 *
 * Two layouts are told apart by the second field:
 *  - Sonepar layout (second field at most two characters long, holding an
 *    action code N=New, A=Update, L=Delete):
 *      A;N;ArtNr;TextKz;Kurztext1;Kurztext2;PreisKz;PE;ME;Preis;RabGrp;WG;...
 *  - Standard layout:
 *      A;ArtNr;Matchcode;Kurztext1;Kurztext2;ME;PE;RabGrp;WG;...
 *
 * In both layouts PE is a CODE (0=1, 1=10, 2=100, 3=1000) that is
 * converted with convertPriceUnitCode(). The price is always returned
 * as 0 here.
 *
 * @param string $line Record line
 * @return array|null Article data, or null for fewer than six fields or an empty article number
 */
protected function parseDatanorm4TypeASemicolon($line)
{
    $fields = explode(';', $line);
    if (count($fields) < 6) {
        return null;
    }

    // Trimmed field value, or '' when the index does not exist
    $at = function ($index) use ($fields) {
        return trim($fields[$index] ?? '');
    };

    $hasActionCode = $at(0) === 'A' && strlen($at(1)) <= 2;

    if ($hasActionCode) {
        // Sonepar layout: PE code at index 7, unit (ME) at index 8
        $peCode = (int) $at(7);
        $article = array(
            'article_number' => $at(2),
            'action_code' => strtoupper($at(1)), // N=New, A=Update, L=Delete
            'matchcode' => '', // Filled from the B record
            'short_text1' => $at(4),
            'short_text2' => $at(5),
            'unit_code' => $at(8),
            'price_unit' => self::convertPriceUnitCode($peCode),
            'price_unit_code' => $peCode,
            'discount_group' => $at(10),
            'product_group' => $at(11),
            'price_type' => $at(6), // Price indicator (1=gross, 2=net)
            'manufacturer_ref' => '',
            'manufacturer_name' => '',
            'ean' => '',
            'long_text' => '',
            'price' => 0,
        );
    } else {
        // Standard layout: unit (ME) at index 5, PE code at index 6
        $peCode = (int) $at(6);
        $article = array(
            'article_number' => $at(1),
            'action_code' => 'N', // Standard layout carries no action code
            'matchcode' => $at(2),
            'short_text1' => $at(3),
            'short_text2' => $at(4),
            'unit_code' => $at(5),
            'price_unit' => self::convertPriceUnitCode($peCode),
            'price_unit_code' => $peCode,
            'discount_group' => $at(7),
            'product_group' => $at(8),
            'manufacturer_ref' => $at(14),
            'manufacturer_name' => $at(15),
            'ean' => $at(16),
            'long_text' => '',
            'price' => 0,
        );
    }

    return empty($article['article_number']) ? null : $article;
}
|
|
|
|
/**
 * Return a reference to a stored article so the caller can modify it.
 *
 * In streaming mode the current batch is searched, otherwise $articles.
 * When the article is unknown, a reference to a local null is returned.
 *
 * @param string $articleNumber Article number
 * @return array|null Reference to the article, or null if it is not stored
 */
protected function &getArticleRef($articleNumber)
{
    $notFound = null;

    if ($this->streamingMode) {
        if (!isset($this->batchArticles[$articleNumber])) {
            return $notFound;
        }
        return $this->batchArticles[$articleNumber];
    }

    if (!isset($this->articles[$articleNumber])) {
        return $notFound;
    }
    return $this->articles[$articleNumber];
}
|
|
|
|
/**
 * Parse a Datanorm 4.0 type B record and enrich the current article.
 *
 * Three layouts are handled:
 *  - Fixed width (no semicolon): everything from byte offset 16 on is
 *    appended to the long text.
 *  - Sonepar (second field at most two characters long):
 *      B;N;ArtNr;Matchcode; ; ;;;;EAN; ; ;0;VPE;;;
 *    Only applied when the record's article number equals $articleNumber.
 *    Sets the matchcode and EAN if they are still empty, and stores the
 *    first value greater than 1 from fields 12-15 as 'vpe'.
 *  - Standard semicolon layout: field 2 is appended to the long text.
 *
 * Long text lines are joined with "\n". Nothing happens when the article
 * cannot be found.
 *
 * @param string $line          Record line
 * @param string $articleNumber Number of the article the record belongs to
 */
protected function parseDatanorm4TypeB($line, $articleNumber)
{
    $article = &$this->getArticleRef($articleNumber);
    if ($article === null) {
        return;
    }

    // Long text contributed by this record; stays null for the Sonepar layout
    $text = null;

    if (strpos($line, ';') === false) {
        $text = trim(substr($line, 16));
    } else {
        $fields = explode(';', $line);
        $isSonepar = isset($fields[1]) && strlen(trim($fields[1])) <= 2;

        if (!$isSonepar) {
            // Standard format: text at position 2
            $text = trim($fields[2] ?? '');
        } elseif (trim($fields[2] ?? '') === $articleNumber) {
            // Matchcode is at position 3
            $matchcode = trim($fields[3] ?? '');
            if (!empty($matchcode) && empty($article['matchcode'])) {
                $article['matchcode'] = $matchcode;
            }

            // Field positions can vary, so take the first field that is a
            // 13-digit number as the EAN
            if (empty($article['ean'])) {
                foreach ($fields as $candidate) {
                    $candidate = trim($candidate);
                    if (preg_match('/^\d{13}$/', $candidate)) {
                        $article['ean'] = $candidate;
                        break;
                    }
                }
            }

            // VPE (packaging unit) is informational only. It is kept in its
            // own field and never overrides the price unit from the A record.
            $lastIndex = min(15, count($fields) - 1);
            for ($index = 12; $index <= $lastIndex; $index++) {
                $vpe = (int) trim($fields[$index] ?? '0');
                if ($vpe > 1) {
                    $article['vpe'] = $vpe;
                    break;
                }
            }
        }
    }

    if (!empty($text)) {
        if (!empty($article['long_text'])) {
            $article['long_text'] .= "\n";
        }
        $article['long_text'] .= $text;
    }
}
|
|
|
|
/**
 * Parse a Datanorm 4.0 type P record (price) into $prices.
 *
 * Semicolon layout: P;ArtNr;PriceType;Price. Fixed-width layout: article
 * number at byte offset 1 (15 bytes), price type at offset 16 (1 byte),
 * price at offset 17 (12 bytes). The price text is converted with
 * parsePrice(). Records with an empty article number or a price that is
 * not greater than zero are skipped.
 *
 * @param string $line Record line
 */
protected function parseDatanorm4TypeP($line)
{
    if (strpos($line, ';') === false) {
        $number = trim(substr($line, 1, 15));
        $kind = trim(substr($line, 16, 1));
        $amount = $this->parsePrice(trim(substr($line, 17, 12)));
    } else {
        $fields = explode(';', $line);
        $number = trim($fields[1] ?? '');
        $kind = trim($fields[2] ?? '');
        $amount = $this->parsePrice(trim($fields[3] ?? '0'));
    }

    if (empty($number) || !($amount > 0)) {
        return;
    }

    $this->prices[$number] = array(
        'price' => $amount,
        'price_type' => $kind,
    );
}
|
|
|
|
/**
 * Read a Datanorm 4.0 product group file (DATANORM.WRG) into $groups.
 *
 * The whole file is loaded and converted from ISO-8859-1 to UTF-8 if it is
 * not valid UTF-8. Lines shorter than ten bytes are skipped. Semicolon
 * lines are read as Code;Name; other lines use the first seven bytes as
 * the code and the remainder as the name. Lines with an empty code are
 * ignored. An unreadable file is silently skipped.
 *
 * @param string $file File path
 */
protected function parseDatanorm4Groups($file)
{
    $raw = file_get_contents($file);
    if ($raw === false) {
        return;
    }

    if (!mb_check_encoding($raw, 'UTF-8')) {
        $raw = mb_convert_encoding($raw, 'UTF-8', 'ISO-8859-1');
    }

    foreach (explode("\n", $raw) as $row) {
        $row = rtrim($row, "\r\n");
        if (strlen($row) < 10) {
            continue;
        }

        if (strpos($row, ';') === false) {
            $code = trim(substr($row, 0, 7));
            $label = trim(substr($row, 7));
        } else {
            $columns = explode(';', $row);
            $code = trim($columns[0] ?? '');
            $label = trim($columns[1] ?? '');
        }

        if (empty($code)) {
            continue;
        }

        $this->groups[$code] = $label;
    }
}
|
|
|
|
/**
 * Placeholder for reading a Datanorm 4.0 discount group file (DATANORM.RAB).
 *
 * The method is intentionally empty: the file is accepted but its content
 * is not evaluated.
 *
 * @param string $file File path
 */
protected function parseDatanorm4Discounts($file)
{
    // Not implemented - discount parsing can be added here if needed
}
|
|
|
|
/**
 * Read a DATPREIS.nnn price file line by line into $prices.
 *
 * Lines are converted to UTF-8 when necessary; lines shorter than ten
 * bytes are skipped. Three record layouts are handled:
 *  - "P;A;..." with several articles per line, nine fields each:
 *      ArtNr;PreisKz;Preis;PE;Zuschlag;x;x;x;x
 *    Price and surcharge are given in cents. For cables the price is the
 *    material price and the surcharge the metal (copper) surcharge.
 *  - "P;ArtNr;PreisKz;Preis" (record type P or 0), one article per line.
 *  - Fixed width (record type P or 0): article number at byte offset 1
 *    (15 bytes), price type at 16 (1 byte), price at 17 (12 bytes).
 * In the two single-article layouts a price without ',' or '.' is taken as
 * cents; otherwise it is converted with parsePrice().
 *
 * Entries with an empty article number or a price that is not greater
 * than zero are skipped. An unreadable file is silently ignored.
 *
 * @param string $file File path
 */
protected function parseDatapreis4File($file)
{
    $stream = fopen($file, 'r');
    if ($stream === false) {
        return;
    }

    while (($rawLine = fgets($stream)) !== false) {
        $record = rtrim($rawLine, "\r\n");

        if (!mb_check_encoding($record, 'UTF-8')) {
            $record = mb_convert_encoding($record, 'UTF-8', 'ISO-8859-1');
        }

        if (strlen($record) < 10) {
            continue;
        }

        if (strpos($record, ';') !== false) {
            $fields = explode(';', $record);
            $type = trim($fields[0] ?? '');

            if ($type === 'P' && isset($fields[1]) && $fields[1] === 'A') {
                // Multi-article line: walk the entries in steps of nine fields,
                // starting right after "P;A"
                $fieldCount = count($fields);
                for ($pos = 2; $pos < $fieldCount - 2; $pos += 9) {
                    $number = trim($fields[$pos] ?? '');
                    $kind = trim($fields[$pos + 1] ?? '');
                    // Price and surcharge are in cents, convert to euros
                    $amount = (float) trim($fields[$pos + 2] ?? '0') / 100;
                    $peCode = (int) trim($fields[$pos + 3] ?? '0');
                    $surcharge = (float) trim($fields[$pos + 4] ?? '0') / 100;

                    if (!empty($number) && $amount > 0) {
                        $this->prices[$number] = array(
                            'price' => $amount,
                            'price_type' => $kind,
                            'metal_surcharge' => $surcharge,
                            'datpreis_pe_code' => $peCode,
                        );
                    }
                }
                continue;
            }

            if ($type !== 'P' && $type !== '0') {
                continue;
            }

            // Simple format: P;ArtNr;PreisKz;Preis
            $number = trim($fields[1] ?? '');
            $kind = trim($fields[2] ?? '');
            $rawAmount = trim($fields[3] ?? '0');
        } else {
            // Fixed width format
            $type = substr($record, 0, 1);
            if ($type !== 'P' && $type !== '0') {
                continue;
            }

            $number = trim(substr($record, 1, 15));
            $kind = trim(substr($record, 16, 1));
            $rawAmount = trim(substr($record, 17, 12));
        }

        // A price without decimal separator is given in cents
        $hasSeparator = strpos($rawAmount, ',') !== false || strpos($rawAmount, '.') !== false;
        $amount = $hasSeparator
            ? $this->parsePrice($rawAmount)
            : (float) $rawAmount / 100;

        if (!empty($number) && $amount > 0) {
            $this->prices[$number] = array(
                'price' => $amount,
                'price_type' => $kind,
            );
        }
    }

    fclose($stream);
}
|
|
|
|
/**
 * Copy the collected prices onto the matching entries of $articles.
 *
 * Only article numbers present in both $prices and $articles are touched.
 * The price is taken over unchanged (DATPREIS prices already refer to the
 * PE unit of the A record, so no normalisation is applied); a non-empty
 * metal surcharge is copied as well. $prices itself is left untouched.
 */
protected function mergePricesIntoArticles()
{
    $matching = array_intersect_key($this->prices, $this->articles);

    foreach ($matching as $number => $priceInfo) {
        $this->articles[$number]['price'] = $priceInfo['price'];

        if (empty($priceInfo['metal_surcharge'])) {
            continue;
        }
        $this->articles[$number]['metal_surcharge'] = $priceInfo['metal_surcharge'];
    }
}
|
|
|
|
/**
 * Tell whether a file looks like a Datanorm 5.0 (XML) catalog.
 *
 * Only the first 2000 bytes are inspected; the file counts as Datanorm 5.0
 * when they contain '<DATANORM' or '<datanorm'.
 *
 * @param string $file File path
 * @return bool
 */
protected function isDatanorm5File($file)
{
    $head = file_get_contents($file, false, null, 0, 2000);

    if (strpos($head, '<DATANORM') !== false) {
        return true;
    }

    return strpos($head, '<datanorm') !== false;
}
|
|
|
|
/**
 * Parse a Datanorm 5.0 file (XML format).
 *
 * Article nodes are looked up under the element names 'Artikel',
 * 'Article' and 'article' (first name that yields nodes wins). Each node
 * is converted with parseDatanorm5Article() and stored through
 * addArticle(), so streaming mode is honoured the same way as for
 * Datanorm 4.0 files; the pending batch is flushed at the end.
 *
 * libxml errors are collected internally while loading and the previous
 * libxml error mode is restored afterwards. Network access during loading
 * is disabled (LIBXML_NONET).
 *
 * @param string $file File path
 * @return int Number of articles parsed, -1 if the XML cannot be loaded
 */
protected function parseDatanorm5File($file)
{
    // Remember the global libxml error mode so it can be restored below
    $previousErrorMode = libxml_use_internal_errors(true);
    $xml = simplexml_load_file($file, 'SimpleXMLElement', LIBXML_NONET);

    if ($xml === false) {
        $errors = libxml_get_errors();
        $this->error = 'XML parse error: ' . ($errors[0]->message ?? 'Unknown error');
    }

    // Drop buffered errors and warnings, then hand the error mode back
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($xml === false) {
        return -1;
    }

    // Find article nodes (various possible node names)
    $articleNodes = $xml->xpath('//Artikel') ?: $xml->xpath('//Article') ?: $xml->xpath('//article') ?: array();

    $count = 0;
    foreach ($articleNodes as $node) {
        $article = $this->parseDatanorm5Article($node);
        if ($article) {
            $this->addArticle($article);
            $count++;
        }
    }

    // Hand over the remaining batch in streaming mode
    if ($this->streamingMode) {
        $this->flushBatch();
    }

    return $count;
}
|
|
|
|
/**
 * Convert one Datanorm 5.0 article node into the parser's article array.
 *
 * Every field is looked up under several German and English element or
 * attribute names via getXmlValue(). A price unit that is missing or
 * evaluates to 0 becomes 1; the price text is converted with parsePrice().
 *
 * @param SimpleXMLElement $node Article XML node
 * @return array|null Article data, or null when no article number is found
 */
protected function parseDatanorm5Article($node)
{
    $number = $this->getXmlValue($node, array('Artikelnummer', 'ArticleNumber', 'ArtNr', 'artNr'));
    if (empty($number)) {
        return null;
    }

    $priceUnit = (int) $this->getXmlValue($node, array('Preiseinheit', 'PriceUnit', 'PE'));
    if (!$priceUnit) {
        $priceUnit = 1;
    }

    $priceText = $this->getXmlValue($node, array('Preis', 'Price', 'Listenpreis', 'ListPrice'));

    return array(
        'article_number' => $number,
        'matchcode' => $this->getXmlValue($node, array('Matchcode', 'matchcode')),
        'short_text1' => $this->getXmlValue($node, array('Kurztext1', 'Kurztext', 'ShortText1', 'ShortText', 'Bezeichnung', 'Name')),
        'short_text2' => $this->getXmlValue($node, array('Kurztext2', 'ShortText2')),
        'long_text' => $this->getXmlValue($node, array('Langtext', 'LongText', 'Beschreibung', 'Description')),
        'unit_code' => $this->getXmlValue($node, array('Mengeneinheit', 'Unit', 'ME')),
        'price_unit' => $priceUnit,
        'price' => $this->parsePrice($priceText),
        'discount_group' => $this->getXmlValue($node, array('Rabattgruppe', 'DiscountGroup', 'RG')),
        'product_group' => $this->getXmlValue($node, array('Warengruppe', 'ProductGroup', 'WG')),
        'manufacturer_ref' => $this->getXmlValue($node, array('HerstellerArtNr', 'ManufacturerArticleNumber')),
        'manufacturer_name' => $this->getXmlValue($node, array('Hersteller', 'Manufacturer')),
        'ean' => $this->getXmlValue($node, array('EAN', 'GTIN', 'Barcode')),
    );
}
|
|
|
|
/**
 * Read a value from an XML node, trying several candidate names in order.
 *
 * For each name a child element is checked first, then an attribute of
 * the same name. The first hit is returned as a trimmed string.
 *
 * @param SimpleXMLElement $node  XML node
 * @param array            $names Candidate element/attribute names, most preferred first
 * @return string Value, or an empty string if none of the names exists
 */
protected function getXmlValue($node, $names)
{
    foreach ($names as $candidate) {
        $hasChild = isset($node->$candidate);

        if (!$hasChild && !isset($node[$candidate])) {
            continue;
        }

        $raw = $hasChild ? $node->$candidate : $node[$candidate];

        return trim((string) $raw);
    }

    return '';
}
|
|
|
|
/**
 * Parse a price string into a float.
 *
 * Everything except digits, ',', '.' and '-' is removed first (currency
 * symbols, whitespace). Separators are then interpreted as follows:
 *  - both ',' and '.' present: the right-most one is the decimal mark and
 *    the other one a thousands separator ("1.234,56" and "1,234.56" both
 *    give 1234.56; the sign and the number of decimals do not matter)
 *  - a single ',': decimal comma ("12,5" gives 12.5)
 *  - several ',' and no '.': thousands separators ("1,234,567")
 *  - only '.': left as is and read as a decimal point
 *
 * @param string $priceStr Price string
 * @return float Price value, 0.0 for an empty value
 */
protected function parsePrice($priceStr)
{
    if (empty($priceStr)) {
        return 0.0;
    }

    // Remove currency symbols and whitespace
    $clean = preg_replace('/[^\d,.\-]/', '', (string) $priceStr);

    $lastComma = strrpos($clean, ',');
    $lastDot = strrpos($clean, '.');

    if ($lastComma !== false && $lastDot !== false) {
        if ($lastComma > $lastDot) {
            // German style: 1.234,56
            $clean = str_replace('.', '', $clean);
            $clean = str_replace(',', '.', $clean);
        } else {
            // English style: 1,234.56
            $clean = str_replace(',', '', $clean);
        }
    } elseif ($lastComma !== false) {
        // One comma is a decimal comma; several can only be thousands separators
        $replacement = substr_count($clean, ',') > 1 ? '' : '.';
        $clean = str_replace(',', $replacement, $clean);
    }

    return (float) $clean;
}
|
|
|
|
/**
 * Map a Datanorm unit code to its UN/ECE unit code.
 *
 * The input is trimmed and upper-cased before the lookup. Codes that are
 * not in the table are treated as pieces (C62).
 *
 * @param string $datanormUnit Datanorm unit code
 * @return string UN/ECE unit code
 */
public static function convertUnitCode($datanormUnit)
{
    $uneceByDatanorm = array(
        // Pieces
        'ST' => 'C62',
        'STK' => 'C62',
        'PCE' => 'C62',
        // Length
        'M' => 'MTR',
        'MTR' => 'MTR',
        'CM' => 'CMT',
        'MM' => 'MMT',
        // Volume (litre)
        'L' => 'LTR',
        'LTR' => 'LTR',
        // Weight
        'KG' => 'KGM',
        'G' => 'GRM',
        // Square metre / cubic metre
        'M2' => 'MTK',
        'M3' => 'MTQ',
        // Packaging: package, set, roll, bundle, carton
        'PAK' => 'PK',
        'PAC' => 'PK',
        'SET' => 'SET',
        'ROL' => 'RL',
        'RLL' => 'RL',
        'BDL' => 'BE',
        'KRT' => 'CT',
        'CTN' => 'CT',
    );

    $key = strtoupper(trim($datanormUnit));

    if (isset($uneceByDatanorm[$key])) {
        return $uneceByDatanorm[$key];
    }

    // Unknown units default to piece
    return 'C62';
}
|
|
|
|
/**
 * Return the articles currently held in $articles.
 *
 * @return array Articles keyed by article number
 */
public function getArticles()
{
    $all = $this->articles;

    return $all;
}
|
|
|
|
/**
 * Look up a single article in $articles by its article number.
 *
 * @param string $articleNumber Article number to look up
 * @return array|null Article data, or null if the number is unknown
 */
public function findArticle($articleNumber)
{
    if (isset($this->articles[$articleNumber])) {
        return $this->articles[$articleNumber];
    }

    return null;
}
|
|
|
|
/**
 * Search the parsed articles for a text, ignoring case.
 *
 * The text is looked for as a substring in article number, matchcode,
 * both short texts, EAN and manufacturer article number. Case folding is
 * done with mb_strtolower() in UTF-8, so umlauts and other non-ASCII
 * letters match regardless of case. Fields missing from an article are
 * treated as empty. An empty search text matches every article.
 *
 * @param string $searchText Search text
 * @param int    $limit      Maximum number of results
 * @return array Matching articles (list, in the order of $articles)
 */
public function searchArticles($searchText, $limit = 50)
{
    $results = array();
    $needle = mb_strtolower((string) $searchText, 'UTF-8');
    $searchedKeys = array('article_number', 'matchcode', 'short_text1', 'short_text2', 'ean', 'manufacturer_ref');

    foreach ($this->articles as $article) {
        $values = array();
        foreach ($searchedKeys as $key) {
            $values[] = isset($article[$key]) ? $article[$key] : '';
        }
        $haystack = mb_strtolower(implode(' ', $values), 'UTF-8');

        // Handle the empty needle explicitly: strpos() treats it differently
        // depending on the PHP version
        if ($needle !== '' && strpos($haystack, $needle) === false) {
            continue;
        }

        $results[] = $article;
        if (count($results) >= $limit) {
            break;
        }
    }

    return $results;
}
|
|
}
|