From 0e2f17ac37e04b9b089b2c8516de262bf846617b Mon Sep 17 00:00:00 2001 From: Yusuf Kandemir Date: Sun, 11 Nov 2018 20:49:54 +0300 Subject: [PATCH] Extracted methods to a class --- src/MicrodataDOMElement.php | 189 +++++++++++++++++++++++++++++++++++ src/MicrodataParser.php | 191 +----------------------------------- 2 files changed, 194 insertions(+), 186 deletions(-) create mode 100644 src/MicrodataDOMElement.php diff --git a/src/MicrodataDOMElement.php b/src/MicrodataDOMElement.php new file mode 100644 index 0000000..bdab108 --- /dev/null +++ b/src/MicrodataDOMElement.php @@ -0,0 +1,189 @@ +hasChildNodes()) { + $childNodes = iterator_to_array($this->childNodes); + + $childNodes = array_filter($childNodes, function ($node) { + return $node instanceof \DOMElement; + }); // Get only DOMElements + + $pending = array_merge($pending, $childNodes); + } + + // Step 4 + if ($this->hasAttribute('itemref')) { + $tokens = preg_split('/\s+/', $this->getAttribute('itemref')); + + foreach ($tokens as $token) { + // @todo Implement xpath query and get the first item + } + } + + // Step 5 + while ($pending) { + // Step 6 + $current = array_pop($pending); + + // Step 7 + // in_array can't compare objects + /*if (in_array($current, $memory)) { + // There is MicrodataError + continue; + }*/ + $error = false; + + foreach ($memory as $memory_item) { + if ($current->isSameNode($memory_item)) { + // There is MicrodataError + $error = true; + break; + } + } + + if ($error) { + continue; + } + + // Step 8 + $memory[] = $current; + + // Step 9 + if (! $current->hasAttribute('itemscope')) { + if ($current->hasChildNodes()) { + $childNodes = iterator_to_array($current->childNodes); + + $childNodes = array_filter($childNodes, function ($node) { + return $node instanceof \DOMElement; + }); + + $pending = array_merge($pending, $childNodes); + } + } + + // Step 10 + if ($current->hasAttribute('itemprop') && /* hasPropertyNames */ $current->getPropertyNames()) { + $results[] = $current; + } + + // Step 11: Return to loop + } + + // Step 12: End of loop. Sort results in tree order. + + $results = array_reverse($results); + + // Step 13 + return $results; + } + + public function getPropertyNames() + { + // Step 1 + $itemprop = $this->getAttribute('itemprop'); + $tokens = $itemprop ? preg_split('/\s+/', $itemprop) : []; + + // Step 2 + $properties = []; + + // Step 3 + foreach ($tokens as $token) { + if ($this->isAbsoluteUri($token)) { + $properties[] = $token; + } elseif ($this->isTypedItem()) { + $properties[] = /*$vocabularyIdentifier . */ $token; + } else { + $properties[] = $token; + } + } + + $properties = array_unique($properties); + + return $properties; + } + + public function getPropertyValue() + { + + if ($this->hasAttribute('itemscope')) { + return $this; + } + + if ($this->hasAttribute('content')) { + return $this->getAttribute('content'); + } + + $base = $this->ownerDocument->documentURI; + + switch ($this->tagName) { + case 'audio': + case 'embed': + case 'iframe': + case 'img': + case 'source': + case 'track': + case 'video': + if ($this->hasAttribute('src')) { + $result = $this->getAttribute('src'); + + // @todo check against protocol relative urls like "//example.com/test.jpg" + return $this->isAbsoluteUri($result) ? $result : $base.$result; + } + case 'a': + case 'area': + case 'link': + if ($this->hasAttribute('href')) { + $result = $this->getAttribute('href'); + + return $this->isAbsoluteUri($result) ? $result : $base.$result; + } + case 'object': + if ($this->hasAttribute('data')) { + $result = $this->getAttribute('data'); + + return $this->isAbsoluteUri($result) ? $result : $base.$result; + } + case 'data': + case 'meter': + if ($this->hasAttribute('value')) { + return $this->getAttribute('value'); + } + case 'time': + if ($this->hasAttribute('datetime')) { + return $this->getAttribute('datetime'); + } + default: + return $this->textContent; + } + } + + public function isTypedItem() + { + $tokens = []; + + if ($this->hasAttribute('itemtype')) { + $tokens = preg_split("/\s+/", $this->getAttribute('itemtype')); + } + + return !empty($tokens); + } + + protected function isAbsoluteUri(string $uri) + { + return preg_match("/^\w+:/", trim($uri)); + } +} diff --git a/src/MicrodataParser.php b/src/MicrodataParser.php index c881ecf..983e712 100644 --- a/src/MicrodataParser.php +++ b/src/MicrodataParser.php @@ -8,6 +8,8 @@ class MicrodataParser public function __construct(\DOMDocument $dom) { + $dom->registerNodeClass(\DOMElement::class, MicrodataDOMElement::class); + $xpath = new \DOMXPath($dom); $this->topLevelItems = $xpath->query('//*[@itemscope and not(@itemprop)]'); } @@ -61,8 +63,8 @@ class MicrodataParser $properties = new \stdClass; // Step 7 - foreach ($this->getProperties($item) as $element) { - $value = $this->getPropertyValue($element); + foreach ($item->getProperties() as $element) { + $value = $element->getPropertyValue(); if ($this->isItem($value)) { foreach ($memory as $memory_item) { @@ -76,7 +78,7 @@ class MicrodataParser } } - foreach ($this->getPropertyNames($element) as $name) { + foreach ($element->getPropertyNames() as $name) { $properties->{$name}[] = $value; } } @@ -88,191 +90,8 @@ class MicrodataParser return $result; } - protected function getPropertyNames(\DOMElement $item) - { - // Step 1 - $itemprop = $item->getAttribute('itemprop'); - $tokens = $itemprop ? preg_split('/\s+/', $itemprop) : []; - - // Step 2 - $properties = []; - - // Step 3 - foreach ($tokens as $token) { - if ($this->isAbsoluteUri($token)) { - $properties[] = $token; - } elseif ($this->isTypedItem($item)) { - $properties[] = /*$vocabularyIdentifier . */ $token; - } else { - $properties[] = $token; - } - } - - $properties = array_unique($properties); - - return $properties; - } - - protected function getProperties(\DOMElement $root) - { - // Step 1 - $results = []; - $memory = []; - $pending = []; - - // Step 2 - $memory[] = $root; - - // Step 3 - if ($root->hasChildNodes()) { - $childNodes = iterator_to_array($root->childNodes); - - $childNodes = array_filter($childNodes, function ($node) { - return $node instanceof \DOMElement; - }); // Get only DOMElements - - $pending = array_merge($pending, $childNodes); - } - - // Step 4 - if ($root->hasAttribute('itemref')) { - $tokens = preg_split('/\s+/', $root->getAttribute('itemref')); - - foreach ($tokens as $token) { - // @todo Implement xpath query and get the first item - } - } - - // Step 5 - while ($pending) { - // Step 6 - $current = array_pop($pending); - - // Step 7 - // in_array can't compare objects - /*if (in_array($current, $memory)) { - // There is MicrodataError - continue; - }*/ - $error = false; - - foreach ($memory as $memory_item) { - if ($current->isSameNode($memory_item)) { - // There is MicrodataError - $error = true; - break; - } - } - - if ($error) { - continue; - } - - // Step 8 - $memory[] = $current; - - // Step 9 - if (! $current->hasAttribute('itemscope')) { - if ($current->hasChildNodes()) { - $childNodes = iterator_to_array($current->childNodes); - - $childNodes = array_filter($childNodes, function ($node) { - return $node instanceof \DOMElement; - }); - - $pending = array_merge($pending, $childNodes); - } - } - - // Step 10 - if ($current->hasAttribute('itemprop') && /* hasPropertyNames */ $this->getPropertyNames($current)) { - $results[] = $current; - } - - // Step 11: Return to loop - } - - // Step 12: End of loop. Sort results in tree order. - - $results = array_reverse($results); - - // Step 13 - return $results; - } - - protected function getPropertyValue(\DOMElement $item) - { - - if ($item->hasAttribute('itemscope')) { - return $item; - } - - if ($item->hasAttribute('content')) { - return $item->getAttribute('content'); - } - - $base = $item->ownerDocument->documentURI; - - switch ($item->tagName) { - case 'audio': - case 'embed': - case 'iframe': - case 'img': - case 'source': - case 'track': - case 'video': - if ($item->hasAttribute('src')) { - $result = $item->getAttribute('src'); - - // @todo check against protocol relative urls like "//example.com/test.jpg" - return $this->isAbsoluteUri($result) ? $result : $base.$result; - } - case 'a': - case 'area': - case 'link': - if ($item->hasAttribute('href')) { - $result = $item->getAttribute('href'); - - return $this->isAbsoluteUri($result) ? $result : $base.$result; - } - case 'object': - if ($item->hasAttribute('data')) { - $result = $item->getAttribute('data'); - - return $this->isAbsoluteUri($result) ? $result : $base.$result; - } - case 'data': - case 'meter': - if ($item->hasAttribute('value')) { - return $item->getAttribute('value'); - } - case 'time': - if ($item->hasAttribute('datetime')) { - return $item->getAttribute('datetime'); - } - default: - return $item->textContent; - } - } - protected function isItem($element) { return $element instanceof \DOMElement && $element->hasAttribute('itemscope'); } - - protected function isTypedItem(\DOMElement $item) - { - $tokens = []; - - if ($item->hasAttribute('itemtype')) { - $tokens = preg_split("/\s+/", $item->getAttribute('itemtype')); - } - - return !empty($tokens); - } - - protected function isAbsoluteUri(string $uri) - { - return preg_match("/^\w+:/", trim($uri)); - } }