microdata-parser/src/MicrodataParser.php

77 lines
1.9 KiB
PHP
Raw Normal View History

2018-11-10 12:05:55 +00:00
<?php
namespace YusufKandemir\MicrodataParser;
/**
 * Extracts microdata items from a DOM document.
 *
 * Top-level items (elements carrying `itemscope` but no `itemprop`) are
 * located once in the constructor. extractMicrodata() then converts each of
 * them, together with every item nested in its properties, into plain
 * stdClass objects.
 */
class MicrodataParser
{
    /**
     * Result of the XPath query for top-level items.
     *
     * @var \DOMNodeList|false
     */
    protected $topLevelItems;

    /**
     * @param \DOMDocument $dom Document to scan for microdata.
     */
    public function __construct(\DOMDocument $dom)
    {
        // Have the document hand out MicrodataDOMElement instances, so the
        // element-level helpers called in getObject() (getProperties(),
        // getPropertyValue(), getPropertyNames()) exist on every element.
        $dom->registerNodeClass(\DOMElement::class, MicrodataDOMElement::class);

        $xpath = new \DOMXPath($dom);

        // An item that is itself a property of another item is reached through
        // its parent in getObject(), so only items without itemprop start here.
        $this->topLevelItems = $xpath->query('//*[@itemscope and not(@itemprop)]');
    }

    /**
     * Converts every top-level item of the document.
     *
     * @return \stdClass Object whose `items` property is the list of
     *                   converted top-level items, in query order.
     */
    public function extractMicrodata()
    {
        $result = new \stdClass;
        $result->items = [];

        foreach ($this->topLevelItems as $topLevelItem) {
            $result->items[] = $this->getObject($topLevelItem);
        }

        return $result;
    }

    /**
     * Converts one item element, recursing into item-valued properties.
     *
     * The returned object has:
     *  - `type`: list of tokens from the `itemtype` attribute (empty list when
     *    the attribute is missing or blank);
     *  - `id`: the raw `itemid` attribute, only when the attribute is present;
     *  - `properties`: object mapping each property name to a list of values.
     *    A value is whatever the property element's getPropertyValue() returns,
     *    a nested object of this same shape, or the string 'ERROR' when the
     *    nested item is already being converted further up the call chain.
     *
     * @param \DOMElement   $item   Item element; it must provide getProperties(),
     *                              i.e. come from the document prepared in the
     *                              constructor.
     * @param \DOMElement[] $memory Items currently being converted on the path
     *                              from the top-level item down to $item.
     *
     * @return \stdClass
     */
    protected function getObject(\DOMElement $item, $memory = [])
    {
        $result = new \stdClass;

        // $memory is passed by value, so this entry is visible to nested calls
        // only and sibling items do not block one another.
        $memory[] = $item;

        $result->type = $this->splitAttributeTokens($item, 'itemtype');
        // @todo Check if types are valid absolute urls

        if ($item->hasAttribute('itemid')) {
            $result->id = $item->getAttribute('itemid');
        }
        // @todo Check if item ids are valid absolute urls or like isbn:xxx

        $properties = new \stdClass;

        foreach ($item->getProperties() as $element) {
            $value = $element->getPropertyValue();

            if ($this->isItem($value)) {
                // Guard the node we are about to recurse into; without this a
                // self-referencing item would recurse without end.
                $value = $this->isBeingProcessed($value, $memory)
                    ? 'ERROR'
                    : $this->getObject($value, $memory);
            }

            // One element may declare several property names; each of them
            // receives the same value.
            foreach ($element->getPropertyNames() as $name) {
                $properties->{$name}[] = $value;
            }
        }

        $result->properties = $properties;

        return $result;
    }

    /**
     * Tells whether a property value is itself an item.
     *
     * @param mixed $element Property value to inspect.
     *
     * @return bool True for a DOM element that carries `itemscope`.
     */
    protected function isItem($element)
    {
        return $element instanceof \DOMElement && $element->hasAttribute('itemscope');
    }

    /**
     * Splits an attribute into its whitespace-separated tokens.
     *
     * @param \DOMElement $element       Element to read from.
     * @param string      $attributeName Attribute holding the token list.
     *
     * @return string[] Tokens in document order; empty when the attribute is
     *                  missing or contains only whitespace.
     */
    private function splitAttributeTokens(\DOMElement $element, $attributeName)
    {
        // getAttribute() yields '' for a missing attribute, and NO_EMPTY drops
        // the empty pieces produced by leading, trailing or blank input.
        return preg_split(
            '/[ \t\n\f\r]+/',
            $element->getAttribute($attributeName),
            -1,
            PREG_SPLIT_NO_EMPTY
        );
    }

    /**
     * Tells whether a node is one of the items currently being converted.
     *
     * @param \DOMElement   $candidate Node about to be converted.
     * @param \DOMElement[] $memory    Items on the current conversion path.
     *
     * @return bool
     */
    private function isBeingProcessed(\DOMElement $candidate, array $memory)
    {
        foreach ($memory as $memoryItem) {
            if ($candidate->isSameNode($memoryItem)) {
                return true;
            }
        }

        return false;
    }
}