microdata-parser/src/MicrodataDOMElement.php

159 lines
4.3 KiB
PHP
Raw Normal View History

2018-11-11 17:49:54 +00:00
<?php
namespace YusufKandemir\MicrodataParser;
/**
 * DOM element extended with helpers for reading HTML microdata
 * (itemscope / itemprop / itemref / itemtype attributes).
 *
 * getProperties() calls methods of this class on descendant elements, so the
 * elements of the owning document must be instances of this class, e.g. via
 * DOMDocument::registerNodeClass('DOMElement', MicrodataDOMElement::class).
 */
class MicrodataDOMElement extends \DOMElement
{
    /**
     * Attribute that carries the property value, keyed by tag name.
     *
     * @var array<string, string>
     */
    private static $valueAttributes = [
        'audio'  => 'src',
        'embed'  => 'src',
        'iframe' => 'src',
        'img'    => 'src',
        'source' => 'src',
        'track'  => 'src',
        'video'  => 'src',
        'a'      => 'href',
        'area'   => 'href',
        'link'   => 'href',
        'object' => 'data',
        'data'   => 'value',
        'meter'  => 'value',
        'time'   => 'datetime',
    ];

    /**
     * Attributes whose value is a URL and is resolved against the document URI.
     *
     * @var string[]
     */
    private static $urlAttributes = ['src', 'href', 'data'];

    /**
     * Collects the elements that contribute properties to the item rooted at this element.
     *
     * Descendants are crawled without entering nested items (elements carrying
     * "itemscope" are reported but not descended into). Elements referenced by
     * this element's "itemref" attribute are crawled as well.
     *
     * @return MicrodataDOMElement[] Elements with at least one property name, in tree order.
     */
    public function getProperties()
    {
        $results = [];
        $memory = [$this];
        $pending = $this->getChildElementNodes();

        foreach ($this->findReferencedElements() as $referenced) {
            $pending[] = $referenced;
        }

        while ($pending) {
            $current = array_pop($pending);

            // An element is processed at most once; this also stops itemref cycles.
            if (self::containsNode($memory, $current)) {
                continue;
            }

            $memory[] = $current;

            if (! $current->hasAttribute('itemscope')) {
                $pending = array_merge($pending, $current->getChildElementNodes());
            }

            if ($current->hasPropertyNames()) {
                $results[] = $current;
            }
        }

        // The crawl order depends on the stack; the result order must not.
        return self::sortInTreeOrder($results);
    }

    /**
     * Tells whether the "itemprop" attribute names at least one property.
     *
     * @return bool False when the attribute is missing, empty or whitespace-only.
     */
    public function hasPropertyNames()
    {
        return $this->tokenizeAttribute('itemprop') !== [];
    }

    /**
     * Returns the distinct property names listed in the "itemprop" attribute.
     *
     * @todo Prefix non-absolute names with the vocabulary identifier when the
     *       item has an "itemtype"; currently names are returned as written.
     *
     * @return string[] Unique names in order of first appearance, indexed from 0.
     */
    public function getPropertyNames()
    {
        return array_values(array_unique($this->tokenizeAttribute('itemprop')));
    }

    /**
     * Returns the value this element contributes to its property.
     *
     * Order of precedence: the element itself when it has "itemscope"; the
     * "content" attribute when present; the tag-specific attribute (src, href,
     * data, value, datetime) when the tag defines one and the attribute is
     * present; otherwise the element's text content. URL attributes are
     * resolved against the owner document's URI.
     *
     * @return MicrodataDOMElement|string
     */
    public function getPropertyValue()
    {
        if ($this->hasAttribute('itemscope')) {
            return $this;
        }

        if ($this->hasAttribute('content')) {
            return $this->getAttribute('content');
        }

        $tagName = $this->tagName;

        if (isset(self::$valueAttributes[$tagName])) {
            $attributeName = self::$valueAttributes[$tagName];

            // Only the attribute belonging to this tag is considered; attributes
            // that are meaningful for other tags must not leak in.
            if ($this->hasAttribute($attributeName)) {
                $value = $this->getAttribute($attributeName);

                return in_array($attributeName, self::$urlAttributes, true)
                    ? $this->resolveUri($value)
                    : $value;
            }
        }

        return $this->textContent;
    }

    /**
     * Tells whether the given string starts with a URI scheme ("scheme:").
     *
     * @param string $uri
     *
     * @return bool
     */
    protected function isAbsoluteUri(string $uri)
    {
        // Scheme syntax per RFC 3986: a letter followed by letters, digits, "+", "-" or ".".
        return preg_match('/^[a-z][a-z0-9+.\-]*:/i', trim($uri)) === 1;
    }

    /**
     * Returns the direct children of this element that are element nodes.
     *
     * @return \DOMNode[]
     */
    protected function getChildElementNodes()
    {
        $elements = [];

        foreach ($this->childNodes as $childNode) {
            if ($childNode->nodeType === XML_ELEMENT_NODE) {
                $elements[] = $childNode;
            }
        }

        return $elements;
    }

    /**
     * Splits the value of an attribute into its whitespace-separated tokens.
     *
     * @param string $attributeName
     *
     * @return string[] Empty when the attribute is missing or contains no tokens.
     */
    public function tokenizeAttribute($attributeName)
    {
        if (! $this->hasAttribute($attributeName)) {
            return [];
        }

        return $this->tokenize($this->getAttribute($attributeName));
    }

    /**
     * Splits a string on runs of whitespace, dropping empty tokens.
     *
     * @param string $attribute
     *
     * @return string[]
     */
    protected function tokenize($attribute)
    {
        $tokens = preg_split('/\s+/', (string) $attribute, -1, PREG_SPLIT_NO_EMPTY);

        return $tokens === false ? [] : $tokens;
    }

    /**
     * Finds, for every id listed in "itemref", the first element of the document with that id.
     *
     * @return \DOMNode[]
     */
    private function findReferencedElements(): array
    {
        $ids = $this->tokenizeAttribute('itemref');

        if ($ids === [] || $this->ownerDocument === null) {
            return [];
        }

        // Ids are compared in PHP rather than embedded in the XPath expression,
        // so quotes inside an id need no escaping. Results come in document order.
        $firstById = [];
        $xpath = new \DOMXPath($this->ownerDocument);

        foreach ($xpath->query('//*[@id]') as $candidate) {
            $id = $candidate->getAttribute('id');

            if (! isset($firstById[$id])) {
                $firstById[$id] = $candidate;
            }
        }

        $referenced = [];

        foreach ($ids as $id) {
            if (isset($firstById[$id])) {
                $referenced[] = $firstById[$id];
            }
        }

        return $referenced;
    }

    /**
     * Tells whether the list already holds the given node.
     *
     * @param \DOMNode[] $nodes
     */
    private static function containsNode(array $nodes, \DOMNode $needle): bool
    {
        foreach ($nodes as $node) {
            if ($needle->isSameNode($node)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Sorts distinct nodes of one tree so that ancestors precede descendants
     * and earlier siblings precede later ones.
     *
     * @param \DOMNode[] $elements
     *
     * @return \DOMNode[]
     */
    private static function sortInTreeOrder(array $elements): array
    {
        $keyed = [];

        foreach ($elements as $element) {
            $keyed[self::treeOrderKey($element)] = $element;
        }

        ksort($keyed, SORT_STRING);

        return array_values($keyed);
    }

    /**
     * Builds a string key whose byte-wise ordering equals tree order.
     *
     * The key lists the sibling index of every node on the path from the top
     * of the tree down to $node, each zero-padded to a fixed width so string
     * comparison matches numeric comparison; an ancestor's key is a prefix of
     * its descendants' keys and therefore sorts first.
     */
    private static function treeOrderKey(\DOMNode $node): string
    {
        $key = '';

        for ($ancestor = $node; $ancestor->parentNode !== null; $ancestor = $ancestor->parentNode) {
            $index = 0;

            for ($sibling = $ancestor->previousSibling; $sibling !== null; $sibling = $sibling->previousSibling) {
                $index++;
            }

            $key = sprintf('/%010d', $index) . $key;
        }

        return $key;
    }

    /**
     * Resolves a URI reference against the owner document's URI (RFC 3986, section 5.2).
     *
     * Absolute references are returned unchanged. When no document URI is
     * available the reference is returned as given.
     */
    private function resolveUri(string $reference): string
    {
        if ($this->isAbsoluteUri($reference)) {
            return $reference;
        }

        $base = $this->ownerDocument !== null ? (string) $this->ownerDocument->documentURI : '';

        if ($base === '') {
            return $reference;
        }

        // Each captured component keeps its delimiter ("http:", "//host", "?q", "#f"),
        // so an empty string means "component absent" and recomposition is plain concatenation.
        preg_match('~^([^:/?#]+:)?(//[^/?#]*)?([^?#]*)(\?[^#]*)?~', $base, $baseParts);
        preg_match('~^(//[^/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?$~s', trim($reference), $refParts);
        $baseParts += array_fill(0, 5, '');
        $refParts += array_fill(0, 5, '');

        list(, $scheme, $baseAuthority, $basePath, $baseQuery) = $baseParts;
        list(, $authority, $path, $query, $fragment) = $refParts;

        if ($authority !== '') {
            // Protocol-relative reference: only the scheme comes from the base.
            $path = self::removeDotSegments($path);
        } else {
            $authority = $baseAuthority;

            if ($path === '') {
                $path = $basePath;

                if ($query === '') {
                    $query = $baseQuery;
                }
            } elseif ($path[0] === '/') {
                $path = self::removeDotSegments($path);
            } else {
                if ($baseAuthority !== '' && $basePath === '') {
                    $path = '/' . $path;
                } else {
                    // Replace everything after the last "/" of the base path.
                    $lastSlash = strrpos($basePath, '/');
                    $path = ($lastSlash === false ? '' : substr($basePath, 0, $lastSlash + 1)) . $path;
                }

                $path = self::removeDotSegments($path);
            }
        }

        return $scheme . $authority . $path . $query . $fragment;
    }

    /**
     * Removes "." and ".." segments from a path (RFC 3986, section 5.2.4).
     */
    private static function removeDotSegments(string $path): string
    {
        $isAbsolute = strncmp($path, '/', 1) === 0;
        $segments = explode('/', $isAbsolute ? (string) substr($path, 1) : $path);

        $kept = [];
        $endsWithDotSegment = false;

        foreach ($segments as $segment) {
            $endsWithDotSegment = ($segment === '.' || $segment === '..');

            if ($segment === '..') {
                // Going above the top of the path is silently ignored.
                array_pop($kept);
            } elseif ($segment !== '.') {
                $kept[] = $segment;
            }
        }

        $result = implode('/', $kept);

        // A trailing "." or ".." denotes a directory, so the path keeps a trailing slash.
        if ($endsWithDotSegment && $result !== '' && substr($result, -1) !== '/') {
            $result .= '/';
        }

        return ($isAbsolute ? '/' : '') . $result;
    }
}