2018-11-11 17:49:54 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace YusufKandemir\MicrodataParser;
|
|
|
|
|
|
|
|
/**
 * DOM element with helpers for reading HTML microdata
 * (itemscope / itemprop / itemref / itemtype attributes).
 *
 * Several methods call methods of this class on other nodes of the same document,
 * so the owner document is expected to create its elements as instances of this class.
 */
class MicrodataDOMElement extends \DOMElement
{
    /**
     * Tag name => attribute holding a URL that is used as the property value.
     *
     * @var string[]
     */
    private static $uriAttributes = [
        'audio'  => 'src',
        'embed'  => 'src',
        'iframe' => 'src',
        'img'    => 'src',
        'source' => 'src',
        'track'  => 'src',
        'video'  => 'src',
        'a'      => 'href',
        'area'   => 'href',
        'link'   => 'href',
        'object' => 'data',
    ];

    /**
     * Tag name => attribute whose raw value is used as the property value.
     *
     * @var string[]
     */
    private static $plainAttributes = [
        'data'  => 'value',
        'meter' => 'value',
        'time'  => 'datetime',
    ];

    /**
     * Collects the elements that contribute properties to this item.
     *
     * Starting from the child elements of this element and from the elements referenced
     * by its itemref attribute, the tree is crawled without descending into nested
     * itemscope elements. Every visited element that has at least one property name
     * in its itemprop attribute is part of the result.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-item-properties for details of algorithm
     *
     * @return array Elements carrying properties of this item
     */
    public function getProperties() : array
    {
        $results = [];
        $memory = [$this];
        $pending = $this->getChildElementNodes();

        // tokenizeAttribute() yields an empty list when itemref is missing or blank.
        foreach ($this->tokenizeAttribute('itemref') as $token) {
            // The token comes straight from the markup, so it has to be quoted as an
            // XPath literal; a raw double quote would otherwise break the expression.
            $expression = '//*[@id=' . $this->quoteXPathLiteral($token) . ']';
            $references = $this->ownerDocument->xpath->query($expression);

            $first = $references ? $references->item(0) : null;

            if ($first) {
                $pending[] = $first;
            }
        }

        while ($pending) {
            $current = array_pop($pending);

            foreach ($memory as $memory_item) {
                if ($current->isSameNode($memory_item)) {
                    continue 2; // Already visited: skip it and go on with the while loop
                }
            }

            $memory[] = $current;

            // A nested item owns its descendants, so they are not crawled from here.
            if (! $current->hasAttribute('itemscope')) {
                $pending = array_merge($pending, $current->getChildElementNodes());
            }

            if ($current->hasAttribute('itemprop') && $current->hasPropertyNames()) {
                $results[] = $current;
            }
        }

        // $pending is consumed as a stack (last element first), hence the reversal.
        return array_reverse($results);
    }

    /**
     * Tells whether the itemprop attribute of this element contains at least one token.
     *
     * @return bool
     */
    public function hasPropertyNames() : bool
    {
        return !empty($this->tokenizeAttribute('itemprop'));
    }

    /**
     * Returns the distinct tokens of the itemprop attribute, in order of first appearance.
     *
     * Tokens are returned exactly as written; names that are not absolute URLs
     * are not prefixed with any vocabulary identifier.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-name
     *
     * @return array List of property names, indexed from 0 without gaps
     */
    public function getPropertyNames() : array
    {
        // array_unique() keeps the original keys; re-index so callers get a plain list.
        return array_values(array_unique($this->tokenizeAttribute('itemprop')));
    }

    /**
     * Returns the value of the property represented by this element.
     *
     * - element with itemscope: the element itself (the value is a nested item)
     * - element with a content attribute: that attribute
     * - audio/embed/iframe/img/source/track/video: src attribute, as URL
     * - a/area/link: href attribute, as URL
     * - object: data attribute, as URL
     * - data/meter: value attribute
     * - time: datetime attribute
     * - anything else, or when the attribute listed above is missing: text content
     *
     * Only the attribute belonging to the element's own tag is consulted.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-value for details of algorithm
     *
     * @return $this|string
     */
    public function getPropertyValue()
    {
        if ($this->hasAttribute('itemscope')) {
            return $this;
        }

        if ($this->hasAttribute('content')) {
            return $this->getAttribute('content');
        }

        $tag = $this->tagName;

        if (isset(self::$uriAttributes[$tag]) && $this->hasAttribute(self::$uriAttributes[$tag])) {
            return $this->resolveUri($this->getAttribute(self::$uriAttributes[$tag]));
        }

        if (isset(self::$plainAttributes[$tag]) && $this->hasAttribute(self::$plainAttributes[$tag])) {
            return $this->getAttribute(self::$plainAttributes[$tag]);
        }

        return $this->textContent;
    }

    /**
     * Checks a string to see if its absolute uri or not
     * Note: only the leading scheme ("letter, then letters, digits, '+', '-' or '.', then ':'")
     * is inspected; the rest of the string is not validated.
     *
     * @see \preg_match() for return values
     *
     * @param string $uri
     *
     * @return false|int
     */
    protected function isAbsoluteUri(string $uri)
    {
        return preg_match('/^[a-z][a-z0-9+.\-]*:/i', trim($uri));
    }

    /**
     * Filters out TextNodes etc. and returns child ElementNodes as array
     *
     * @return array Result array which contains child ElementNodes
     */
    protected function getChildElementNodes()
    {
        $elements = [];

        foreach ($this->childNodes as $node) {
            if ($node->nodeType === XML_ELEMENT_NODE) {
                $elements[] = $node;
            }
        }

        return $elements;
    }

    /**
     * Tokenizes value of given attribute
     *
     * @param string $attributeName Name of the attribute
     *
     * @return string[] Tokens of the attribute; empty when the attribute is absent or blank
     */
    public function tokenizeAttribute(string $attributeName)
    {
        if (! $this->hasAttribute($attributeName)) {
            return [];
        }

        return $this->tokenize($this->getAttribute($attributeName));
    }

    /**
     * Splits given attribute value in space characters to array
     *
     * Empty pieces are dropped, so an empty or whitespace-only value gives an empty array.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-split-a-string-on-spaces for definition of tokens
     *
     * @param string $attribute
     *
     * @return string[]
     */
    protected function tokenize(string $attribute)
    {
        $tokens = preg_split('/\s+/', trim($attribute), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() signals failure with false; callers iterate the result, so hand back a list.
        return $tokens === false ? [] : $tokens;
    }

    /**
     * Turns a URL-valued attribute into the value reported for the property.
     *
     * Absolute URIs are returned untouched; anything else is appended to the document URI.
     *
     * @todo resolve relative references properly (protocol relative urls like
     *       "//example.com/test.jpg", root relative paths, "../" segments)
     *
     * @param string $uri Raw attribute value
     *
     * @return string
     */
    private function resolveUri(string $uri) : string
    {
        if ($this->isAbsoluteUri($uri)) {
            return $uri;
        }

        return $this->ownerDocument->documentURI . $uri;
    }

    /**
     * Quotes a string so that it can be embedded in an XPath 1.0 expression.
     *
     * XPath 1.0 string literals have no escape sequences: a value containing both
     * quote characters has to be assembled with concat().
     *
     * @param string $value
     *
     * @return string XPath expression evaluating to $value
     */
    private function quoteXPathLiteral(string $value) : string
    {
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }

        return 'concat("' . str_replace('"', '", \'"\', "', $value) . '")';
    }
}
|