microdata-parser/src/MicrodataDOMElement.php
Yusuf Kandemir a4a5cce8d8 Refactored long switch case with duplicate code to lookup table
This is more performant(can be ignored but an extra) due to nature of lookup tables. Also it is more compact and has ease of extension. It is a little bit scary at the first sight though.
2018-12-17 19:20:18 +03:00

218 lines
5.7 KiB
PHP

<?php
namespace YusufKandemir\MicrodataParser;
class MicrodataDOMElement extends \DOMElement
{
/** @var array "tag name" to "attribute name" mapping */
private static $tagNameLookup = [
'audio' => 'src',
'embed' => 'src',
'iframe' => 'src',
'img' => 'src',
'source' => 'src',
'track' => 'src',
'video' => 'src',
'a' => 'href',
'area' => 'href',
'link' => 'href',
'object' => 'data',
'data' => 'value',
'meter' => 'value',
'time' => 'datetime',
];
/** @var array Attributes that have absolute values */
private static $absoluteAttributes = ['src', 'href', 'data',];
/**
* @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-item-properties for details of algorithm
*
* @return array
*/
public function getProperties() : array
{
$results = [];
$memory = [$this];
$pending = $this->getChildElementNodes();
$pending = array_merge($pending, $this->getReferenceNodes());
while ($pending) {
$current = array_pop($pending);
foreach ($memory as $memory_item) {
if ($current->isSameNode($memory_item)) {
continue 2; // Skip next part and continue while loop if memory contains $current
}
}
$memory[] = $current;
if (! $current->hasAttribute('itemscope')) {
$pending = array_merge($pending, $current->getChildElementNodes());
}
if ($current->hasAttribute('itemprop') && $current->hasPropertyNames()) {
$results[] = $current;
}
}
return array_reverse($results);
}
/**
* @return bool
*/
public function hasPropertyNames() : bool
{
return !empty($this->tokenizeAttribute('itemprop'));
}
/**
* @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-name
*
* @return array
*/
public function getPropertyNames() : array
{
$tokens = $this->tokenizeAttribute('itemprop');
$properties = [];
foreach ($tokens as $token) {
if (!$this->isAbsoluteUri($token) && $this->tokenizeAttribute('itemtype')) {
$token = /*$vocabularyIdentifier . */ $token;
}
$properties[] = $token;
}
return array_unique($properties);
}
/**
* @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-value for details of algorithm
*
* @return $this|string
*/
public function getPropertyValue()
{
if ($this->hasAttribute('itemscope')) {
return $this;
}
if ($this->hasAttribute('content')) {
return $this->getAttribute('content');
}
$base = $this->ownerDocument->documentURI;
$value = '';
if (\array_key_exists($this->tagName, self::$tagNameLookup)) {
$attribute = self::$tagNameLookup[$this->tagName];
$value = $this->getAttribute($attribute);
if (!empty($value) && \in_array($attribute, self::$absoluteAttributes) && !$this->isAbsoluteUri($value)) {
$value = $base . $value;
}
}
return $value ?: $this->textContent;
}
/**
* Checks a string to see if its absolute uri or not
* Note: As it uses a simple regex to check, it is not that reliable
*
* @see \preg_match() for return values
*
* @param string $uri
*
* @return false|int
*/
protected function isAbsoluteUri(string $uri)
{
return preg_match("/^\w+:/", trim($uri));
}
/**
* Filters out TextNodes etc. and returns child ElementNodes as array
*
* @return array Result array which contains child ElementNodes
*/
protected function getChildElementNodes()
{
$childNodes = [];
foreach ($this->childNodes as $childNode) {
if ($childNode->nodeType == XML_ELEMENT_NODE) {
$childNodes[] = $childNode;
}
}
return $childNodes;
}
/**
* Tokenizes value of given attribute
*
* @param string $attributeName Name of the attribute
*
* @return array|array[]|false|string[]
*/
public function tokenizeAttribute(string $attributeName)
{
$attribute = [];
if ($this->hasAttribute($attributeName)) {
$attribute = $this->tokenize($this->getAttribute($attributeName));
}
return $attribute;
}
/**
* Splits given attribute value in space characters to array
*
* @see \preg_split() for possible return values and behaviour
*
* @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-split-a-string-on-spaces for definition of tokens
*
* @param string $attribute
*
* @return array[]|false|string[]
*/
protected function tokenize(string $attribute)
{
return preg_split('/\s+/', trim($attribute));
}
/**
* Finds the nodes that this node references through the document
*
* @see https://www.w3.org/TR/microdata/#dfn-item-properties 4th step
*
* @return array
*/
protected function getReferenceNodes(): array
{
$referenceNodes = [];
if ($this->hasAttribute('itemref')) {
$tokens = $this->tokenizeAttribute('itemref');
foreach ($tokens as $token) {
$references = $this->ownerDocument->xpath->query('//*[@id="' . $token . '"]');
if ($first = $references->item(0)) {
$referenceNodes[] = $first;
}
}
}
return $referenceNodes;
}
}