Moved document parsing logic to DocumentParser

This commit is contained in:
Yusuf Kandemir 2018-12-27 09:36:42 +03:00
parent d8eaf093da
commit 73899ce987
2 changed files with 59 additions and 47 deletions

View File

@ -1,47 +0,0 @@
<?php
namespace YusufKandemir\MicrodataParser;
class MicrodataDOMDocument extends \DOMDocument
{
    /**
     * XPath evaluator bound to this document.
     *
     * Assigned by loadHTML() / loadHTMLFile(); created on demand by getItems()
     * when it has not been assigned yet.
     *
     * @var \DOMXPath|null
     */
    public $xpath;

    /**
     * Get top-level items of the document
     *
     * An element is selected when it has an "itemscope" attribute and no
     * "itemprop" attribute.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-top-level-microdata-item
     *
     * @return \DOMNodeList List of top level items as elements
     */
    public function getItems() : \DOMNodeList
    {
        // The document may have been populated without going through
        // loadHTML()/loadHTMLFile() (e.g. the inherited load()/loadXML()), in
        // which case no evaluator exists yet. Create it here instead of
        // failing with a call on null.
        if ($this->xpath === null) {
            $this->xpath = new \DOMXPath($this);
        }

        return $this->xpath->query('//*[@itemscope and not(@itemprop)]');
    }

    /**
     * {@inheritdoc}
     * Also assigns $xpath with DOMXPath of freshly loaded DOMDocument
     */
    public function loadHTML($source, $options = 0)
    {
        $return = parent::loadHTML($source, $options);

        // Rebuild the evaluator after every load so it targets the new content.
        $this->xpath = new \DOMXPath($this);

        return $return;
    }

    /**
     * {@inheritdoc}
     * Also assigns $xpath with DOMXPath of freshly loaded DOMDocument
     */
    public function loadHTMLFile($filename, $options = 0)
    {
        $return = parent::loadHTMLFile($filename, $options);

        // Rebuild the evaluator after every load so it targets the new content.
        $this->xpath = new \DOMXPath($this);

        return $return;
    }
}

View File

@ -0,0 +1,59 @@
<?php
namespace YusufKandemir\MicrodataParser;
class MicrodataDocumentParser
{
    /** @var \DOMDocument Document handed to the constructor */
    protected $dom;

    /** @var \DOMXPath XPath evaluator created for $dom */
    protected $xpath;

    /** @var MicrodataElementParser Parser invoked for every top-level item */
    protected $elementParser;

    /**
     * Binds the parser to a document.
     *
     * @param \DOMDocument $dom Document to extract microdata from
     * @param MicrodataElementParser|null $elementParser Element parser to use;
     *                                                   a new MicrodataElementParser is created when null
     */
    public function __construct(\DOMDocument $dom, MicrodataElementParser $elementParser = null)
    {
        $this->dom = $dom;
        $this->xpath = new \DOMXPath($dom);

        if ($elementParser === null) {
            $elementParser = new MicrodataElementParser();
        }
        $this->elementParser = $elementParser;
    }

    /**
     * Runs the element parser over every top-level item of the document.
     *
     * @return \stdClass Object whose "items" property is the list of per-item
     *                   results, in the order the items were returned by the query
     */
    public function parse() : \stdClass
    {
        $parsedItems = [];
        foreach ($this->getTopLevelItems() as $element) {
            $parsedItems[] = $this->elementParser->parse($element);
        }

        $document = new \stdClass();
        $document->items = $parsedItems;

        return $document;
    }

    /**
     * Selects the elements that carry "itemscope" but no "itemprop" attribute.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-top-level-microdata-item
     *
     * @return \DOMNodeList
     */
    protected function getTopLevelItems() : \DOMNodeList
    {
        $expression = '//*[@itemscope and not(@itemprop)]';

        return $this->xpath->query($expression);
    }
}