diff --git a/src/MicrodataParser.php b/src/MicrodataParser.php
new file mode 100644
index 0000000..0e5eb22
--- /dev/null
+++ b/src/MicrodataParser.php
@@ -0,0 +1,81 @@
+loadHTML($html, \LIBXML_NOERROR);
+        $dom->documentURI = $documentURI;
+
+        return $this->parse($dom, $absoluteUriHandler);
+    }
+
+    /**
+     * Loads an HTML file into a fresh DOMDocument and extracts microdata from it.
+     *
+     * The file is loaded with LIBXML_NOERROR. The result of the load call is not
+     * inspected here, so the document is handed to the parser regardless of it.
+     *
+     * @param string        $path               Path to the file to be parsed
+     * @param string        $documentURI        DocumentURI to be used in absolutizing URIs
+     * @param callable|null $absoluteUriHandler Forwarded unchanged to MicrodataElementParser
+     *
+     * @see MicrodataElementParser::$absoluteUriHandler
+     *
+     * @return \stdClass Result returned by MicrodataDocumentParser::parse()
+     */
+    public function parseHTMLFile(string $path, string $documentURI = '', callable $absoluteUriHandler = null) : \stdClass
+    {
+        $dom = new \DOMDocument;
+        $dom->loadHTMLFile($path, \LIBXML_NOERROR);
+        // Assigned unconditionally, including when $documentURI is the default empty string.
+        $dom->documentURI = $documentURI;
+
+        return $this->parse($dom, $absoluteUriHandler);
+    }
+
+    /**
+     * Extracts microdata from an already loaded DOMDocument instance.
+     *
+     * @param \DOMDocument  $domDocument        DOMDocument to be parsed.
+     * @param string        $documentURI        If a non-empty string is provided, it becomes the new value
+     *                                          of the documentURI property of $domDocument (the passed
+     *                                          instance is modified). An empty string leaves it untouched.
+     * @param callable|null $absoluteUriHandler Forwarded unchanged to MicrodataElementParser
+     *
+     * @see MicrodataElementParser::$absoluteUriHandler
+     *
+     * @return \stdClass Result returned by MicrodataDocumentParser::parse()
+     */
+    public function parseDOMDocument(\DOMDocument $domDocument, string $documentURI = '', callable $absoluteUriHandler = null) : \stdClass
+    {
+        // Strict comparison instead of empty(): empty('0') is true, which would
+        // silently discard a non-empty URI.
+        if ($documentURI !== '') {
+            $domDocument->documentURI = $documentURI;
+        }
+
+        return $this->parse($domDocument, $absoluteUriHandler);
+    }
+
+    /**
+     * Runs the extraction shared by all public entry points: builds a
+     * MicrodataElementParser with the given handler, hands it together with the
+     * document to a MicrodataDocumentParser, and returns that parser's result.
+     *
+     * @param \DOMDocument  $dom                Document to extract microdata from
+     * @param callable|null $absoluteUriHandler Passed to the MicrodataElementParser constructor
+     *
+     * @see MicrodataElementParser::$absoluteUriHandler
+     *
+     * @return \stdClass Result returned by MicrodataDocumentParser::parse()
+     */
+    protected function parse(\DOMDocument $dom, callable $absoluteUriHandler = null) : \stdClass
+    {
+        $elementParser = new MicrodataElementParser($absoluteUriHandler);
+        $documentParser = new MicrodataDocumentParser($dom, $elementParser);
+
+        return $documentParser->parse();
+    }
+}