Docblocking and type hinting
This commit is contained in:
parent
59d302176d
commit
9eaf3cbcfa
@ -4,7 +4,15 @@ namespace YusufKandemir\MicrodataParser;
|
|||||||
|
|
||||||
abstract class Microdata
|
abstract class Microdata
|
||||||
{
|
{
|
||||||
public static function fromHTML($html, $documentURI = '')
|
/**
|
||||||
|
* Creates a MicrodataParser from an HTML string
|
||||||
|
*
|
||||||
|
* @param string $html HTML string to be parsed
|
||||||
|
* @param string $documentURI DocumentURI to be used in absolutizing URIs
|
||||||
|
*
|
||||||
|
* @return MicrodataParser
|
||||||
|
*/
|
||||||
|
public static function fromHTML(string $html, string $documentURI = '') : MicrodataParser
|
||||||
{
|
{
|
||||||
$dom = new MicrodataDOMDocument;
|
$dom = new MicrodataDOMDocument;
|
||||||
$dom->loadHTML($html, LIBXML_NOERROR);
|
$dom->loadHTML($html, LIBXML_NOERROR);
|
||||||
@ -13,7 +21,15 @@ abstract class Microdata
|
|||||||
return new MicrodataParser($dom);
|
return new MicrodataParser($dom);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function fromHTMLFile($filename, $documentURI = '')
|
/**
|
||||||
|
* Creates a MicrodataParser from an HTML file
|
||||||
|
*
|
||||||
|
* @param string $filename Path to the file to be parsed
|
||||||
|
* @param string $documentURI DocumentURI to be used in absolutizing URIs
|
||||||
|
*
|
||||||
|
* @return MicrodataParser
|
||||||
|
*/
|
||||||
|
public static function fromHTMLFile(string $filename, string $documentURI = '') : MicrodataParser
|
||||||
{
|
{
|
||||||
$dom = new MicrodataDOMDocument;
|
$dom = new MicrodataDOMDocument;
|
||||||
$dom->loadHTMLFile($filename, LIBXML_NOERROR);
|
$dom->loadHTMLFile($filename, LIBXML_NOERROR);
|
||||||
@ -22,7 +38,16 @@ abstract class Microdata
|
|||||||
return new MicrodataParser($dom);
|
return new MicrodataParser($dom);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function fromDOMDocument(\DOMDocument $domDocument)
|
/**
|
||||||
|
* Creates a MicrodataParser from a DOMDocument instance.
|
||||||
|
* If you already have a MicrodataDOMDocument, instantiate the MicrodataParser class directly to avoid conversion.
|
||||||
|
*
|
||||||
|
* @param \DOMDocument $domDocument DOMDocument to be parsed.
|
||||||
|
* Needs to have documentURI property to be used in absolutizing URIs if wanted.
|
||||||
|
*
|
||||||
|
* @return MicrodataParser
|
||||||
|
*/
|
||||||
|
public static function fromDOMDocument(\DOMDocument $domDocument) : MicrodataParser
|
||||||
{
|
{
|
||||||
$dom = new MicrodataDOMDocument;
|
$dom = new MicrodataDOMDocument;
|
||||||
$importedNode = $dom->importNode($domDocument->documentElement, true);
|
$importedNode = $dom->importNode($domDocument->documentElement, true);
|
||||||
|
@ -4,13 +4,23 @@ namespace YusufKandemir\MicrodataParser;
|
|||||||
|
|
||||||
class MicrodataDOMDocument extends \DOMDocument
|
class MicrodataDOMDocument extends \DOMDocument
|
||||||
{
|
{
|
||||||
|
/** @var \DOMXPath */
|
||||||
public $xpath;
|
public $xpath;
|
||||||
|
|
||||||
public function getItems()
|
/**
|
||||||
|
* Get top-level items of the document
|
||||||
|
*
|
||||||
|
* @return \DOMNodeList List of top level items as elements
|
||||||
|
*/
|
||||||
|
public function getItems() : \DOMNodeList
|
||||||
{
|
{
|
||||||
return $this->xpath->query('//*[@itemscope and not(@itemprop)]');
|
return $this->xpath->query('//*[@itemscope and not(@itemprop)]');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritdoc}
|
||||||
|
* Also assigns $xpath with DOMXPath of freshly loaded DOMDocument
|
||||||
|
*/
|
||||||
public function loadHTML($source, $options = 0)
|
public function loadHTML($source, $options = 0)
|
||||||
{
|
{
|
||||||
$return = parent::loadHTML($source, $options);
|
$return = parent::loadHTML($source, $options);
|
||||||
@ -20,6 +30,10 @@ class MicrodataDOMDocument extends \DOMDocument
|
|||||||
return $return;
|
return $return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritdoc}
|
||||||
|
* Also assigns $xpath with DOMXPath of freshly loaded DOMDocument
|
||||||
|
*/
|
||||||
public function loadHTMLFile($filename, $options = 0)
|
public function loadHTMLFile($filename, $options = 0)
|
||||||
{
|
{
|
||||||
$return = parent::loadHTMLFile($filename, $options);
|
$return = parent::loadHTMLFile($filename, $options);
|
||||||
|
@ -4,7 +4,10 @@ namespace YusufKandemir\MicrodataParser;
|
|||||||
|
|
||||||
class MicrodataDOMElement extends \DOMElement
|
class MicrodataDOMElement extends \DOMElement
|
||||||
{
|
{
|
||||||
public function getProperties()
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function getProperties() : array
|
||||||
{
|
{
|
||||||
$results = [];
|
$results = [];
|
||||||
$memory = [$this];
|
$memory = [$this];
|
||||||
@ -45,12 +48,18 @@ class MicrodataDOMElement extends \DOMElement
|
|||||||
return array_reverse($results);
|
return array_reverse($results);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function hasPropertyNames()
|
/**
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
public function hasPropertyNames() : bool
|
||||||
{
|
{
|
||||||
return !empty($this->tokenizeAttribute('itemprop'));
|
return !empty($this->tokenizeAttribute('itemprop'));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getPropertyNames()
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function getPropertyNames() : array
|
||||||
{
|
{
|
||||||
$tokens = $this->tokenizeAttribute('itemprop');
|
$tokens = $this->tokenizeAttribute('itemprop');
|
||||||
|
|
||||||
@ -67,6 +76,9 @@ class MicrodataDOMElement extends \DOMElement
|
|||||||
return array_unique($properties);
|
return array_unique($properties);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return $this|string
|
||||||
|
*/
|
||||||
public function getPropertyValue()
|
public function getPropertyValue()
|
||||||
{
|
{
|
||||||
if ($this->hasAttribute('itemscope')) {
|
if ($this->hasAttribute('itemscope')) {
|
||||||
@ -126,11 +138,26 @@ class MicrodataDOMElement extends \DOMElement
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether a string is an absolute URI
|
||||||
|
* Note: As it uses a simple regex to check, it is not that reliable
|
||||||
|
*
|
||||||
|
* @see \preg_match() for return values
|
||||||
|
*
|
||||||
|
* @param string $uri
|
||||||
|
*
|
||||||
|
* @return false|int
|
||||||
|
*/
|
||||||
protected function isAbsoluteUri(string $uri)
|
protected function isAbsoluteUri(string $uri)
|
||||||
{
|
{
|
||||||
return preg_match("/^\w+:/", trim($uri));
|
return preg_match("/^\w+:/", trim($uri));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters out TextNodes etc. and returns child ElementNodes as array
|
||||||
|
*
|
||||||
|
* @return array Result array which contains child ElementNodes
|
||||||
|
*/
|
||||||
protected function getChildElementNodes()
|
protected function getChildElementNodes()
|
||||||
{
|
{
|
||||||
$childNodes = [];
|
$childNodes = [];
|
||||||
@ -144,7 +171,14 @@ class MicrodataDOMElement extends \DOMElement
|
|||||||
return $childNodes;
|
return $childNodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function tokenizeAttribute($attributeName)
|
/**
|
||||||
|
* Tokenizes value of given attribute
|
||||||
|
*
|
||||||
|
* @param string $attributeName Name of the attribute
|
||||||
|
*
|
||||||
|
* @return array|array[]|false|string[]
|
||||||
|
*/
|
||||||
|
public function tokenizeAttribute(string $attributeName)
|
||||||
{
|
{
|
||||||
$attribute = [];
|
$attribute = [];
|
||||||
|
|
||||||
@ -155,7 +189,16 @@ class MicrodataDOMElement extends \DOMElement
|
|||||||
return $attribute;
|
return $attribute;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function tokenize($attribute)
|
/**
|
||||||
|
* Splits the given attribute value on whitespace into an array
|
||||||
|
*
|
||||||
|
* @see \preg_split() for possible return values and behaviour
|
||||||
|
*
|
||||||
|
* @param string $attribute
|
||||||
|
*
|
||||||
|
* @return array[]|false|string[]
|
||||||
|
*/
|
||||||
|
protected function tokenize(string $attribute)
|
||||||
{
|
{
|
||||||
return preg_split('/\s+/', trim($attribute));
|
return preg_split('/\s+/', trim($attribute));
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,14 @@ namespace YusufKandemir\MicrodataParser;
|
|||||||
|
|
||||||
class MicrodataParser
|
class MicrodataParser
|
||||||
{
|
{
|
||||||
|
/** @var MicrodataDOMDocument */
|
||||||
protected $dom;
|
protected $dom;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MicrodataParser constructor.
|
||||||
|
*
|
||||||
|
* @param MicrodataDOMDocument $dom
|
||||||
|
*/
|
||||||
public function __construct(MicrodataDOMDocument $dom)
|
public function __construct(MicrodataDOMDocument $dom)
|
||||||
{
|
{
|
||||||
$dom->registerNodeClass(\DOMDocument::class, MicrodataDOMDocument::class);
|
$dom->registerNodeClass(\DOMDocument::class, MicrodataDOMDocument::class);
|
||||||
@ -14,23 +20,46 @@ class MicrodataParser
|
|||||||
$this->dom = $dom;
|
$this->dom = $dom;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function toArray()
|
/**
|
||||||
|
* Extracts and converts microdata to associative array
|
||||||
|
*
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function toArray() : array
|
||||||
{
|
{
|
||||||
// Somewhat hacky way to convert deep objects
|
// Somewhat hacky way to convert deep objects
|
||||||
return json_decode(json_encode($this->extractMicrodata()), true);
|
return json_decode(json_encode($this->extractMicrodata()), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function toObject()
|
/**
|
||||||
|
* Extracts and converts microdata to object
|
||||||
|
*
|
||||||
|
* @return \stdClass
|
||||||
|
*/
|
||||||
|
public function toObject() : \stdClass
|
||||||
{
|
{
|
||||||
return $this->extractMicrodata();
|
return $this->extractMicrodata();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts and converts microdata to json using \json_encode()
|
||||||
|
*
|
||||||
|
* @see \json_encode() for a description of parameters and return values
|
||||||
|
*
|
||||||
|
* @param int $options
|
||||||
|
* @param int $depth
|
||||||
|
*
|
||||||
|
* @return false|string
|
||||||
|
*/
|
||||||
public function toJSON($options = 0, $depth = 512)
|
public function toJSON($options = 0, $depth = 512)
|
||||||
{
|
{
|
||||||
return json_encode($this->extractMicrodata(), $options, $depth);
|
return json_encode($this->extractMicrodata(), $options, $depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function extractMicrodata()
|
/**
|
||||||
|
* @return \stdClass
|
||||||
|
*/
|
||||||
|
protected function extractMicrodata() : \stdClass
|
||||||
{
|
{
|
||||||
$result = new \stdClass;
|
$result = new \stdClass;
|
||||||
|
|
||||||
@ -43,7 +72,13 @@ class MicrodataParser
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function getObject(MicrodataDOMElement $item, $memory = [])
|
/**
|
||||||
|
* @param MicrodataDOMElement $item
|
||||||
|
* @param array $memory
|
||||||
|
*
|
||||||
|
* @return \stdClass
|
||||||
|
*/
|
||||||
|
protected function getObject(MicrodataDOMElement $item, $memory = []) : \stdClass
|
||||||
{
|
{
|
||||||
$result = new \stdClass;
|
$result = new \stdClass;
|
||||||
|
|
||||||
@ -85,7 +120,14 @@ class MicrodataParser
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function isItem($element)
|
/**
|
||||||
|
* Checks whether the given parameter is a MicrodataDOMElement with an itemscope attribute
|
||||||
|
*
|
||||||
|
* @param $element
|
||||||
|
*
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
protected function isItem($element) : bool
|
||||||
{
|
{
|
||||||
return $element instanceof MicrodataDOMElement && $element->hasAttribute('itemscope');
|
return $element instanceof MicrodataDOMElement && $element->hasAttribute('itemscope');
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user