* @copyright (c) 2022, NoccyLabs
 * @package noccylabs/dataset
 */
class DatasetManager
{
    /**
     * Package versions read from composer/installed.php, keyed by package
     * name (org/package). Packages without a version entry are not listed.
     */
    private static array $packageVersions = [];

    /**
     * Registered datasets, keyed by the value of Dataset::getIdentifier().
     */
    private static array $datasets = [];

    /**
     * Create a manager. The vendor directory is only scanned while the
     * static dataset registry is still empty, so the registry is shared
     * between all instances.
     */
    public function __construct()
    {
        if (count(self::$datasets) === 0) {
            $this->scanForDatasets();
        }
    }

    /**
     * Return all the available datasets
     *
     * @return Array The available datasets, keyed by identifier
     */
    public function getAvailableDatasets(): array
    {
        return self::$datasets;
    }

    /**
     * Directly return a reader for a specific dataset.
     *
     * @param string $identifier The dataset identifier
     * @return Iterator A reader for the data
     * @throws InvalidDatasetException if the dataset can not be opened
     * @throws UnknownDatasetException if the dataset does not exist
     */
    public function openDataset(string $identifier): Iterator
    {
        return $this->getDataset($identifier)->open();
    }

    /**
     * Return a Dataset object containing metadata and methods to retrieve
     * a reader for the data in the set.
     *
     * @param string $identifier The dataset identifier
     * @return Dataset The registered dataset
     * @throws UnknownDatasetException if the dataset does not exist
     */
    public function getDataset(string $identifier): Dataset
    {
        if (!array_key_exists($identifier, self::$datasets)) {
            throw UnknownDatasetException::DatasetNotFound();
        }

        return self::$datasets[$identifier];
    }

    /**
     * Find the vendor directory and try to locate all bundled datasets
     *
     * Every "<vendor>/<org>/<package>/dataset.json" found is handed to
     * scanPackageDatasets(). Package versions are (re)read from
     * "<vendor>/composer/installed.php" first, when that file exists.
     */
    private function scanForDatasets(): void
    {
        $root = $this->determineVendorPath();
        if (!$root) {
            // Skip loading if we couldn't determine the root
            return;
        }

        // glob() returns false on error; treat that as "nothing found".
        $matches = glob($root . "/*/*/dataset.json") ?: [];

        self::$packageVersions = [];
        // Check the plain path: realpath() would return false for a missing
        // file, which must not be passed on to the filesystem functions.
        $installedFile = $root . "/composer/installed.php";
        if (is_file($installedFile)) {
            $installed = include $installedFile;
            $versions = is_array($installed) ? ($installed['versions'] ?? null) : null;
            if (is_array($versions)) {
                foreach ($versions as $name => $info) {
                    // Replaced/provided packages are listed without a
                    // 'version' key; skip those instead of raising a warning.
                    if (is_array($info) && isset($info['version'])) {
                        self::$packageVersions[$name] = $info['version'];
                    }
                }
            }
        }

        foreach ($matches as $match) {
            $path = dirname($match);
            // The package name is derived from the last two path segments.
            $package = basename(dirname($path)) . "/" . basename($path);
            $this->scanPackageDatasets($package, $path);
        }
    }

    /**
     * Read the dataset.json of a single package and load the datasets
     * listed under its "datasets" key. Problems with the file are reported
     * through trigger_error() and the package is skipped.
     *
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package (..../vendor/org/package)
     */
    private function scanPackageDatasets(string $package, string $path): void
    {
        $file = $path . "/dataset.json";

        // Best effort: an unreadable file is reported below, not as a warning here.
        $raw = @file_get_contents($file);
        $json = ($raw === false) ? null : json_decode($raw, true);
        if (!$json || !is_array($json)) {
            trigger_error("Unable to parse dataset.json in {$package}");
            return;
        }

        if (!isset($json['datasets']) || !is_array($json['datasets'])) {
            trigger_error("No datasets defined in dataset.json in {$package}");
            return;
        }

        $this->loadDatasets($json['datasets'], null, $package, $path);
    }

    /**
     * Register every dataset described in a (possibly nested) definition.
     *
     * An entry with a "filename" key is a dataset; its filename is made
     * relative to the package path and a Dataset is constructed with the
     * name "<package>#<dotted.name>". An entry without a "filename" key is
     * treated as a group whose children are loaded recursively, with the
     * group name appended to the dot-separated prefix.
     *
     * @param array $datasets The definitions, keyed by dataset or group name
     * @param string|null $prefix Dot-separated name of the enclosing groups, or null at the top level
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package
     */
    private function loadDatasets(array $datasets, ?string $prefix, string $package, string $path): void
    {
        $version = self::$packageVersions[$package] ?? null;

        foreach ($datasets as $name => $options) {
            $qualified = ltrim($prefix . "." . $name, ".");

            if (!is_array($options)) {
                trigger_error("Invalid dataset entry {$qualified} in {$package}");
                continue;
            }

            if (!array_key_exists('filename', $options)) {
                $this->loadDatasets($options, $qualified, $package, $path);
                // Keep going: siblings after a group must be loaded too.
                continue;
            }

            $options['filename'] = $path . "/" . $options['filename'];
            $dataset = new Dataset(sprintf("%s#%s", $package, $qualified), $options, $version);
            $this->registerDataset($dataset);
        }
    }

    /**
     * Locate the directory containing "autoload.php" by walking upwards
     * from this file's directory (or from the "tests" directory next to it
     * when the NOCCYLABS_DATASET_TEST constant is defined).
     *
     * @return string|null The directory holding autoload.php, or null if
     *      the filesystem root was reached without finding one
     */
    private function determineVendorPath(): ?string
    {
        $dir = defined("NOCCYLABS_DATASET_TEST")
            ? (dirname(__DIR__) . "/tests")
            : __DIR__;

        while (true) {
            if (file_exists($dir . "/autoload.php")) {
                return $dir;
            }
            $parent = dirname($dir);
            if ($parent == $dir) {
                // dirname() no longer changes the path: top of the tree.
                return null;
            }
            $dir = $parent;
        }
    }

    /**
     * Register a dataset
     *
     * The dataset is stored under its identifier. If a dataset with the
     * same identifier is already registered, the call is a no-op.
     *
     * @param Dataset $dataset The dataset to register
     */
    public function registerDataset(Dataset $dataset)
    {
        $id = $dataset->getIdentifier();
        if (array_key_exists($id, self::$datasets)) {
            // Don't overwrite previously registered datasets. Investigate how
            // this can be handled better in the future.
            return;
        }

        self::$datasets[$id] = $dataset;
    }
}