* @copyright (c) 2022, NoccyLabs
 * @package noccylabs/dataset
 */
class DatasetManager
{
    /**
     * Registry of known datasets, keyed by dataset identifier.
     *
     * Static, so the registry is shared by every DatasetManager instance.
     *
     * @var array<string, Dataset>
     */
    private static $datasets = [];

    /**
     * Create a manager, scanning for bundled datasets if the shared registry
     * is still empty.
     */
    public function __construct()
    {
        if (count(self::$datasets) === 0) {
            $this->scanForDatasets();
        }
    }

    /**
     * Return every registered dataset.
     *
     * @return array<string, Dataset> Datasets keyed by their identifier
     */
    public function getAvailableDatasets(): array
    {
        return self::$datasets;
    }

    /**
     * Look up a dataset by identifier and open it.
     *
     * @param string $identifier Identifier of a registered dataset
     * @return ReaderInterface Whatever Dataset::open() returns for that dataset
     */
    public function openDataset(string $identifier): ReaderInterface
    {
        return $this->getDataset($identifier)->open();
    }

    /**
     * Find the vendor directory and try to locate all bundled datasets.
     *
     * Every <vendor>/<org>/<package>/dataset.json that is found is handed to
     * scanPackageDatasets(). Nothing is loaded when the vendor directory can
     * not be determined.
     */
    private function scanForDatasets(): void
    {
        $root = $this->determineVendorPath();
        if (!$root) {
            // Skip loading if we couldn't determine the root
            return;
        }

        // glob() returns false on error; treat that the same as "no matches".
        $matches = glob($root . "/*/*/dataset.json") ?: [];
        foreach ($matches as $match) {
            $path = dirname($match);
            $package = basename(dirname($path)) . "/" . basename($path);
            $this->scanPackageDatasets($package, $path);
        }
    }

    /**
     * Read a package's dataset.json and register the datasets it declares.
     *
     * Problems with the manifest are reported through trigger_error() and the
     * package is skipped, so one broken package does not abort the scan.
     *
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package (..../vendor/org/package)
     */
    private function scanPackageDatasets(string $package, string $path): void
    {
        $file = $path . "/dataset.json";

        // The read is best-effort: an unreadable manifest is reported below.
        $raw = @file_get_contents($file);
        $json = ($raw === false) ? null : json_decode($raw, true);
        if (!is_array($json)) {
            trigger_error("Unable to parse dataset.json in {$package}");
            return;
        }

        if (!isset($json['datasets']) || !is_array($json['datasets'])) {
            trigger_error("No datasets declared in dataset.json in {$package}");
            return;
        }

        $this->loadDatasets($json['datasets'], null, $package, $path);
    }

    /**
     * Register every dataset found in a (possibly nested) dataset map.
     *
     * An entry that has a 'filename' key is a dataset; any other array entry
     * is treated as a group and descended into, with its key appended to the
     * dotted prefix. Identifiers take the form "org/package#group.name".
     *
     * NOTE(review): $path is only forwarded to the recursive call and never
     * reaches Dataset — confirm whether Dataset needs it to resolve files.
     *
     * @param array $datasets Map of name => dataset info or nested group
     * @param string|null $prefix Dotted group prefix, null at the top level
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package
     */
    private function loadDatasets(array $datasets, ?string $prefix, string $package, string $path): void
    {
        foreach ($datasets as $name => $info) {
            $qualified = ltrim($prefix . "." . $name, ".");

            if (!is_array($info)) {
                trigger_error("Ignoring malformed dataset entry {$qualified} in {$package}");
                continue;
            }

            if (!array_key_exists('filename', $info)) {
                // A group: descend, then carry on with the remaining siblings
                // rather than abandoning the rest of this level.
                $this->loadDatasets($info, $qualified, $package, $path);
                continue;
            }

            $pn = sprintf("%s#%s", $package, $qualified);
            $this->registerDataset(new Dataset($pn, $info));
        }
    }

    /**
     * Locate the composer vendor directory.
     *
     * @return string|null The directory three levels above this file's
     *      directory when it contains autoload.php, otherwise null
     */
    private function determineVendorPath(): ?string
    {
        if (file_exists(__DIR__ . "/../../../autoload.php")) {
            // we are installed as a composer package
            return dirname(__DIR__, 3);
        }
        return null;
    }

    /**
     * Add a dataset to the shared registry.
     *
     * The first registration of an identifier wins; later ones are ignored.
     *
     * @param Dataset $dataset The dataset to register
     */
    public function registerDataset(Dataset $dataset)
    {
        $id = $dataset->getIdentifier();
        if (array_key_exists($id, self::$datasets)) {
            // Don't overwrite previously registered datasets. Investigate how
            // this can be handled better in the future.
            return;
        }
        self::$datasets[$id] = $dataset;
    }

    /**
     * Look up a registered dataset by its identifier.
     *
     * NOTE(review): earlier documentation advertised InvalidDatasetException
     * and UnknownDatasetException here, but this method throws neither; it
     * only indexes the registry. An unregistered identifier therefore yields
     * an undefined-key notice/warning followed by a TypeError from the return
     * type. Confirm the intended exception and raise it explicitly.
     *
     * @param string $identifier Identifier as reported by Dataset::getIdentifier()
     * @return Dataset The registered dataset
     */
    public function getDataset(string $identifier): Dataset
    {
        return self::$datasets[$identifier];
    }
}