134 lines
3.4 KiB
PHP
134 lines
3.4 KiB
PHP
<?php
|
|
|
|
namespace NoccyLabs\Dataset;
|
|
|
|
/**
 * DatasetManager is the central class of noccylabs/dataset.
 *
 * It keeps a registry of datasets, keyed by their identifier, in a static
 * property so that all instances share the same list. Whenever an instance is
 * created while the registry is empty, the packages in the Composer vendor
 * directory are scanned for dataset.json manifests and every dataset found is
 * registered.
 *
 * @author Christopher Vagnetoft <cvagnetoft@gmail.com>
 * @copyright (c) 2022, NoccyLabs
 * @package noccylabs/dataset
 */
class DatasetManager
{
    /**
     * Registered datasets, indexed by dataset identifier. Static, so the
     * registry is shared between all DatasetManager instances.
     *
     * @var array<string, Dataset>
     */
    private static $datasets = [];

    /**
     * Populate the shared registry if nothing has been registered yet.
     *
     * If a scan finds no datasets the registry stays empty, so the next
     * instance created will scan again.
     */
    public function __construct()
    {
        if (count(self::$datasets) === 0) {
            $this->scanForDatasets();
        }
    }

    /**
     * Get all registered datasets.
     *
     * @return array<string, Dataset> The datasets, indexed by identifier
     */
    public function getAvailableDatasets(): array
    {
        return self::$datasets;
    }

    /**
     * Get a registered dataset by its identifier.
     *
     * Identifiers created by the vendor scan have the form
     * "org/package#name", or "org/package#group.name" for nested entries.
     *
     * NOTE(review): an unknown identifier is not handled here. The array
     * lookup yields null, which violates the declared return type and ends in
     * a TypeError. The previous docblock advertised InvalidDatasetException
     * and UnknownDatasetException, but nothing in this class throws them;
     * confirm the intended behaviour.
     *
     * @param string $identifier The identifier of the dataset to return
     * @return Dataset The registered dataset
     */
    public function getDataset(string $identifier): Dataset
    {
        return self::$datasets[$identifier];
    }

    /**
     * Look up a dataset and open it.
     *
     * Shorthand for calling open() on the result of getDataset().
     *
     * @param string $identifier The identifier of the dataset to open
     * @return ReaderInterface Whatever the dataset's open() method returns
     */
    public function openDataset(string $identifier): ReaderInterface
    {
        return $this->getDataset($identifier)->open();
    }

    /**
     * Register a dataset in the shared registry.
     *
     * The first registration of an identifier wins; registering another
     * dataset with an identifier that is already known is silently ignored.
     *
     * @param Dataset $dataset The dataset to register
     */
    public function registerDataset(Dataset $dataset)
    {
        $id = $dataset->getIdentifier();

        if (array_key_exists($id, self::$datasets)) {
            // Don't overwrite previously registered datasets. Investigate how
            // this can be handled better in the future.
            return;
        }

        self::$datasets[$id] = $dataset;
    }

    /**
     * Find the vendor directory and try to locate all bundled datasets.
     *
     * Every "<vendor>/<org>/<package>/dataset.json" file found is handed to
     * scanPackageDatasets() together with its "org/package" name.
     */
    private function scanForDatasets(): void
    {
        $root = $this->determineVendorPath();
        if (!$root) {
            // Skip loading if we couldn't determine the root
            return;
        }

        // glob() returns false on error; treat that the same as "no matches"
        // instead of passing false to foreach.
        $manifests = glob($root . "/*/*/dataset.json") ?: [];

        foreach ($manifests as $manifest) {
            $path = dirname($manifest);
            $package = basename(dirname($path)) . "/" . basename($path);
            $this->scanPackageDatasets($package, $path);
        }
    }

    /**
     * Read the dataset.json manifest of a single package and register the
     * datasets listed under its "datasets" key.
     *
     * Problems with the manifest are reported through trigger_error() and the
     * package is skipped, so one broken package does not stop the scan.
     *
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package (..../vendor/org/package)
     */
    private function scanPackageDatasets(string $package, string $path): void
    {
        $file = $path . "/dataset.json";

        // Check readability up front rather than silencing warnings with "@".
        $contents = is_readable($file) ? file_get_contents($file) : false;
        $json = is_string($contents) ? json_decode($contents, true) : null;

        if (!$json) {
            trigger_error("Unable to parse dataset.json in {$package}");
            return;
        }

        // A manifest that decodes fine but has no "datasets" map would
        // otherwise fail with a TypeError when passed to loadDatasets().
        $datasets = is_array($json) ? ($json['datasets'] ?? null) : null;
        if (!is_array($datasets)) {
            trigger_error("No usable \"datasets\" section in dataset.json in {$package}");
            return;
        }

        $this->loadDatasets($datasets, null, $package, $path);
    }

    /**
     * Register every dataset in a (possibly nested) manifest section.
     *
     * An entry that has a "filename" key is a dataset and is registered as
     * "<package>#<dotted name>". An entry without that key is treated as a
     * group: its children are loaded recursively with the group name added
     * to the dotted prefix.
     *
     * @param array $datasets Map of entry name to dataset info or nested group
     * @param string|null $prefix Dotted name of the enclosing group, null at the top level
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package; only passed along on recursion
     */
    private function loadDatasets(array $datasets, ?string $prefix, string $package, string $path): void
    {
        foreach ($datasets as $name => $info) {
            if (!is_array($info)) {
                // Neither a dataset definition nor a group; nothing to load.
                continue;
            }

            // With no prefix this produces ".name"; ltrim drops the leading dot.
            $qualified = ltrim($prefix . "." . $name, ".");

            if (!array_key_exists('filename', $info)) {
                $this->loadDatasets($info, $qualified, $package, $path);
                // Continue with the remaining siblings. Returning here would
                // drop every entry that follows the first group.
                continue;
            }

            $identifier = sprintf("%s#%s", $package, $qualified);
            $this->registerDataset(new Dataset($identifier, $info));
        }
    }

    /**
     * Determine the Composer vendor directory.
     *
     * The directory three levels above the one containing this file is
     * accepted as the vendor directory if it contains an autoload.php.
     *
     * @return string|null The vendor directory, or null if it could not be determined
     */
    private function determineVendorPath(): ?string
    {
        if (file_exists(__DIR__ . "/../../../autoload.php")) {
            // we are installed as a composer package
            return dirname(__DIR__, 3);
        }

        return null;
    }
}