php-dataset/src/DatasetManager.php

180 lines
4.6 KiB
PHP

<?php
namespace NoccyLabs\Dataset;
use Iterator;
/**
 * DatasetManager is the central class of noccylabs/dataset.
 *
 * On first construction it locates the directory containing autoload.php
 * (the vendor directory), scans every "<org>/<package>/dataset.json" below it
 * and registers the datasets declared there. The registry is held in static
 * properties and is therefore shared by all DatasetManager instances.
 *
 * @author Christopher Vagnetoft <cvagnetoft@gmail.com>
 * @copyright (c) 2022, NoccyLabs
 * @package noccylabs/dataset
 */
class DatasetManager
{
    /** @var array<string, mixed> Package name => version, as read from composer/installed.php */
    private static array $packageVersions = [];

    /** @var array<string, Dataset> Registered datasets keyed by identifier */
    private static array $datasets = [];

    /**
     * Create the manager, scanning for bundled datasets when the shared
     * registry is still empty.
     */
    public function __construct()
    {
        if (self::$datasets === []) {
            $this->scanForDatasets();
        }
    }

    /**
     * Return all the available datasets
     *
     * @return array<string, Dataset> The available datasets, keyed by identifier
     */
    public function getAvailableDatasets(): array
    {
        return self::$datasets;
    }

    /**
     * Directly return a reader for a specific dataset.
     *
     * @param string $identifier The dataset identifier
     * @return Iterator A reader for the data
     * @throws InvalidDatasetException if the dataset can not be opened
     * @throws UnknownDatasetException if the dataset does not exist
     */
    public function openDataset(string $identifier): Iterator
    {
        return $this->getDataset($identifier)->open();
    }

    /**
     * Return a Dataset object containing metadata and methods to retrieve
     * a reader for the data in the set.
     *
     * @param string $identifier The dataset identifier
     * @return Dataset The registered dataset
     * @throws UnknownDatasetException if the dataset does not exist
     */
    public function getDataset(string $identifier): Dataset
    {
        if (!array_key_exists($identifier, self::$datasets)) {
            throw UnknownDatasetException::DatasetNotFound();
        }

        return self::$datasets[$identifier];
    }

    /**
     * Find the vendor directory and try to locate all bundled datasets.
     *
     * Package versions are refreshed from composer/installed.php (when that
     * file exists) before the packages are scanned, so that every registered
     * dataset can be tagged with the version of the package providing it.
     */
    private function scanForDatasets(): void
    {
        $root = $this->determineVendorPath();
        if ($root === null) {
            // Skip loading if we couldn't determine the root
            return;
        }

        self::$packageVersions = [];
        $installed = $root . "/composer/installed.php";
        if (is_file($installed)) {
            $versions = include $installed;
            $packages = is_array($versions) ? ($versions['versions'] ?? null) : null;
            if (is_array($packages)) {
                foreach ($packages as $name => $version) {
                    // Entries for replaced/provided packages have no 'version' key.
                    if (isset($version['version'])) {
                        self::$packageVersions[$name] = $version['version'];
                    }
                }
            }
        }

        // glob() returns false on error; treat that the same as "no matches".
        $matches = glob($root . "/*/*/dataset.json") ?: [];
        foreach ($matches as $match) {
            $path = dirname($match);
            $package = basename(dirname($path)) . "/" . basename($path);
            $this->scanPackageDatasets($package, $path);
        }
    }

    /**
     * Read the dataset.json of a single package and register its datasets.
     *
     * Problems with the file are reported through trigger_error() and the
     * package is skipped; scanning of other packages is not interrupted.
     *
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package (..../vendor/org/package)
     */
    private function scanPackageDatasets(string $package, string $path): void
    {
        $file = $path . "/dataset.json";
        $raw = is_readable($file) ? file_get_contents($file) : false;
        $json = is_string($raw) ? json_decode($raw, true) : null;

        if (!is_array($json) || !$json) {
            trigger_error("Unable to parse dataset.json in {$package}");
            return;
        }

        $datasets = $json['datasets'] ?? null;
        if (!is_array($datasets)) {
            trigger_error("No datasets defined in dataset.json in {$package}");
            return;
        }

        $this->loadDatasets($datasets, null, $package, $path);
    }

    /**
     * Register every dataset found in a (possibly nested) definition map.
     *
     * An entry that has a 'filename' key is a dataset definition; any other
     * array entry is treated as a group and is descended into, with its key
     * appended to the dotted prefix. Identifiers take the form
     * "org/package#group.subgroup.name".
     *
     * @param array $datasets Map of name => dataset options or nested group
     * @param string|null $prefix Dotted path of the enclosing groups, null at the top level
     * @param string $package The package name (org/package)
     * @param string $path The full path to the package
     */
    private function loadDatasets(array $datasets, ?string $prefix, string $package, string $path): void
    {
        foreach ($datasets as $name => $options) {
            if (!is_array($options)) {
                // Neither a dataset definition nor a group; nothing to register.
                continue;
            }

            $qualified = ltrim($prefix . "." . $name, ".");

            if (!array_key_exists('filename', $options)) {
                // Nested group: load its members, then carry on with the
                // remaining siblings (returning here would drop them).
                $this->loadDatasets($options, $qualified, $package, $path);
                continue;
            }

            $options['filename'] = $path . "/" . $options['filename'];
            $identifier = sprintf("%s#%s", $package, $qualified);
            $version = self::$packageVersions[$package] ?? null;

            $this->registerDataset(new Dataset($identifier, $options, $version));
        }
    }

    /**
     * Locate the directory that contains autoload.php by walking upwards
     * from this file's directory (or from the package's tests directory when
     * the NOCCYLABS_DATASET_TEST constant is defined).
     *
     * @return string|null The directory containing autoload.php, or null if
     *                     the filesystem root was reached without finding it
     */
    private function determineVendorPath(): ?string
    {
        $dir = defined("NOCCYLABS_DATASET_TEST") ? (dirname(__DIR__) . "/tests") : __DIR__;

        while (true) {
            if (file_exists($dir . "/autoload.php")) {
                return $dir;
            }

            $parent = dirname($dir);
            if ($parent === $dir) {
                // dirname() no longer changes the path: we are at the root.
                return null;
            }
            $dir = $parent;
        }
    }

    /**
     * Register a dataset
     *
     * The dataset is stored under the identifier it reports. If a dataset
     * with the same identifier is already registered, the call is a no-op.
     *
     * @param Dataset $dataset The dataset to add to the shared registry
     */
    public function registerDataset(Dataset $dataset)
    {
        $id = $dataset->getIdentifier();
        if (array_key_exists($id, self::$datasets)) {
            // Don't overwrite previously registered datasets. Investigate how
            // this can be handled better in the future.
            return;
        }

        self::$datasets[$id] = $dataset;
    }
}