2022-09-03 00:28:03 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace NoccyLabs\Dataset;
|
|
|
|
|
2022-10-31 01:58:34 +00:00
|
|
|
use Iterator;
|
2022-09-03 12:39:08 +00:00
|
|
|
use NoccyLabs\Dataset\Readers\CsvReader;
|
|
|
|
use NoccyLabs\Dataset\Readers\JsonReader;
|
2022-09-03 00:28:03 +00:00
|
|
|
|
2022-10-31 01:58:34 +00:00
|
|
|
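/**
 * Describes a single dataset provided by a package.
 *
 * A dataset is addressed by an identifier of the form "vendor/package#dataset"
 * and carries the options configured for it (such as filename, reader, comment
 * and license) together with the version of the providing package.
 */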
|
2022-09-03 00:28:03 +00:00
|
|
|
class Dataset
{
    /** @var string Package part of the identifier (everything before the first "#") */
    protected string $packageName;

    /** @var string Dataset part of the identifier (everything after the first "#") */
    protected string $datasetName;

    /** @var string The full identifier as passed to the constructor */
    protected string $identifier;

    /** @var array Configured options for the dataset (filename, reader, comment, license, ...) */
    protected array $options;

    /** @var string The package version, "0.0.0.0" when none was supplied */
    protected string $version;

    /**
     * Create a dataset description from its identifier and options.
     *
     * The identifier is split on the first "#" into the package name and the
     * dataset name; any further "#" characters stay in the dataset name.
     *
     * @param string $identifier The identifier for the dataset (vendor/package#dataset)
     * @param array $options Configured options for the dataset
     * @param string|null $version The package version
     * @throws \InvalidArgumentException If the identifier contains no "#" separator
     */
    public function __construct(string $identifier, array $options, ?string $version=null)
    {
        // Validate before touching any property: without a "#" the destructuring
        // below would try to assign null to a typed string property.
        $parts = explode('#', $identifier, 2);
        if (count($parts) !== 2) {
            throw new \InvalidArgumentException(
                "Invalid dataset identifier '{$identifier}', expected vendor/package#dataset"
            );
        }

        $this->identifier = $identifier;
        $this->options = $options;
        $this->version = $version ?? '0.0.0.0';
        [$this->packageName, $this->datasetName] = $parts;
    }

    /**
     * Get the full identifier exactly as it was passed to the constructor.
     *
     * @return string
     */
    public function getIdentifier(): string
    {
        return $this->identifier;
    }

    /**
     * Get the package part of the identifier (before the first "#").
     *
     * @return string
     */
    public function getPackageName(): string
    {
        return $this->packageName;
    }

    /**
     * Get the dataset part of the identifier (after the first "#").
     *
     * @return string
     */
    public function getDatasetName(): string
    {
        return $this->datasetName;
    }

    /**
     * Get the package version, or "0.0.0.0" if none was supplied.
     *
     * @return string
     */
    public function getVersion(): string
    {
        return $this->version;
    }

    /**
     * Get the "comment" option, if one is configured.
     *
     * @return string|null
     */
    public function getComment(): ?string
    {
        return $this->options['comment'] ?? null;
    }

    /**
     * Get the "license" option, if one is configured.
     *
     * @return string|null
     */
    public function getLicense(): ?string
    {
        return $this->options['license'] ?? null;
    }

    /**
     * Instantiate the reader for the configured file.
     *
     * The reader class is constructed with the filename and the complete
     * options array and returned as-is.
     *
     * @return Iterator
     * @throws \RuntimeException If no filename is configured, or no reader can be determined
     */
    public function open(): Iterator
    {
        $filename = $this->options['filename'] ?? null;
        if ($filename === null) {
            // Fail with a clear message instead of passing null on to a string parameter.
            throw new \RuntimeException(
                "No filename configured for dataset {$this->identifier}"
            );
        }

        $reader = $this->determineReaderForFile($filename);

        return new $reader($filename, $this->options);
    }

    /**
     * Resolve the reader class name to use for a file.
     *
     * A truthy "reader" option always wins; otherwise the class is picked from
     * the file extension, compared case-insensitively.
     *
     * @param string $filename
     * @return string
     * @throws \RuntimeException If the extension does not map to a known reader
     */
    private function determineReaderForFile(string $filename): string
    {
        if ($reader = $this->options['reader'] ?? null) {
            return $reader;
        }

        // Lower-case so that e.g. "DATA.JSON" and "data.json" resolve identically.
        $ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION));

        return match ($ext) {
            'json' => JsonReader::class,
            'csv' => CsvReader::class,
            default => throw new \RuntimeException("Unable to determine reader for dataset file")
        };
    }
}
|