More tests, filtering iterator, cleanup
This commit is contained in:
@ -2,9 +2,15 @@
|
||||
|
||||
namespace NoccyLabs\Dataset;
|
||||
|
||||
use Iterator;
|
||||
use NoccyLabs\Dataset\Readers\CsvReader;
|
||||
use NoccyLabs\Dataset\Readers\JsonReader;
|
||||
|
||||
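/**
 * A single dataset, addressed by an identifier of the form
 * "vendor/package#dataset", together with its configured options and the
 * version of the package that provides it.
 */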
|
||||
class Dataset
|
||||
{
|
||||
protected string $packageName;
|
||||
@ -15,48 +21,83 @@ class Dataset
|
||||
|
||||
protected array $options;
|
||||
|
||||
protected ?string $version;
|
||||
protected string $version;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @param string $identifier The identifier for the dataset (vendor/package#dataset)
|
||||
* @param array $options Configured options for the dataset
|
||||
* @param string|null $version The package version
|
||||
*/
|
||||
public function __construct(string $identifier, array $options, ?string $version=null)
|
||||
{
|
||||
$this->identifier = $identifier;
|
||||
$this->options = $options;
|
||||
$this->version = $version;
|
||||
$this->version = $version??"0.0.0.0";
|
||||
|
||||
[$this->packageName, $this->datasetName] = explode("#", $identifier, 2);
|
||||
}
|
||||
|
||||
/**
 * Get the full identifier of the dataset (vendor/package#dataset).
 *
 * @return string
 */
|
||||
public function getIdentifier(): string
|
||||
{
|
||||
return $this->identifier;
|
||||
}
|
||||
|
||||
/**
 * Get the package part of the identifier, i.e. everything before the "#".
 *
 * @return string
 */
|
||||
public function getPackageName(): string
|
||||
{
|
||||
return $this->packageName;
|
||||
}
|
||||
|
||||
/**
 * Get the dataset part of the identifier, i.e. everything after the "#".
 *
 * @return string
 */
|
||||
public function getDatasetName(): string
|
||||
{
|
||||
return $this->datasetName;
|
||||
}
|
||||
|
||||
public function getVersion(): ?string
|
||||
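/**
 * Get the version of the package providing the dataset.
 *
 * @return string The version passed to the constructor, or "0.0.0.0" when none was given
 */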
|
||||
public function getVersion(): string
|
||||
{
|
||||
return $this->version;
|
||||
}
|
||||
|
||||
/**
 * Get the comment configured for the dataset, if any.
 *
 * @return string|null The value of the 'comment' option, or null when it is not set
 */
public function getComment(): ?string
{
    return $this->options['comment'] ?? null;
}
|
||||
|
||||
/**
 * Get the license configured for the dataset, if any.
 *
 * @return string|null The value of the 'license' option, or null when it is not set
 */
public function getLicense(): ?string
{
    return $this->options['license'] ?? null;
}
|
||||
|
||||
public function open(): ReaderInterface
|
||||
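/**
 * Open the dataset for reading.
 *
 * The file is taken from the 'filename' option and the reader is selected
 * with determineReaderForFile().
 *
 * @return Iterator An iterator over the dataset's data
 */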
|
||||
public function open(): Iterator
|
||||
{
|
||||
$filename = $this->options['filename'];
|
||||
$reader = $this->determineReaderForFile($filename);
|
||||
@ -64,6 +105,11 @@ class Dataset
|
||||
return $inst;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param string $filename
|
||||
* @return string
|
||||
*/
|
||||
private function determineReaderForFile(string $filename): string
|
||||
{
|
||||
if ($reader = $this->options['reader']??null) {
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
namespace NoccyLabs\Dataset;
|
||||
|
||||
use Iterator;
|
||||
|
||||
/**
|
||||
* DatasetManager is the central class of noccylabs/dataset.
|
||||
*
|
||||
@ -36,11 +38,11 @@ class DatasetManager
|
||||
* Directly return a reader for a specific dataset.
|
||||
*
|
||||
* @param string $identifier The dataset identifier
|
||||
* @return ReaderInterface A reader for the data
|
||||
* @return Iterator A reader for the data
|
||||
* @throws InvalidDatasetException if the dataset can not be opened
|
||||
* @throws UnknownDatasetException if the dataset does not exist
|
||||
*/
|
||||
public function openDataset(string $identifier): ReaderInterface
|
||||
public function openDataset(string $identifier): Iterator
|
||||
{
|
||||
return $this->getDataset($identifier)->open();
|
||||
}
|
||||
@ -157,6 +159,11 @@ class DatasetManager
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a dataset
|
||||
*
|
||||
*
|
||||
*/
|
||||
public function registerDataset(Dataset $dataset)
|
||||
{
|
||||
$id = $dataset->getIdentifier();
|
||||
|
92
src/FilteringReaderIterator.php
Normal file
92
src/FilteringReaderIterator.php
Normal file
@ -0,0 +1,92 @@
|
||||
<?php
|
||||
|
||||
namespace NoccyLabs\Dataset;
|
||||
|
||||
use Iterator;
|
||||
|
||||
/**
 * Iterator decorator that only stops on the rows of the wrapped reader that
 * satisfy a condition.
 *
 * The condition is either a callable, which receives the row and returns a
 * truthy value for rows to keep, or an array mapping field names to tests
 * (see matchCondition() for the supported tests).
 */
class FilteringReaderIterator implements Iterator
{
    /** The wrapped reader whose rows are being filtered. */
    private Iterator $reader;

    /**
     * Row predicate, or a field => test map. Left without a native type
     * because "callable" is not allowed as a property type.
     *
     * @var array|callable
     */
    private $condition;

    /**
     * @param Iterator       $reader    The reader to filter
     * @param array|callable $condition Field => test map, or a callable taking the row
     */
    public function __construct(Iterator $reader, array|callable $condition)
    {
        $this->reader = $reader;
        $this->condition = $condition;
    }

    /**
     * Return the row the wrapped reader is currently positioned on.
     */
    public function current(): mixed
    {
        return $this->reader->current();
    }

    /**
     * Return the wrapped reader's key for the current row; keys of skipped
     * rows are not renumbered.
     */
    public function key(): mixed
    {
        return $this->reader->key();
    }

    /**
     * Whether the wrapped reader still points at a row.
     */
    public function valid(): bool
    {
        return $this->reader->valid();
    }

    /**
     * Advance to the next row that satisfies the condition.
     */
    public function next(): void
    {
        $this->reader->next();
        $this->skipToMatch();
    }

    /**
     * Rewind the wrapped reader and position on the first matching row.
     */
    public function rewind(): void
    {
        $this->reader->rewind();
        $this->skipToMatch();
    }

    /**
     * Move the wrapped reader forward until it points at a matching row or
     * runs out of rows. Does nothing if the current row already matches.
     */
    private function skipToMatch(): void
    {
        while ($this->reader->valid() && !$this->matchCondition($this->reader->current())) {
            $this->reader->next();
        }
    }

    /**
     * Test the condition against a row.
     *
     * A callable condition decides on its own. For an array condition every
     * field test must pass; a field that is not present in the row is not
     * tested at all (and so does not reject the row). A test is one of:
     *  - string/bool/int/float - the field must equal the value
     *  - array - all operators in the array must match:
     *    - eq   equals
     *    - neq  not equals
     *    - gt   greater than
     *    - gte  greater than or equal
     *    - lt   less than
     *    - lte  less than or equal
     *    - in   value in array
     *    - nin  value not in array
     *
     * Comparisons are loose (== and non-strict in_array), so for example the
     * string "1" matches the integer 1.
     *
     * @param array $row The row to test
     * @return bool True if the row satisfies the condition
     */
    private function matchCondition(array $row): bool
    {
        if (is_callable($this->condition)) {
            return (bool) ($this->condition)($row);
        }

        foreach ($this->condition as $field => $test) {
            if (!array_key_exists($field, $row)) {
                continue;
            }

            $value = $row[$field];
            $passes = is_array($test)
                ? $this->matchOperators($value, $test)
                : $value == $test;

            if (!$passes) {
                return false;
            }
        }

        return true;
    }

    /**
     * Check a single field value against an operator => operand map.
     *
     * @param mixed $value The field value from the row
     * @param array $test  Operators to apply (eq, neq, gt, gte, lt, lte, in, nin)
     * @return bool True if no operator rejects the value
     */
    private function matchOperators(mixed $value, array $test): bool
    {
        if (array_key_exists('eq', $test) && $value != $test['eq']) {
            return false;
        }
        if (array_key_exists('neq', $test) && $value == $test['neq']) {
            return false;
        }
        if (array_key_exists('gt', $test) && $value <= $test['gt']) {
            return false;
        }
        if (array_key_exists('gte', $test) && $value < $test['gte']) {
            return false;
        }
        if (array_key_exists('lt', $test) && $value >= $test['lt']) {
            return false;
        }
        if (array_key_exists('lte', $test) && $value > $test['lte']) {
            return false;
        }
        if (array_key_exists('in', $test) && !in_array($value, $test['in'])) {
            return false;
        }
        // The exclusion list lives under 'nin'; reading 'in' here would hit an
        // undefined key whenever only 'nin' is given.
        if (array_key_exists('nin', $test) && in_array($value, $test['nin'])) {
            return false;
        }

        return true;
    }
}
|
@ -45,12 +45,16 @@ class CsvReader implements ReaderInterface
|
||||
|
||||
private function loadData(array $data)
|
||||
{
|
||||
// FIXME parse data according to directives if present
|
||||
$separator = $this->options['separator']??',';
|
||||
$enclosure = $this->options['enclosure']??'"';
|
||||
$escape = $this->options['escape']??"\\";
|
||||
|
||||
|
||||
$head = str_getcsv(array_shift($data));
|
||||
$this->data = [];
|
||||
foreach ($data as $row) {
|
||||
if ($row) {
|
||||
$row = str_getcsv($row);
|
||||
$row = str_getcsv($row, $separator, $enclosure, $escape);
|
||||
$this->data[] = array_combine($head, $row);
|
||||
}
|
||||
}
|
||||
|
@ -35,8 +35,10 @@ class JsonReader implements ReaderInterface
|
||||
//printf("Reached end of set at slice=%d\n", $this->currentFile);
|
||||
return;
|
||||
}
|
||||
|
||||
$flags = ($this->options['bigintAsString']??false)?JSON_BIGINT_AS_STRING:0;
|
||||
$file = $this->files[$this->currentFile];
|
||||
$json = @json_decode(@file_get_contents($file), true);
|
||||
$json = @json_decode(@file_get_contents($file), true, flags:$flags);
|
||||
|
||||
$this->loadData($json);
|
||||
$this->loadedFile = $this->currentFile;
|
||||
|
Reference in New Issue
Block a user