More tests, filtering iterator, cleanup

This commit is contained in:
2022-10-31 02:58:34 +01:00
parent d0956f851c
commit fd2767b642
14 changed files with 340 additions and 21 deletions

View File

@ -2,9 +2,15 @@
namespace NoccyLabs\Dataset;
use Iterator;
use NoccyLabs\Dataset\Readers\CsvReader;
use NoccyLabs\Dataset\Readers\JsonReader;
/**
*
*
*
*/
class Dataset
{
protected string $packageName;
@ -15,48 +21,83 @@ class Dataset
protected array $options;
protected ?string $version;
protected string $version;
/**
* Create a dataset description from its identifier and configured options.
*
* The identifier is split at the first "#" into the package name and the
* dataset name. When no version is given, "0.0.0.0" is stored instead.
*
* @param string $identifier The identifier for the dataset (vendor/package#dataset)
* @param array $options Configured options for the dataset
* @param string|null $version The package version
*/
public function __construct(string $identifier, array $options, ?string $version=null)
{
$this->identifier = $identifier;
$this->options = $options;
$this->version = $version;
// A missing version falls back to a placeholder so the stored value is never null.
$this->version = $version??"0.0.0.0";
// Limit of 2 keeps any further "#" characters inside the dataset name.
[$this->packageName, $this->datasetName] = explode("#", $identifier, 2);
}
/**
* Get the full dataset identifier exactly as it was passed to the constructor.
*
* @return string The identifier (vendor/package#dataset)
*/
public function getIdentifier(): string
{
return $this->identifier;
}
/**
* Get the package part of the identifier, i.e. everything before the first "#".
*
* @return string The package name
*/
public function getPackageName(): string
{
return $this->packageName;
}
/**
* Get the dataset part of the identifier, i.e. everything after the first "#".
*
* @return string The dataset name
*/
public function getDatasetName(): string
{
return $this->datasetName;
}
public function getVersion(): ?string
/**
* Get the package version stored for this dataset.
*
* @return string The version string held in the version property
*/
public function getVersion(): string
{
return $this->version;
}
/**
* Get the free-form comment configured for this dataset, if any.
*
* @return string|null The value of the 'comment' option, or null when it is absent
*/
public function getComment(): ?string
{
    return $this->options['comment'] ?? null;
}
/**
* Get the license configured for this dataset, if any.
*
* @return string|null The value of the 'license' option, or null when it is absent
*/
public function getLicense(): ?string
{
    return $this->options['license'] ?? null;
}
public function open(): ReaderInterface
/**
*
* @return Iterator
*/
public function open(): Iterator
{
$filename = $this->options['filename'];
$reader = $this->determineReaderForFile($filename);
@ -64,6 +105,11 @@ class Dataset
return $inst;
}
/**
*
* @param string $filename
* @return string
*/
private function determineReaderForFile(string $filename): string
{
if ($reader = $this->options['reader']??null) {

View File

@ -2,6 +2,8 @@
namespace NoccyLabs\Dataset;
use Iterator;
/**
* DatasetManager is the central class of noccylabs/dataset.
*
@ -36,11 +38,11 @@ class DatasetManager
* Directly return a reader for a specific dataset.
*
* @param string $identifier The dataset identifier
* @return ReaderInterface A reader for the data
* @return Iterator A reader for the data
* @throws InvalidDatasetException if the dataset can not be opened
* @throws UnknownDatasetException if the dataset does not exist
*/
public function openDataset(string $identifier): ReaderInterface
public function openDataset(string $identifier): Iterator
{
    // Resolve the dataset first, then hand back whatever iterator it opens.
    $dataset = $this->getDataset($identifier);

    return $dataset->open();
}
@ -157,6 +159,11 @@ class DatasetManager
return null;
}
/**
* Register a dataset
*
*
*/
public function registerDataset(Dataset $dataset)
{
$id = $dataset->getIdentifier();

View File

@ -0,0 +1,92 @@
<?php
namespace NoccyLabs\Dataset;
use Iterator;
/**
 * Iterator decorator that exposes only those rows of a wrapped reader which
 * satisfy a condition.
 *
 * The condition is either a callable that receives the row and returns a
 * truthy value for rows to keep, or an array mapping field names to tests
 * (see matchCondition() for the supported tests).
 *
 * Filtering happens when the iterator is moved with rewind() or next();
 * current(), key() and valid() simply delegate to the wrapped reader.
 */
class FilteringReaderIterator implements Iterator
{
    /** The wrapped reader supplying the unfiltered rows. */
    private Iterator $reader;

    /**
     * Row filter: a callable, or a map of field name => test.
     * Left untyped because `callable` is not permitted as a property type.
     *
     * @var array|callable
     */
    private $condition;

    /**
     * @param Iterator $reader The reader whose rows should be filtered
     * @param array|callable $condition Callable receiving the row, or a map of field => test
     */
    public function __construct(Iterator $reader, array|callable $condition)
    {
        $this->reader = $reader;
        $this->condition = $condition;
    }

    /**
     * @return mixed The row the wrapped reader is currently positioned on
     */
    public function current(): mixed
    {
        return $this->reader->current();
    }

    /**
     * @return mixed The wrapped reader's key for the current row
     */
    public function key(): mixed
    {
        return $this->reader->key();
    }

    /**
     * @return bool Whether the wrapped reader still points at a row
     */
    public function valid(): bool
    {
        return $this->reader->valid();
    }

    /**
     * Advance to the next row that satisfies the condition.
     */
    public function next(): void
    {
        $this->reader->next();
        $this->skipToMatch();
    }

    /**
     * Rewind the wrapped reader and position on the first matching row.
     */
    public function rewind(): void
    {
        $this->reader->rewind();
        $this->skipToMatch();
    }

    /**
     * Move the wrapped reader forward until it points at a matching row or
     * runs out of rows. Does nothing if the current row already matches.
     */
    private function skipToMatch(): void
    {
        while ($this->reader->valid() && !$this->matchCondition($this->reader->current())) {
            $this->reader->next();
        }
    }

    /**
     * Test the condition against a row.
     *
     * A callable condition is invoked with the row and its result cast to bool.
     * Otherwise every field => test pair must pass; pairs whose field is not
     * present in the row are ignored. A test is one of:
     *  - a scalar (string/bool/int/float): the field must loosely equal it
     *  - an array of operators, all of which must hold:
     *    - eq     equals
     *    - neq    not equals
     *    - gt     greater than
     *    - gte    greater than or equal
     *    - lt     less than
     *    - lte    less than or equal
     *    - in     value in array
     *    - nin    value not in array
     *
     * @param array $row The row to test
     * @return bool True when the row should be kept
     */
    private function matchCondition(array $row): bool
    {
        if (is_callable($this->condition)) {
            return (bool) call_user_func($this->condition, $row);
        }

        foreach ($this->condition as $field => $test) {
            // Conditions on fields the row does not have are skipped, not failed.
            if (!array_key_exists($field, $row)) {
                continue;
            }
            $value = $row[$field];

            if (!is_array($test)) {
                if ($value != $test) {
                    return false;
                }
                continue;
            }

            // Comparisons are intentionally loose, matching the scalar test above.
            if (array_key_exists('eq', $test) && $value != $test['eq']) {
                return false;
            }
            if (array_key_exists('neq', $test) && $value == $test['neq']) {
                return false;
            }
            if (array_key_exists('gt', $test) && $value <= $test['gt']) {
                return false;
            }
            if (array_key_exists('gte', $test) && $value < $test['gte']) {
                return false;
            }
            if (array_key_exists('lt', $test) && $value >= $test['lt']) {
                return false;
            }
            if (array_key_exists('lte', $test) && $value > $test['lte']) {
                return false;
            }
            if (array_key_exists('in', $test) && !in_array($value, $test['in'])) {
                return false;
            }
            // Must read the 'nin' list itself; reading 'in' here broke nin-only tests.
            if (array_key_exists('nin', $test) && in_array($value, $test['nin'])) {
                return false;
            }
        }

        return true;
    }
}

View File

@ -45,12 +45,16 @@ class CsvReader implements ReaderInterface
private function loadData(array $data)
{
// FIXME parse data according to directives if present
$separator = $this->options['separator']??',';
$enclosure = $this->options['enclosure']??'"';
$escape = $this->options['escape']??"\\";
$head = str_getcsv(array_shift($data));
$this->data = [];
foreach ($data as $row) {
if ($row) {
$row = str_getcsv($row);
$row = str_getcsv($row, $separator, $enclosure, $escape);
$this->data[] = array_combine($head, $row);
}
}

View File

@ -35,8 +35,10 @@ class JsonReader implements ReaderInterface
//printf("Reached end of set at slice=%d\n", $this->currentFile);
return;
}
$flags = ($this->options['bigintAsString']??false)?JSON_BIGINT_AS_STRING:0;
$file = $this->files[$this->currentFile];
$json = @json_decode(@file_get_contents($file), true);
$json = @json_decode(@file_get_contents($file), true, flags:$flags);
$this->loadData($json);
$this->loadedFile = $this->currentFile;