php-dataset/src/Readers/CsvReader.php

106 lines
2.8 KiB
PHP

<?php
namespace NoccyLabs\Dataset\Readers;
use NoccyLabs\Dataset\ReaderInterface;
/**
 * Reads one or more CSV files as a single, continuous sequence of rows.
 *
 * The constructor takes a glob() pattern; each matching file is one "slice".
 * Only one slice is held in memory at a time: it is loaded the first time the
 * cursor points into it and replaced when the cursor moves on to another file.
 *
 * The first line of every file is treated as the header row. Each following
 * line is returned as an array keyed by those header names.
 *
 * Recognised options (all optional):
 *  - 'separator': field delimiter, defaults to ','
 *  - 'enclosure': field enclosure character, defaults to '"'
 *  - 'escape':    escape character, defaults to '\'
 *
 * Limitation: files are split into lines before parsing, so a quoted field
 * that contains a line break is not supported.
 */
class CsvReader implements ReaderInterface
{
    /** @var string[] Files matched by the glob pattern, in the order glob() returned them. */
    private array $files = [];

    /** @var array<string, string> Parser options, see the class description. */
    private array $options = [];

    /** Index into $files of the slice the cursor currently points into. */
    private int $currentFile = 0;

    /** Index into $files of the slice held in $data, or null when nothing is loaded yet. */
    private ?int $loadedFile = null;

    /** @var array<int, array<string, string|null>> Parsed rows of the loaded slice. */
    private array $data = [];

    /** Position of the cursor inside $data. */
    private int $currentIndex = 0;

    /** Running row number across all slices; this is what key() reports. */
    private int $counter = 0;

    /**
     * @param string $filename Glob pattern selecting the CSV file(s) to read.
     * @param array  $options  Parser options ('separator', 'enclosure', 'escape').
     */
    public function __construct(string $filename, array $options = [])
    {
        // glob() returns false on error, which must not reach the array-typed
        // property; treat that the same as "no files matched".
        $this->files = glob($filename) ?: [];
        $this->options = $options;
    }

    /**
     * Makes sure the slice under the cursor is loaded, skipping empty slices.
     *
     * A file that contributes no rows (empty, or header only) is stepped over
     * so that it does not end the iteration while later files still hold data.
     * When the cursor has moved past the last file nothing is loaded.
     *
     * @throws \RuntimeException When a matched file cannot be read.
     */
    private function checkLoadedSlice(): void
    {
        $fileCount = count($this->files);

        while ($this->currentFile < $fileCount) {
            if ($this->currentFile !== $this->loadedFile) {
                $file = $this->files[$this->currentFile];

                // The warning is suppressed because the failure is reported
                // through the exception below instead.
                $lines = @file($file, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
                if ($lines === false) {
                    throw new \RuntimeException(sprintf('Unable to read CSV file "%s"', $file));
                }

                $this->loadData($lines);
                $this->loadedFile = $this->currentFile;
            }

            if ($this->currentIndex < count($this->data)) {
                return;
            }

            // The loaded slice has no row at the cursor, so try the next file.
            $this->currentFile++;
            $this->currentIndex = 0;
        }
    }

    /**
     * Parses the raw lines of one file into $data and resets the in-slice cursor.
     *
     * @param string[] $data Lines of the file without line endings; the first
     *                       entry is the header row.
     */
    private function loadData(array $data): void
    {
        $separator = $this->options['separator'] ?? ',';
        $enclosure = $this->options['enclosure'] ?? '"';
        $escape = $this->options['escape'] ?? "\\";

        $this->data = [];
        $this->currentIndex = 0;

        $headerLine = array_shift($data);
        if ($headerLine === null) {
            // Completely empty file: no header, hence no rows.
            return;
        }

        // The header must be parsed with the same dialect as the rows,
        // otherwise a non-default separator yields a single-column header.
        $head = str_getcsv($headerLine, $separator, $enclosure, $escape);

        foreach ($data as $row) {
            // Compare against '' explicitly: a plain truthiness test would
            // also discard a line consisting solely of "0".
            if ($row === '') {
                continue;
            }

            // A row whose column count differs from the header is reported by
            // array_combine() itself (a ValueError as of PHP 8).
            $this->data[] = array_combine(
                $head,
                str_getcsv($row, $separator, $enclosure, $escape)
            );
        }
    }

    /**
     * Moves the cursor back to the first row of the first file.
     */
    public function rewind(): void
    {
        $this->currentFile = 0;
        $this->currentIndex = 0;
        $this->counter = 0;
        $this->checkLoadedSlice();
    }

    /**
     * Returns the zero-based row number counted across all files.
     *
     * @return int
     */
    // NOTE(review): the attribute silences the PHP 8.1+ return-type deprecation
    // that applies if ReaderInterface extends \Iterator - presumably it does,
    // confirm. On older PHP versions the line is parsed as a comment.
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->counter;
    }

    /**
     * Returns the row under the cursor, keyed by header name.
     *
     * @return array<string, string|null>|null The row, or null when the cursor
     *                                         does not point at a loaded row.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->data[$this->currentIndex] ?? null;
    }

    /**
     * Advances the cursor by one row, rolling over to the next file when the
     * loaded slice is exhausted. The next file is loaded lazily by valid().
     */
    public function next(): void
    {
        $this->counter++;
        $this->currentIndex++;

        if ($this->currentIndex >= count($this->data)) {
            $this->currentFile++;
            $this->currentIndex = 0;
        }
    }

    /**
     * Reports whether the cursor points at a row, loading the slice if needed.
     *
     * @throws \RuntimeException When the file under the cursor cannot be read.
     */
    public function valid(): bool
    {
        $this->checkLoadedSlice();

        return $this->currentFile < count($this->files)
            && $this->currentIndex < count($this->data);
    }
}