php-dataset/src/Readers/CsvReader.php

106 lines
2.8 KiB
PHP
Raw Normal View History

2022-09-03 12:39:08 +00:00
<?php
namespace NoccyLabs\Dataset\Readers;
use NoccyLabs\Dataset\ReaderInterface;
/**
 * Iterates over the rows of one or more CSV files as associative arrays.
 *
 * The constructor takes a glob() pattern; every matching file is one "slice"
 * of the dataset. Slices are read lazily, one at a time, when the iteration
 * reaches them. The first line of each file is its header, and every following
 * line is returned as an array keyed by the header cells.
 *
 * Supported options (all optional):
 *  - 'separator': field delimiter, default ","
 *  - 'enclosure': field enclosure, default '"'
 *  - 'escape':    escape character, default "\"
 *
 * Limitation: files are split into lines before parsing, so a quoted field
 * containing a line break is not supported.
 */
class CsvReader implements ReaderInterface
{
    /** @var string[] Files matched by the glob pattern, in the order glob() returned them. */
    private array $files = [];

    /** @var array Parser options, see the class description. */
    private array $options = [];

    /** Index into $files of the slice the iterator is currently positioned in. */
    private int $currentFile = 0;

    /** Index into $files of the slice held in $data, or null when nothing has been loaded yet. */
    private ?int $loadedFile = null;

    /** @var array[] Parsed rows of the loaded slice. */
    private array $data = [];

    /** Position within $data. */
    private int $currentIndex = 0;

    /** Running row number across all slices; this is what key() reports. */
    private int $counter = 0;

    /**
     * @param string $filename glob() pattern selecting the CSV file(s) to read
     * @param array  $options  parser options ('separator', 'enclosure', 'escape')
     */
    public function __construct(string $filename, array $options = [])
    {
        // glob() returns false on error (and, on some systems, when nothing
        // matches); the typed property only accepts an array.
        $this->files = glob($filename) ?: [];
        $this->options = $options;
    }

    /**
     * Makes sure $data holds the slice the iterator points at.
     *
     * Slices without any data rows (empty or header-only files) are skipped,
     * so that one empty file does not end the iteration while later files
     * still have rows. When the end of the set is reached, the previously
     * loaded slice is left in place and $currentFile equals count($files).
     *
     * @throws \RuntimeException when a matched file cannot be read
     */
    private function checkLoadedSlice(): void
    {
        $fileCount = count($this->files);

        while ($this->currentFile < $fileCount) {
            if ($this->currentFile !== $this->loadedFile) {
                $file = $this->files[$this->currentFile];

                // The warning is suppressed because the failure is reported
                // through the exception below instead.
                $csv = @file($file, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
                if ($csv === false) {
                    throw new \RuntimeException(sprintf('Unable to read CSV file "%s"', $file));
                }

                $this->loadData($csv);
                $this->loadedFile = $this->currentFile;
            }

            if ($this->currentIndex < count($this->data)) {
                return;
            }

            // Nothing (left) to read in this slice: move on to the next one.
            $this->currentFile++;
            $this->currentIndex = 0;
        }
    }

    /**
     * Parses the lines of one file into $data and resets the in-slice position.
     *
     * The header and the data rows are parsed with the same separator,
     * enclosure and escape settings. Rows shorter than the header are padded
     * with null and surplus cells are dropped, so array_combine() always
     * receives arrays of equal length.
     *
     * @param string[] $data lines of the file, header first, without line endings
     */
    private function loadData(array $data): void
    {
        $separator = $this->options['separator'] ?? ',';
        $enclosure = $this->options['enclosure'] ?? '"';
        $escape = $this->options['escape'] ?? "\\";

        $this->data = [];
        $this->currentIndex = 0;

        $headerLine = array_shift($data);
        if ($headerLine === null) {
            // Completely empty file: no header, therefore no rows.
            return;
        }

        $head = str_getcsv($headerLine, $separator, $enclosure, $escape);
        $width = count($head);

        foreach ($data as $row) {
            // Compare against '' explicitly: a line holding just "0" is data.
            if ($row === '') {
                continue;
            }

            $cells = str_getcsv($row, $separator, $enclosure, $escape);
            $cells = array_slice(array_pad($cells, $width, null), 0, $width);
            $this->data[] = array_combine($head, $cells);
        }
    }

    /**
     * Restarts the iteration at the first row of the first file.
     */
    public function rewind(): void
    {
        $this->currentFile = 0;
        $this->currentIndex = 0;
        $this->counter = 0;
        $this->checkLoadedSlice();
    }

    /**
     * Returns the zero-based row number counted across all files.
     *
     * NOTE(review): the attribute below presumes ReaderInterface extends
     * \Iterator (not visible from this file); it silences the PHP 8.1 return
     * type deprecation and is parsed as a plain comment by PHP 7.4.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->counter;
    }

    /**
     * Returns the current row keyed by the header cells of its file.
     *
     * @return array|null the row, or null when there is no current row
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->data[$this->currentIndex] ?? null;
    }

    /**
     * Advances to the next row, rolling over to the next file after the last
     * row of the loaded slice. The next file itself is loaded by valid().
     */
    public function next(): void
    {
        $this->counter++;
        $this->currentIndex++;

        if ($this->currentIndex >= count($this->data)) {
            $this->currentFile++;
            $this->currentIndex = 0;
        }
    }

    /**
     * Loads the slice the iterator points at if necessary and reports whether
     * a row is available at the current position.
     */
    public function valid(): bool
    {
        $this->checkLoadedSlice();

        return $this->currentFile < count($this->files)
            && $this->currentIndex < count($this->data);
    }
}