3 Commits

Author SHA1 Message Date
d0956f851c Initial unit tests, code cleanup 2022-10-31 00:42:29 +01:00
abde48640f updated readme 2022-10-30 23:24:27 +01:00
f1ab6bd74b Updated readme, bins 2022-10-30 23:21:53 +01:00
15 changed files with 217 additions and 34 deletions

3
.gitignore vendored
View File

@ -1,3 +1,4 @@
/vendor
/composer.lock
/*.zip
/.phpunit.cache

View File

@ -1,22 +1,47 @@
# Dataset Library for PHP/Composer
This is a library for loading datasets from bundled packages.
This is a library for loading datasets from bundled packages. The idea isn't to
use the classes in this library as a generic datasource, although you could
probably pull that off. Instead, use the datasets to import the relevant data to
your database of choice, and optionally keep track of the version numbers so
new data can be imported automatically when the dependencies have been updated
and a new version has been installed.
## Installing
To install dataset, require it using composer:
```shell
$ composer require noccylabs/dataset
```
You also need some actual datasets. Some interesting ones could be:
Package | Description
---|---
[noccylabs/dataset-postal](https://dev.noccylabs.info/noccy/dataset-postal) | Patterns and info for postal (zip) code validation
[noccylabs/dataset-calendar](https://dev.noccylabs.info/noccy/dataset-calendar) | Bank holidays
[noccylabs/dataset-iso3166](https://dev.noccylabs.info/noccy/dataset-iso3166) | ISO 3166 country codes and names
## Example
Require Dataset, and some actual datasets:
$ composer require noccylabs/dataset juicebox/flavordata
You can now open the desired sets and do useful stuff with the data:
```php
use NoccyLabs\Dataset\DatasetManager;
$dm = new DatasetManager();
$ds = $dm->getDataset("juicebox/flavordata#flavors");
foreach ($ds as $row) {
// Call on getDataset() if you want access to the metadata,
// Replace with openDataset() to quickly call getDataset()->open()
$ds = $dm->getDataset("noccylabs/dataset-iso3166#countries");
// This is how you get the metadata
echo "Dataset ID: ".$ds->getIdentifier(); // noccylabs/dataset-iso3166#countries
echo "Dataset version: ".$ds->getVersion(); // 2022.10.1
// Get a reader by calling open()
$reader = $ds->open();
foreach ($reader as $row) {
// row is an array
}
```

View File

@ -2,8 +2,9 @@
<?php
foreach ([
__DIR__."/..",
__DIR__."/../vendor",
__DIR__."/../../../vendor",
__DIR__."/../../..",
__DIR__."/vendor",
] as $dir) {
if (file_exists($dir."/autoload.php")) {
@ -25,5 +26,5 @@ foreach ($datasets as $dataset) {
if (!$headers) $headers = array_keys($row);
$rows++;
}
echo " ".$rows." rows\n - ".join("\n - ",$headers)."\n";
echo " # ".$rows." rows\n - ".join("\n - ",$headers)."\n";
}

View File

@ -14,6 +14,9 @@
"email": "cvagnetoft@gmail.com"
}
],
"require": {},
"bin": [ "bin/dataset-info" ]
"bin": [ "bin/dataset-info" ],
"require-dev": {
"phpunit/phpunit": "^9.5",
"phpstan/phpstan": "^1.8"
}
}

13
phpstan.neon Normal file
View File

@ -0,0 +1,13 @@
parameters:
level: 5
excludePaths:
- doc
- vendor
- var
- tests
# Paths to include in the analysis
paths:
- src

27
phpunit.xml Normal file
View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.5/phpunit.xsd"
bootstrap="vendor/autoload.php"
cacheResultFile=".phpunit.cache/test-results"
executionOrder="depends,defects"
forceCoversAnnotation="true"
beStrictAboutCoversAnnotation="true"
beStrictAboutOutputDuringTests="true"
beStrictAboutTodoAnnotatedTests="true"
convertDeprecationsToExceptions="true"
failOnRisky="true"
failOnWarning="true"
verbose="true">
<testsuites>
<testsuite name="default">
<directory>tests</directory>
</testsuite>
</testsuites>
<coverage cacheDirectory=".phpunit.cache/code-coverage"
processUncoveredFiles="true">
<include>
<directory suffix=".php">src</directory>
</include>
</coverage>
</phpunit>

View File

@ -7,6 +7,10 @@ use NoccyLabs\Dataset\Readers\JsonReader;
class Dataset
{
protected string $packageName;
protected string $datasetName;
protected string $identifier;
protected array $options;
@ -18,6 +22,8 @@ class Dataset
$this->identifier = $identifier;
$this->options = $options;
$this->version = $version;
[$this->packageName, $this->datasetName] = explode("#", $identifier, 2);
}
public function getIdentifier(): string
@ -25,11 +31,31 @@ class Dataset
return $this->identifier;
}
/**
 * Return the package part of the dataset identifier.
 *
 * This is the portion of the identifier before the first "#", as split
 * off in the constructor.
 *
 * @return string The package name
 */
public function getPackageName(): string
{
return $this->packageName;
}
/**
 * Return the dataset part of the dataset identifier.
 *
 * This is the portion of the identifier after the first "#", as split
 * off in the constructor.
 *
 * @return string The dataset name within the package
 */
public function getDatasetName(): string
{
return $this->datasetName;
}
/**
 * Return the version that was passed to the constructor.
 *
 * @return string|null The version string, or null when none is set
 */
public function getVersion(): ?string
{
return $this->version;
}
/**
 * Return the optional "comment" entry from the dataset options.
 *
 * @return string|null The comment, or null when the options have none
 */
public function getComment(): ?string
{
    return $this->options['comment'] ?? null;
}
/**
 * Return the optional "license" entry from the dataset options.
 *
 * @return string|null The license, or null when the options have none
 */
public function getLicense(): ?string
{
    return $this->options['license'] ?? null;
}
public function open(): ReaderInterface
{
$filename = $this->options['filename'];

View File

@ -22,16 +22,46 @@ class DatasetManager
}
}
/**
 * Return all the available datasets.
 *
 * The returned array is the manager's static registry, which is keyed by
 * dataset identifier.
 *
 * @return array<string, Dataset> The available datasets, indexed by identifier
 */
public function getAvailableDatasets(): array
{
return self::$datasets;
}
/**
* Directly return a reader for a specific dataset.
*
* @param string $identifier The dataset identifier
* @return ReaderInterface A reader for the data
* @throws InvalidDatasetException if the dataset can not be opened
* @throws UnknownDatasetException if the dataset does not exist
*/
public function openDataset(string $identifier): ReaderInterface
{
    // Resolve the metadata object first; an unknown identifier throws here.
    $dataset = $this->getDataset($identifier);

    return $dataset->open();
}
/**
* Return a Dataset object containing metadata and methods to retrieve
* a reader for the data in the set.
*
* @param string $identifier The dataset identifier
* @throws UnknownDatasetException if the dataset does not exist
*/
public function getDataset(string $identifier): Dataset
{
    $isKnown = array_key_exists($identifier, self::$datasets);

    if ($isKnown) {
        return self::$datasets[$identifier];
    }

    // Nothing registered under this identifier.
    throw UnknownDatasetException::DatasetNotFound();
}
/**
* Find the vendor directory and try to locate all bundled datasets
*
@ -67,8 +97,8 @@ class DatasetManager
*
*
*
* @param string The package name (org/package)
* @param string The full path to the package (..../vendor/org/package)
* @param string $package The package name (org/package)
* @param string $path The full path to the package (..../vendor/org/package)
*/
private function scanPackageDatasets(string $package, string $path)
{
@ -84,10 +114,15 @@ class DatasetManager
}
$this->loadDatasets($json['datasets'], null, $package, $path);
//printf("found %d sets in %s\n", count($json['datasets']), $package);
}
/**
*
*
*
*
*/
private function loadDatasets(array $datasets, ?string $prefix, string $package, string $path)
{
foreach ($datasets as $name=>$options) {
@ -103,14 +138,21 @@ class DatasetManager
}
}
/**
*
*
*
*/
private function determineVendorPath(): ?string
{
if (defined("COMPOSER_VENDOR_PATH")) {
return COMPOSER_VENDOR_PATH;
$d = defined("NOCCYLABS_DATASET_TEST") ? (dirname(__DIR__)."/tests") : __DIR__;
while ($d != dirname($d)) {
if (file_exists($d."/autoload.php")) break;
$d = dirname($d);
}
if (file_exists(__DIR__."/../../../autoload.php")) {
// we are installed as a composer package
return dirname(__DIR__, 3);
if (file_exists($d."/autoload.php")) {
return $d;
}
return null;
}
@ -127,17 +169,4 @@ class DatasetManager
self::$datasets[$id] = $dataset;
}
/**
*
*
* @throws InvalidDatasetException if the dataset can not be opened
* @throws UnknownDatasetExcception if the dataset does not exist
*/
public function getDataset(string $identifier): Dataset
{
return self::$datasets[$identifier];
}
}

View File

@ -0,0 +1,8 @@
<?php
namespace NoccyLabs\Dataset;
use RuntimeException;
/**
 * Runtime exception type for datasets that are present but unusable.
 *
 * DatasetManager::openDataset() documents this as being thrown when a
 * dataset can not be opened. The class adds nothing to RuntimeException.
 */
class InvalidDatasetException extends RuntimeException
{}

View File

@ -8,6 +8,8 @@ class CsvReader implements ReaderInterface
{
private array $files = [];
private array $options = [];
private int $currentFile = 0;
private ?int $loadedFile = null;
@ -21,6 +23,7 @@ class CsvReader implements ReaderInterface
public function __construct(string $filename, array $options)
{
    // $filename is treated as a glob pattern and may match several files.
    // glob() returns false on error (and on some systems when nothing
    // matches); assigning false to the array-typed $files property would
    // raise a TypeError, so normalise it to "no files".
    $matches = glob($filename);
    $this->files = ($matches === false) ? [] : $matches;
    $this->options = $options;
}
private function checkLoadedSlice()

View File

@ -8,6 +8,8 @@ class JsonReader implements ReaderInterface
{
private array $files = [];
private array $options = [];
private int $currentFile = 0;
private ?int $loadedFile = null;
@ -21,6 +23,7 @@ class JsonReader implements ReaderInterface
public function __construct(string $filename, array $options)
{
    // $filename is treated as a glob pattern and may match several files.
    // glob() returns false on error (and on some systems when nothing
    // matches); assigning false to the array-typed $files property would
    // raise a TypeError, so normalise it to "no files".
    $matches = glob($filename);
    $this->files = ($matches === false) ? [] : $matches;
    $this->options = $options;
}
private function checkLoadedSlice()

View File

@ -0,0 +1,13 @@
<?php
namespace NoccyLabs\Dataset;
use RuntimeException;
/**
 * Raised when a dataset identifier does not match any registered dataset.
 */
class UnknownDatasetException extends RuntimeException
{
    /**
     * Named constructor for the "no such dataset" case.
     *
     * @return self An exception carrying the standard not-available message
     */
    public static function DatasetNotFound(): self
    {
        $message = "The requested dataset is not available";

        return new self($message);
    }
}

3
tests/autoload.php Normal file
View File

@ -0,0 +1,3 @@
<?php
// This is not an autoloader, it is used by the tests

View File

@ -0,0 +1,7 @@
{
"datasets": {
"baz": {
"filename": "data/test.csv"
}
}
}

View File

@ -0,0 +1,21 @@
<?php
namespace NoccyLabs\Dataset;
defined("NOCCYLABS_DATASET_TEST") || define("NOCCYLABS_DATASET_TEST", true);
/**
 * Smoke test for DatasetManager: constructing it should discover the
 * datasets bundled with the test fixtures.
 */
class DatasetManagerTest extends \PHPUnit\Framework\TestCase
{
    /**
     * A freshly created manager exposes exactly one dataset.
     *
     * The @covers targets are fully qualified: PHPUnit does not resolve them
     * relative to the current namespace, so the short form is reported as a
     * non-existent target (a warning, which failOnWarning turns into a
     * failure).
     *
     * NOTE(review): with beStrictAboutCoversAnnotation enabled, other code
     * this test executes may need @covers/@uses entries too - confirm by
     * running with coverage enabled.
     *
     * @covers \NoccyLabs\Dataset\DatasetManager::__construct
     * @covers \NoccyLabs\Dataset\DatasetManager::determineVendorPath
     */
    public function testCreatingTheDatasetManager(): void
    {
        $dm = new DatasetManager();
        $sets = $dm->getAvailableDatasets();

        // assertCount reports the actual size on failure.
        $this->assertCount(1, $sets, "Expected 1 loaded set");
    }
}