tests/schemas/todo.yml

#
# JBZoo Toolbox - Csv-Blueprint.
#
# This file is part of the JBZoo Toolbox project.
# For the full copyright and license information, please view the LICENSE
# file that was distributed with this source code.
#
# @license    MIT
# @copyright  Copyright (C) JBZoo.com, All rights reserved.
# @see        https://github.com/JBZoo/Csv-Blueprint
#

# File contains just ideas. It's invalid!


csv: # How to parse file before validation
  auto_detect: false                  # If true, then the control chars will be detected automatically.
  empty_values: # List of values that will be treated as empty
    - ""                              # By default, only empty string is treated as empty (string length = 0).
    - null
    - none
    - empty
    - nil

structural_rules:
  file_ends_with_newline: false       # If true, then the file must end with a newline character (\n).
  allow_blank_lines: true             # If true, then the file can contain blank lines.
  columns_count_min: ~                # Minimum number of columns in the file. By default, it is equal to the number of columns in the schema.
  columns_count: ~                    # Exact number of columns in the file. By default, it is equal to the number of columns in the schema.
  columns_count_max: ~                # Minimum number of columns in the file. By default, it is equal to the number of columns in the schema.
  ignore_duplicate_rows: false        # If true, then duplicate rows will be ignored. Duplicate rows are rows that have the same values in all columns - 100% match.
  allow_duplicate_column_names: false # Allow duplicate rows in the CSV file for different columns.


columns:
  - empty_values: [ '' ]                # Override csv.empty_values. List of values that will be treated as empty.

    # Multi prop
    multiple: true
    multiple_separator: "|"           # Separator for multiple values
    faker: [ faker_method arg1 arg2 ] # Faker method with arguments to generate random CSV data

    rules:
      is_null: true                   # see csv.empty_values and column.empty_values
      _list: true                     # Example: starts_with_list: [ 'a', 'b', 'c' ]

      # File system
      is_filename: true
      is_dirname: true
      is_realtive_path: true
      is_scientific_notation: true

      is_positive: true
      is_positive_zero: true
      is_negative: true
      is_negative_zero: true
      soft_precision: "1.1 == 1.10"
      # https://stackoverflow.com/questions/69107743/regular-expression-for-wkt-polygon: true
      is_unicode: true
      is_ascii: true
      is_latin: true
      coordinates: "1.0, -2.0"

      # identifier
      is_bsn: true                    # Validates a Dutch citizen service number (BSN).
      is_cnh: true                    # Validates a Brazilian national health card number (CNH).
      is_cnpj: true                   # Validates a Brazilian company identifier (CNPJ).
      is_cpf: true                    # Validates a Brazilian individual taxpayer identifier (CPF).
      is_nfe_access_key: true         # Validates a Brazilian Nota Fiscal Eletronica (NFe) access key.
      is_pis: true                    # Validates a Brazilian individual social security number (PIS).
      is_hetu: true                   # Validates a Finnish personal identity code (HETU).
      is_nip: true                    # Validates a Polish taxpayer identification number (NIP).
      is_pesel: true                  # Validates a Polish national identification number (PESEL).
      is_polish_id_card: true         # Validates a Polish identity card number.
      is_portuguese_nif: true         # Validates a Portuguese taxpayer number (NIF).

      # Logical OR for group of rules. If one of the rules is true, then the column is valid.
      group_or:
        - not_empty: true
          is_int: true
        - length_min: 3

      # Custom functions for validation
      custom_func_1: 'static fn (string $cellValue): bool => $cellValue !== "";' # eval????
      custom_func_2: '\My\Custom\Class::myMethod'
      custom_func_3: 'myFunction'

      # Combination of rules  to make it easier to read
      custom_some_rule:
        - is_int: true
        - length_min: 3

    aggregate_rules:
      # https://github.com/markrogoyski/math-php#statistics---averages
      truncated_mean: [ 1, 25 ]         # 25 percent of observations trimmed from each end of distribution
      generalized_mean: [ 1, 2 ]        # p-power mean
      power_mean: [ 1, 2 ]              # p-power mean
      lehmer_mean: [ 1, 3 ]             # p-power mean
      simple_moving_average: [ 1, n ]   # SMA
      cumulative_moving_average: 1      # CMA
      exponential_moving_average: 1     # EPA

      # Logical OR for group of rules. If one of the rules is true, then the column is valid.
      group_or:
        - is_unique: true
          sorted: [ asc, natural ]
        - sum_min: 1.0
          sum_greater: 2.0

      # Custom function for validation
      custom_func_1: 'static fn (array $cellValue): bool => $cellValue !== [];' # eval????
      custom_func_2: 'My\Custom\Class::myMethod'
      custom_func_3: 'myFunction'

complex_rules:
  - sum_by_group:
      group_column: City
      sum_column: population
      sums:
        - [ New York, 10000 ]
        - [ Los Angeles, 20000 ]

  - count_by_group:
      group_column: City
      value: New York
      count: 10

  - handler: 'static fn (string $colum0, string $column1): bool => $colum0 === $colum1;' # eval????
    handler_args:
      - column:0
      - column:1

  - handler: '\My\Complex\Rule::myMethod'
    handler_args:
      - column:0
      - column:1

  - handler: 'myFunction'
    handler_args:
      - column:0
      - column:1

    # Logical OR for group of rules. If one of the rules is true, then the column is valid.
  - group_or:
      - count_by_group:
          group_column: City
          value: New York
          count: 10
      - sum_by_group:
          group_column: City
          sum_column: population
          sums:
            - [ New York, 10000 ]
            - [ Los Angeles, 20000 ]

analyser:
  rules:
    other:
      - language_code:
          - alpha-2
          - alpha-3
      - credit_card:
          - all
          - by_brand