Source code for fairmd.lipids.schema_validation.validate_yaml

"""
Tools for validating YAML files against predefined JSON Schemas.

This module can be run either as a module or as a script.

Module usage:
-------------
.. code-block:: bash

    python -m fairmd.lipids.schema_validation.validate_yaml --schema readme README.yaml

Script usage:
-------------
.. code-block:: bash

    # Validate a README.yaml file
    python validate_yaml.py --schema readme README.yaml
    python validate_yaml.py --schema info info.yml
    # Multiple files can be validated at once
    python validate_yaml.py README.yaml other/README.yaml

"""

import argparse
import datetime
import json
import logging
import os
import sys
from typing import Literal

import yaml
from jsonschema import Draft7Validator, FormatChecker, SchemaError, ValidationError

logger = logging.getLogger(__name__)

default_info_schema_path = os.path.join(os.path.dirname(__file__), "schema", "info_yml_schema.json")
default_readme_yaml_schema_path = os.path.join(os.path.dirname(__file__), "schema", "readme_yaml_schema.json")


schema_type_options = Literal["info", "readme"]


def _filter_yaml_date_string_type_errors(errors: list[ValidationError]) -> list[ValidationError]:
    """
    PyYAML may parse unquoted ISO dates (e.g. 2021-02-23) into datetime.date.
    JSON Schema expects a string (often with format: date).
    This filters out only that specific type mismatch.
    """
    out: list[ValidationError] = []
    for e in errors:
        if e.validator == "type" and "string" in e.validator_value:
            inst = getattr(e, "instance", None)
            if isinstance(inst, (datetime.date, datetime.datetime)):
                continue
        out.append(e)
    return out


[docs] def validate_info_dict(instance: dict, schema_path: str = default_info_schema_path): """ Validate an info dict against a schema dict. Returns a list of jsonschema.ValidationError objects which is empty with valid. """ with open(schema_path, encoding="utf-8") as f: schema = json.load(f) validator = Draft7Validator(schema, format_checker=FormatChecker()) errors = list(validator.iter_errors(instance)) return _filter_yaml_date_string_type_errors(errors)
[docs] def validate_info_file(info_file_path: str, schema_path: str = default_info_schema_path): """ Validate an info file (YML/YAML) on disk against a JSON schema file. Returns a list of ValidationError objects (empty if valid). """ with open(info_file_path, encoding="utf-8") as f: instance = yaml.safe_load(f) if not isinstance(instance, dict) or not instance: raise ValueError("YAML did not contain a non-empty mapping") return validate_info_dict(instance, schema_path)
[docs] def validate_readme_dict(instance: dict, schema_path: str = default_readme_yaml_schema_path): """ Validate a README.yaml dict against the README JSON schema. Returns a list of jsonschema.ValidationError objects (empty if valid). """ with open(schema_path, encoding="utf-8") as f: schema = json.load(f) validator = Draft7Validator(schema, format_checker=FormatChecker()) errors = list(validator.iter_errors(instance)) return _filter_yaml_date_string_type_errors(errors)
[docs] def validate_readme_file(readme_file_path: str, schema_path: str = default_readme_yaml_schema_path): """ Validate a README.yaml file on disk against the README JSON schema. Returns a list of ValidationError objects (empty if valid). """ with open(readme_file_path, encoding="utf-8") as f: instance = yaml.safe_load(f) if not isinstance(instance, dict) or not instance: raise ValueError("YAML did not contain a non-empty mapping") return validate_readme_dict(instance, schema_path)
[docs] def run_file(path: str, schema_type: schema_type_options) -> None: """ Validate a single YAML file against the selected schema. On success, logs "OK: <path>" and returns normally. On failure, logs detailed schema errors and raises an exception. Raises: FileNotFoundError: If the file does not exist or is not a regular file. RuntimeError: If the file was successfully read but failed schema validation. ValueError: If the YAML does not contain a non-empty mapping (invalid structure). OSError, yaml.YAMLError, json.JSONDecodeError, SchemaError: For I/O errors, invalid YAML, invalid JSON schema, or other runtime failures. """ if not os.path.isfile(path): raise FileNotFoundError(path) if schema_type == "info": errors = validate_info_file(path) else: errors = validate_readme_file(path) if not errors: logger.info("OK: %s", path) return logger.error("INVALID: %s", path) for err in errors: keys = ".".join(str(p) for p in err.path) if err.path else "<root>" logger.error(" -> %s (at %s)", err.message, keys) raise RuntimeError("Schema validation failed")
[docs] def main() -> int: """ Command-line entry point for YAML schema validation. Validates one or more YAML files against either the FAIRMD info.yml schema or the README.yaml schema. :returns: Process exit code: 0 = all files valid 1 = at least one file failed schema validation 2 = at least one file was missing or not a regular file 3 = at least one file could not be read, parsed, or validated due to YAML, JSON, or runtime errors """ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") parser = argparse.ArgumentParser(description="Validate YAML files against FAIRMD info / README schemas.") parser.add_argument( "--schema", "-s", choices=["info", "readme"], default="readme", help="Schema to use: 'readme' (default) or 'info'.", ) parser.add_argument( "files", nargs="+", help="YAML files to validate.", ) args = parser.parse_args() exit_code = 0 for f in args.files: path = os.path.normpath(f) try: run_file(path, args.schema) except RuntimeError: exit_code = max(exit_code, 1) except FileNotFoundError: logger.error("File not found (or not a file): %s", path) exit_code = max(exit_code, 2) except (ValueError, OSError, yaml.YAMLError, json.JSONDecodeError, SchemaError) as e: logger.error("ERROR: %s", path) logger.error(" -> %s: %s", type(e).__name__, e) exit_code = max(exit_code, 3) return exit_code
if __name__ == "__main__": sys.exit(main())