File Coverage

File:lib/Schema/Validator.pm
Coverage:97.5%

linestmtbrancondsubtimecode
1package Schema::Validator;
2
3# ---------------------------------------------------------------------------
4# Schema::Validator -- ISO 8601 datetime validation and Schema.org vocabulary
5# loading.  Purely functional; all symbols are opt-in via import list.
6# ---------------------------------------------------------------------------
7
8
7
7
7
768237
5
91
use strict;
9
7
7
7
9
5
131
use warnings;
10
7
7
7
1288
40561
13
use autodie qw(:all);
11
12
7
7
7
52565
7
194
use Carp qw(carp croak);
13
7
7
7
1780
1735673
206
use DateTime::Format::ISO8601;
14
7
7
7
334
4372
188
use Encode qw(decode encode);
15
7
7
7
14
5
75
use File::Spec;
16
7
7
7
16
8
173
use JSON::MaybeXS qw(decode_json);
17
7
7
7
2322
140729
153
use LWP::UserAgent;
18
7
7
7
204
4455
182
use Params::Get qw(get_params);
19
7
7
7
302
13991
138
use Params::Validate::Strict qw(validate_strict);
20
7
7
7
13
7
117
use Readonly;
21
7
7
7
22
4
96
use Scalar::Util qw(reftype);
22
23
7
7
7
10
6
5522
use base 'Exporter';
24
25# Only these two symbols may be imported by callers via 'use ... qw(...)'.
26our @EXPORT_OK = qw(is_valid_datetime load_dynamic_vocabulary);
27
28our $VERSION = '0.03';
29
30# ---------------------------------------------------------------------------
31# Package globals: both are populated as a side-effect of
32# load_dynamic_vocabulary().  Callers may read them after that call.
33# ---------------------------------------------------------------------------
34
35# rdfs:Class items from the Schema.org JSON-LD graph, keyed by class label
36our %dynamic_schema;
37
38# rdf:Property items from the Schema.org JSON-LD graph, keyed by property label
39our %dynamic_properties;
40
41# ===========================================================================
42# CONSTANTS
43# ===========================================================================
44# All magic strings and numbers are confined here; nothing below uses bare
45# literals.  Every constant mirrors a key in %config so runtime overrides
46# are possible without re-opening the Readonly namespace.
47# ---------------------------------------------------------------------------
48
49# Default cache directory: $CACHEDIR env var if set, otherwise the system
50# temporary directory.  Evaluated once at module load time.
51Readonly::Scalar my $DEFAULT_CACHE_DIR =>
52        (defined $ENV{CACHEDIR} && length $ENV{CACHEDIR})
53                ? $ENV{CACHEDIR}
54                : File::Spec->tmpdir();
55
56# Default cache filename -- stored in $DEFAULT_CACHE_DIR, never in CWD.
57Readonly::Scalar my $DEFAULT_CACHE_FILE =>
58        File::Spec->catfile($DEFAULT_CACHE_DIR, 'schemaorg_dynamic_vocabulary.jsonld');
59
60# 86400 == 60 * 60 * 24: cache is considered fresh for one full day.
61Readonly::Scalar my $DEFAULT_CACHE_DURATION => 86_400;
62
63# Canonical URL for the Schema.org full vocabulary in JSON-LD format.
64Readonly::Scalar my $DEFAULT_VOCAB_URL => 'https://schema.org/version/latest/schemaorg-current-https.jsonld';
65
66# HTTP timeout for the vocabulary download request, in seconds.
67Readonly::Scalar my $DEFAULT_UA_TIMEOUT => 30;
68
69# JSON-LD structural keys and RDF type labels used when traversing @graph.
70Readonly::Scalar my $AT_GRAPH        => '@graph';
71Readonly::Scalar my $RDF_CLASS       => 'rdfs:Class';
72Readonly::Scalar my $RDF_PROPERTY    => 'rdf:Property';
73Readonly::Scalar my $RDFS_LABEL      => 'rdfs:label';
74Readonly::Scalar my $RDFS_LABEL_FULL => 'http://www.w3.org/2000/01/rdf-schema#label';
75
76# ===========================================================================
77# CONFIGURATION
78# ===========================================================================
79# Callers may override any key before calling an exported function, or inject
80# a full replacement via Object::Configure->configure('Schema::Validator', \%h).
81# ---------------------------------------------------------------------------
82our %config = (
83        cache_file     => $DEFAULT_CACHE_FILE,
84        cache_duration => $DEFAULT_CACHE_DURATION,
85        vocab_url      => $DEFAULT_VOCAB_URL,
86        ua_timeout     => $DEFAULT_UA_TIMEOUT,
87);
88
89# ===========================================================================
90# PUBLIC INTERFACE (POD + code)
91# ===========================================================================
92
93 - 248
=head1 NAME

Schema::Validator - Tools for validating and loading Schema.org vocabulary definitions

=head1 VERSION

Version 0.03

=head1 SYNOPSIS

    use Schema::Validator qw(is_valid_datetime load_dynamic_vocabulary);

    # Validate a date or datetime string
    if (is_valid_datetime('2024-11-14')) {
        print "Valid date\n";
    }

    # Load and query the Schema.org vocabulary
    my $classes = load_dynamic_vocabulary();
    if (exists $classes->{'Person'}) {
        print "Person class is defined\n";
    }

    # Override a config value for a single call
    my $classes = load_dynamic_vocabulary(ua_timeout => 60);

=head1 DESCRIPTION

C<Schema::Validator> provides two utilities for working with Schema.org
structured data:

=over 4

=item * L</is_valid_datetime> -- validates a string against the ISO 8601
date/datetime subset used by Schema.org.

=item * L</load_dynamic_vocabulary> -- downloads (and caches for 24 hours)
the full Schema.org JSON-LD vocabulary and exposes all class and property
definitions as a hashref and via package globals.

=back

=head2 Configuration

Runtime behaviour is controlled by the package-level C<%Schema::Validator::config>
hash.  Supported keys and their defaults:

    cache_file     => "$CACHEDIR/schemaorg_dynamic_vocabulary.jsonld"  # or tmpdir
    cache_duration => 86400                                          # seconds
    vocab_url      => 'https://schema.org/.../schemaorg-current-https.jsonld'
    ua_timeout     => 30                                             # seconds

Override any key before calling an exported function:

    $Schema::Validator::config{ua_timeout} = 60;

Or supply a complete replacement via L<Object::Configure>:

    Object::Configure->configure('Schema::Validator', \%my_config);

=head1 PACKAGE VARIABLES

=head2 %dynamic_schema

Package hash keyed by Schema.org class label (e.g. C<Person>, C<Event>).
Values are the raw item hashrefs from the JSON-LD C<@graph> array.
Populated as a side-effect of L</load_dynamic_vocabulary>.

=head2 %dynamic_properties

Package hash keyed by Schema.org property label (e.g. C<name>, C<startDate>).
Values are the raw item hashrefs from the JSON-LD C<@graph> array.
Populated as a side-effect of L</load_dynamic_vocabulary>.

=head1 FUNCTIONS

=head2 is_valid_datetime

=head3 PURPOSE

Tests whether a scalar string conforms to one of the ISO 8601
date or datetime formats accepted by Schema.org:

    YYYY-MM-DD               (date only)
    YYYY-MM-DDTHH:MM         (T separator, no seconds)
    YYYY-MM-DD HH:MM         (space separator, no seconds)
    YYYY-MM-DDTHH:MM:SS      (T separator, with seconds)
    YYYY-MM-DD HH:MM:SS      (space separator, with seconds)

Optional timezone designators (C<Z>, C<+HH:MM>, C<-HH:MM>) are B<accepted>.
Calendar sanity B<is> enforced: out-of-range values (e.g. month 99) are B<rejected>.

=head3 ARGUMENTS

=over 4

=item * C<string> (required, scalar) -- the candidate string to test.
Both positional (C<is_valid_datetime('2024-11-14')>) and named
(C<is_valid_datetime(string =E<gt> '2024-11-14')>) calling conventions
are accepted.

=back

=head3 RETURNS

C<1> if the string is in a supported format; C<0> otherwise.
Returns C<0> for C<undef> or an empty string without throwing.

=head3 SIDE EFFECTS

None.

=head3 NOTES

Delegates to C<DateTime::Format::ISO8601->parse_datetime()> for semantic
validation, so out-of-range values (e.g. month 99) are rejected.
The space-separator variant (C<YYYY-MM-DD HH:MM>) is normalised to a T
separator before parsing since the module requires strict ISO 8601.
Timezone designators (C<Z>, C<+HH:MM>, C<-HH:MM>) are now accepted.

=head3 EXAMPLE

    use Schema::Validator qw(is_valid_datetime);

    is_valid_datetime('2024-11-14');                 # 1
    is_valid_datetime('2024-11-14T15:30:00');        # 1
    is_valid_datetime('2024-11-14 15:30');           # 1  (space sep normalised)
    is_valid_datetime('2024-11-14T15:30:00Z');       # 1  (UTC timezone)
    is_valid_datetime('2024-11-14T15:30:00+01:00');  # 1  (offset timezone)
    is_valid_datetime('2024-99-01');                 # 0  (invalid month)
    is_valid_datetime('28/06/2025');                 # 0
    is_valid_datetime(undef);                        # 0  (no exception)
    is_valid_datetime('');                           # 0  (no exception)

    # Named calling convention
    is_valid_datetime(string => '2024-11-14');       # 1

=head3 API SPECIFICATION

=head4 Input (Params::Validate::Strict)

    {
        string => {
            type     => 'string',
            optional => 0,
        },
    }

=head4 Output (Return::Set)

    {
        type => 'boolean'
        description  => '1 (valid) or 0 (invalid, undef, or empty input)'
    }

=cut
249
250sub is_valid_datetime {
251        # Accept both positional (is_valid_datetime($s)) and named
252        # (is_valid_datetime(string => $s)) calling conventions.
253        # Validate: value must be a scalar or undef (undef returns 0 cleanly below).
254
113
633022
        my $p = validate_strict(
255                input => get_params('string', \@_),
256                schema   => { 'string' => { type => 'string', optional => 0 } },
257        );
258
259
107
8366
        my $string = $p->{string};
260
261        # Treat undef or empty string as invalid without throwing.
262
107
234
        return 0 unless defined $string && length $string;
263
264        # Normalise the space-separator variant to T before handing off to the
265        # module, which requires strict ISO 8601 (T separator only).
266
90
183
        (my $normalised = $string) =~ s/^(\d{4}-\d{2}-\d{2}) (?=\d{2}:)/$1T/;
267
268        # Delegate to DateTime::Format::ISO8601 for full semantic validation;
269        # a truthy (DateTime) object means valid, undef/$@ means invalid.
270
90
90
75
140
        return eval { DateTime::Format::ISO8601->parse_datetime($normalised) } ? 1 : 0;
271}
272
273# ===========================================================================
274
275 - 381
=head2 load_dynamic_vocabulary

=head3 PURPOSE

Downloads the complete Schema.org vocabulary from the official JSON-LD
endpoint, parses it into class and property lookup tables, caches the raw
JSON-LD locally, and returns the class table as a hashref.

The cache is considered fresh for C<cache_duration> seconds (default 24 hours).
On network failure the function falls back to a stale cache rather than
returning an empty result, and emits a C<carp> warning.

=head3 ARGUMENTS

All arguments are optional; defaults come from C<%Schema::Validator::config>.

=over 4

=item * C<cache_file> (optional, scalar) -- path to the local cache file.
Defaults to C<$config{cache_file}>: C<$CACHEDIR/schemaorg_dynamic_vocabulary.jsonld>
if C<$ENV{CACHEDIR}> is set, otherwise C<File::Spec-E<gt>tmpdir()> is used.

=item * C<cache_duration> (optional, scalar) -- cache validity window in seconds.
Defaults to C<$config{cache_duration}>.

=item * C<vocab_url> (optional, scalar) -- URL of the JSON-LD vocabulary endpoint.
Defaults to C<$config{vocab_url}>.

=item * C<ua_timeout> (optional, scalar) -- LWP::UserAgent timeout in seconds.
Defaults to C<$config{ua_timeout}>.

=back

Both zero-argument and named calling conventions are supported:

    load_dynamic_vocabulary();
    load_dynamic_vocabulary(ua_timeout => 60);

=head3 RETURNS

A hashref mapping class labels (e.g. C<'Person'>) to their raw JSON-LD
definition hashrefs from the C<@graph> array.

Returns an empty hashref C<{}> on all failure paths (network unreachable,
no cache, JSON parse error).  Never throws.

=head3 SIDE EFFECTS

=over 4

=item * Populates C<%Schema::Validator::dynamic_schema> with class definitions.

=item * Populates C<%Schema::Validator::dynamic_properties> with property definitions.

=item * Creates or updates the local cache file on a successful download.

=item * Emits C<carp> warnings on network failures, I/O errors, or JSON
parse errors.

=back

=head3 NOTES

The default cache directory is determined once at module load time: the
C<$CACHEDIR> environment variable is used if set; otherwise C<File::Spec-E<gt>tmpdir()>
is used (typically C</tmp> on Unix).  Override for the session with:

    $Schema::Validator::config{cache_file} = '/my/path/vocab.jsonld';

The C<bin/validate-schema> CLI tool imports this function from the module and
uses C<cache_file =E<gt> $path> to store its cache under C<~/.cache/schema_validator/>.

=head3 EXAMPLE

    use Schema::Validator qw(load_dynamic_vocabulary);

    my $classes = load_dynamic_vocabulary();
    printf "%d classes loaded\n", scalar keys %{$classes};

    # Check for a specific class in the returned hashref
    print "Has Person\n" if exists $classes->{'Person'};

    # Or query the package globals directly after the call
    Schema::Validator::load_dynamic_vocabulary();
    my @names = sort keys %Schema::Validator::dynamic_schema;

=head3 API SPECIFICATION

=head4 Input (Params::Validate::Strict)

    {
        cache_file     => { type => 'string', optional => 1 },
        cache_duration => { type => 'string', optional => 1 },
        vocab_url      => { type => 'string', optional => 1 },
        ua_timeout     => { type => 'string', optional => 1 },
    }

=head4 Output (Return::Set)

    {
        type  => 'hashref',
        description  => 'class-label => JSON-LD item hashref'
        # ON_FAILURE   => 'empty hashref {}; never throws'
        # SIDE_EFFECTS => 'populates %dynamic_schema and %dynamic_properties'
    }

=cut
382
383sub load_dynamic_vocabulary {
384
104
279910
        my $params;
385
386        # Validate types of any supplied overrides (all are optional scalars).
387
104
138
        if(scalar(@_)) {
388
103
172
                $params = validate_strict(
389                        input => get_params(undef, \@_),
390                        schema   => {
391                                cache_file     => { type => 'string', optional => 1 },
392                                cache_duration => { type => 'integer', optional => 1 },
393                                vocab_url      => { type => 'string', optional => 1 },
394                                ua_timeout     => { type => 'integer', optional => 1 },
395                        }
396                );
397        }
398
399        # Merge caller overrides with module-level configuration defaults.
400
102
13253
        my $cache_file     = $params->{cache_file}     // $config{cache_file};
401
102
130
        my $cache_duration = $params->{cache_duration} // $config{cache_duration};
402
102
190
        my $vocab_url      = $params->{vocab_url}      // $config{vocab_url};
403
102
162
        my $ua_timeout     = $params->{ua_timeout}     // $config{ua_timeout};
404
405
102
58
        my $content;
406
407        # Attempt to read a fresh cache file.  Open directly to avoid the TOCTOU
408        # race that would exist between a separate -e test and the open call.
409
102
929
        if (-e $cache_file && (time - (stat($cache_file))[9] < $cache_duration)) {
410
33
33
36
52
                eval { $content = _slurp_file($cache_file) };
411
33
850
                carp "Could not read cache '$cache_file': $@" if $@;
412        }
413
414        # If no usable content yet, try to download the vocabulary.
415
102
314
        unless (defined $content) {
416
71
76
                $content = _fetch_url($vocab_url, $ua_timeout);
417
418
71
222
                if (defined $content) {
419                        # Persist the download to the cache (best-effort; warn, do not die).
420
53
53
40
50
                        eval { _spit_file($cache_file, $content) };
421
53
860
                        carp "Could not write cache '$cache_file': $@" if $@;
422                } else {
423                        # Network failed; fall back to a stale cache if one exists.
424
18
64
                        if (-e $cache_file) {
425
8
8
6
11
                                eval { $content = _slurp_file($cache_file) };
426
8
1045
                                if ($@) {
427
3
31
                                        carp "Could not read stale cache '$cache_file': $@";
428                                } else {
429
5
32
                                        carp "Network unavailable; using stale cache '$cache_file'";
430                                }
431                        }
432                }
433        }
434
435        # All content-acquisition strategies failed; return empty result.
436
102
985
        unless (defined $content) {
437
13
121
                carp 'load_dynamic_vocabulary: no vocabulary content available';
438
13
1450
                return {};
439        }
440
441        # Parse the JSON; treat errors as non-fatal warnings.
442
89
89
60
933
        my $data = eval { decode_json($content) };
443
89
2226
        if ($@) {
444
4
45
                carp "Failed to parse vocabulary JSON: $@";
445
4
572
                return {};
446        }
447
448        # Guard against decode_json returning a non-object (e.g. a JSON array,
449        # a bare number, or any other non-hash type).  Calling exists on a
450        # non-hashref dies; catching it here keeps the "never throws" contract.
451
85
126
        unless (ref($data) eq 'HASH') {
452
2
25
                carp "Vocabulary JSON is not a JSON object";
453
2
208
                return {};
454        }
455
456        # Confirm the expected JSON-LD graph structure is present.
457
83
156
        unless (exists $data->{$AT_GRAPH} && ref($data->{$AT_GRAPH}) eq 'ARRAY') {
458
5
56
                carp "Vocabulary JSON is missing the '\@graph' array";
459
5
680
                return {};
460        }
461
462        # Delegate parsing to the internal graph processor.
463
78
86
        my ($classes, $props) = _parse_graph($data->{$AT_GRAPH});
464
465        # Populate package globals as documented side-effects.
466
78
78
52
486
        %dynamic_schema     = %{$classes};
467
78
78
75
93
        %dynamic_properties = %{$props};
468
469        # Report the result count via carp (informational, not an error).
470
78
859
        carp sprintf(
471                'Dynamic vocabulary loaded: %d classes, %d properties',
472                scalar(keys %dynamic_schema),
473                scalar(keys %dynamic_properties),
474        );
475
476        # Return the class hashref; callers needing properties use the global.
477
78
7452
        return $classes;
478}
479
480# ===========================================================================
481# INTERNAL HELPERS
482# All routines below begin with _ and are not part of the public API.
483# ===========================================================================
484
485# ---------------------------------------------------------------------------
486# _slurp_file($path)
487#
488# Purpose:  Read the complete contents of a file into a scalar.
489# Entry:    $path is a path to an existing, readable file.
490# Returns:  The file contents as a scalar string.
491# Side fx:  None beyond reading the file.
492# Notes:    autodie causes open/close to throw on failure; callers should
493#           wrap in eval { } and handle $@ if a non-fatal path is needed.
494# ---------------------------------------------------------------------------
495sub _slurp_file {
496
38
9111
        my ($path) = @_;
497
498        # Open the file; autodie will throw if this fails.
499
38
64
        open my $fh, '<', $path;
500
501        # Temporarily undefine $/ to read the whole file in one operation.
502
37
5073
        local $/;
503
37
335
        my $content = <$fh>;
504
505
37
55
        close $fh;
506
37
2247
        return $content;
507}
508
509# ---------------------------------------------------------------------------
510# _spit_file($path, $content)
511#
512# Purpose:  Write a scalar string to a file, creating or truncating it.
513# Entry:    $path is a writable path; $content is a defined scalar.
514# Returns:  1 on success.
515# Side fx:  Creates or overwrites $path.
516# Notes:    autodie causes open/close to throw on failure; wrap in eval
517#           when the write is non-critical (e.g. cache population).
518# ---------------------------------------------------------------------------
519sub _spit_file {
520
32
9323
        my ($path, $content) = @_;
521
522        # Open for writing; autodie throws on permission or path errors.
523
32
51
        open my $fh, '>', $path;
524
31
3985
        print $fh $content;
525
31
38
        close $fh;
526
527
31
1980
        return 1;
528}
529
530# ---------------------------------------------------------------------------
531# _fetch_url($url, $timeout)
532#
533# Purpose:  Perform an HTTP GET and return the decoded response body.
534# Entry:    $url is a valid absolute HTTP/HTTPS URL; $timeout is a positive
535#           integer (seconds).
536# Returns:  Decoded response content on success; undef on HTTP error.
537# Side fx:  Network I/O; emits carp on non-success HTTP status.
538# Notes:    Transport-level errors (DNS failure, TLS error) may propagate as
539#           exceptions from LWP::UserAgent; callers should wrap in eval if
540#           they need a guaranteed non-throwing call.
541# ---------------------------------------------------------------------------
542sub _fetch_url {
543
70
11029
        my ($url, $timeout) = @_;
544
545        # Build a minimal UA; timeout prevents indefinite hangs.
546
70
121
        my $ua  = LWP::UserAgent->new(timeout => $timeout);
547
70
163
        my $res = $ua->get($url);
548
549        # Treat any non-2xx status as a soft failure so callers can try fallbacks.
550
70
174
        unless ($res->is_success) {
551
17
53
                carp "Failed to fetch '$url': ", $res->status_line;
552
17
2293
                return;
553        }
554
555
53
124
        return $res->decoded_content;
556}
557
558# ---------------------------------------------------------------------------
559# _extract_label($item)
560#
561# Purpose:  Extract the rdfs:label string from a JSON-LD graph item hashref.
562# Entry:    $item is a hashref that may contain 'rdfs:label' or the full
563#           URI equivalent key.
564# Returns:  The label as a plain string, or undef if no label is found.
565# Side fx:  None.
566# Notes:    Schema.org JSON-LD may represent the label as a scalar string or
567#           as an array (for multi-language entries); this function always
568#           returns the first (or only) value.
569# ---------------------------------------------------------------------------
570sub _extract_label {
571
1177
7048
        my ($item) = @_;
572
573        # Try the compact key first; fall back to the full RDF URI form.
574
1177
891
        my $label = $item->{$RDFS_LABEL} // $item->{$RDFS_LABEL_FULL};
575
1177
784
        return unless defined $label;
576
577        # If the label is multi-valued, take the first entry.
578
1175
1044
        return ref($label) eq 'ARRAY' ? $label->[0] : $label;
579}
580
581# ---------------------------------------------------------------------------
582# _parse_graph(\@graph)
583#
584# Purpose:  Iterate over a JSON-LD @graph array and partition items into
585#           Schema.org class definitions and property definitions.
586# Entry:    $graph_ref is an arrayref of item hashrefs as decoded from the
587#           Schema.org JSON-LD vocabulary.
588# Returns:  Two hashrefs: (\%classes, \%properties), each keyed by label.
589#           Items are also indexed by the short name extracted from their
590#           @id URI so that both 'MusicEvent' and its label resolve correctly.
591# Side fx:  None.
592# Notes:    Items with no recognisable label or @type are silently skipped.
593#           The @id short-name index uses //= so the label always wins if
594#           it differs.
595# ---------------------------------------------------------------------------
596sub _parse_graph {
597
90
17388
        my ($graph_ref) = @_;
598
599
90
61
        my (%classes, %props);
600
601        # Iterate every item in the JSON-LD graph array.
602
90
90
50
85
        for my $item (@{$graph_ref}) {
603
604                # Skip items that do not declare an RDF type.
605
1174
883
                next unless exists $item->{'@type'};
606
1172
685
                my $item_type = $item->{'@type'};
607
608                # Normalise @type: the spec allows either a scalar or an array.
609
1172
2
825
3
                my @types = ref($item_type) eq 'ARRAY' ? @{$item_type} : ($item_type);
610
611                # Extract the human-readable label; skip items with none.
612
1172
720
                my $label = _extract_label($item) or next;
613
614                # Index rdfs:Class items under their label and their @id short name.
615
1167
1169
724
1027
                if (grep { $_ eq $RDF_CLASS } @types) {
616
1092
920
                        $classes{$label} = $item;
617
618                        # Secondary index by short URI fragment (e.g. 'MusicGroup').
619
1092
767
                        if (my $id = $item->{'@id'}) {
620
1090
1048
                                (my $short = $id) =~ s{.*/}{};
621
1090
868
                                $classes{$short} //= $item;
622                        }
623                }
624
625                # Index rdf:Property items under their label and @id short name.
626
1167
1169
630
1076
                if (grep { $_ eq $RDF_PROPERTY } @types) {
627
76
65
                        $props{$label} = $item;
628
629                        # Secondary index by short URI fragment (e.g. 'startDate').
630
76
63
                        if (my $id = $item->{'@id'}) {
631
75
88
                                (my $short = $id) =~ s{.*/}{};
632
75
85
                                $props{$short} //= $item;
633                        }
634                }
635        }
636
637
90
112
        return (\%classes, \%props);
638}
639
640# ===========================================================================
641# END OF MODULE POD
642# ===========================================================================
643
644=encoding utf-8
645
646 - 793
=head1 FILES

=head2 schemaorg_dynamic_vocabulary.jsonld

Cache file written to C<$CACHEDIR> (if set) or the system temporary directory
(C<File::Spec-E<gt>tmpdir()>), unless overridden via C<$config{cache_file}>.
Contains the downloaded Schema.org vocabulary in JSON-LD format.  Refreshed
when older than C<$config{cache_duration}> seconds.

=head1 ERROR HANDLING

The module uses C<carp> rather than C<die> for recoverable failures:

=over 4

=item * Failed HTTP requests emit C<carp> and trigger the stale-cache fallback.

=item * JSON parse errors emit C<carp> and return C<{}>.

=item * File I/O errors emit C<carp>; the download path is attempted next.

=item * C<croak> is reserved for programmer errors (bad argument types).

=back

=head1 BUGS

=over 4

=item * Cache invalidation is time-based only; no checksum or version check.

=back

=head1 SEE ALSO

=over 4

=item * L<Test Dashboard|https://nigelhorne.github.io/Schema-Validator/coverage/>

=back

=head1 REPOSITORY

L<https://github.com/nigelhorne/schema-validator>

=head2 FORMAL SPECIFICATION

=head3 is_valid_datetime

    Let CHAR denote the set of all Unicode code points and
    DIGIT = { c : CHAR | c in {'0'..'9'} }.
    Let seqN(S) = { s : seq S | #s = N }.

    YEAR     â‰œ  seqN(4, DIGIT)
    MONTH    â‰œ  seqN(2, DIGIT)
    DAY      â‰œ  seqN(2, DIGIT)
    HOUR     â‰œ  seqN(2, DIGIT)
    MINUTE   â‰œ  seqN(2, DIGIT)
    SECOND   â‰œ  seqN(2, DIGIT)
    SEP      â‰œ  { 'T', ' ' }

    DATE     â‰œ  { d : seq CHAR | ∃ y ∈ YEAR; mo ∈ MONTH; dy ∈ DAY
                    â€¢ d = y ⌢ ⟨'-'⟩ ⌢ mo ⌢ ⟨'-'⟩ ⌢ dy }

    HHMM     â‰œ  { t : seq CHAR | ∃ h ∈ HOUR; m ∈ MINUTE
                    â€¢ t = h ⌢ ⟨':'⟩ ⌢ m }

    HHMMSS   â‰œ  { t : seq CHAR | ∃ h ∈ HOUR; m ∈ MINUTE; s ∈ SECOND
                    â€¢ t = h ⌢ ⟨':'⟩ ⌢ m ⌢ ⟨':'⟩ ⌢ s }

    TIMEFRAG ≜  { tf : seq CHAR | ∃ sep ∈ SEP; hm ∈ (HHMM ∪ HHMMSS)
                    â€¢ tf = ⟨sep⟩ ⌢ hm }

    DATETIME ≜  DATE ∪ { dt : seq CHAR | ∃ d ∈ DATE; tf ∈ TIMEFRAG
                           â€¢ dt = d ⌢ tf }

    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     IsValidDatetime
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     str?    : seq CHAR
     result! : B
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     result! ⟺ str? ∈ DATETIME
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€

=head3 load_dynamic_library

    Let FILE, DUR, URL be the resolved config values.
    Let now : N be the current UNIX epoch time.
    Let mtime : PATH -> N map a path to its last-modification time.
    Let readable, writeable : PATH -> B be filesystem predicates.
    Let reachable : URL -> B test HTTP reachability.
    Let slurp : PATH -> seq CHAR and spit : PATH x seq CHAR -> 1.
    Let fetch : URL x N -> seq CHAR (second arg is timeout).
    Let decode_json : seq CHAR -> ITEM.
    Let label : ITEM -> (LABEL | {}) extract rdfs:label.
    Let types : ITEM -> P TYPE extract @type values.

    FRESH ≜ ( -e(FILE) ) ∧ ( (now - mtime(FILE)) < DUR )

    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     LoadDynamicVocabulary
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     Î”VocabularyStore
     cache_file?     : PATH
     cache_duration? : N
     vocab_url?      : URL
     ua_timeout?     : N
     result!         : CLASS_LABEL ⇸ ITEM
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
     content : seq CHAR

     FRESH ∧ readable(cache_file?)
         â‡’ content = slurp(cache_file?)

     Â¬FRESH ∧ reachable(vocab_url?)
         â‡’ content = fetch(vocab_url?, ua_timeout?)
           âˆ§ ( writeable(cache_file?) ⇒ spit(cache_file?, content) )

     Â¬FRESH ∧ ¬reachable(vocab_url?) ∧ -e(cache_file?)
         â‡’ content = slurp(cache_file?)

     graph ≜ (decode_json content)[AT_GRAPH]

     dynamic_schema' =
         { item ∈ graph | RDF_CLASS ∈ types(item) ∧ label(item) ≠ ∅
           â€¢ label(item) ↦ item }

     dynamic_properties' =
         { item ∈ graph | RDF_PROPERTY ∈ types(item) ∧ label(item) ≠ ∅
           â€¢ label(item) ↦ item }

     result! = dynamic_schema'
    â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€

=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

=head1 LICENCE AND COPYRIGHT

Copyright 2025-2026 Nigel Horne.

Usage is subject to the GPL2 licence terms.
If you use it,
please let me know.

=cut
794
7951;