File Coverage

File:blib/lib/App/Test/Generator.pm
Coverage:78.3%

linestmtbrancondsubtimecode
1package App::Test::Generator;
2
3# TODO: Test validator from Params::Validate::Strict 0.16
4# TODO: $seed should be passed to Data::Random::String::Matches
5# TODO: positional args - when config_undef is set, see what happens when not all args are given
6
7
19
19
928042
32
use 5.036;
8
9
19
19
19
31
13
163
use strict;
10
19
19
19
24
14
355
use warnings;
11
19
19
19
2384
77130
43
use autodie qw(:all);
12
13
19
19
19
112574
1844
38
use utf8;
14binmode STDOUT, ':utf8';
15binmode STDERR, ':utf8';
16
17
19
19
19
3568
9462
44
use open qw(:std :encoding(UTF-8));
18
19
19
19
19
129024
25
320
use App::Test::Generator::Template;
20
19
19
19
46
16
474
use Carp qw(carp croak);
21
19
19
19
4178
537235
289
use Config::Abstraction 0.36;
22
19
19
19
2293
26306
461
use Data::Dumper;
23
19
19
19
38
13
273
use Data::Section::Simple;
24
19
19
19
33
16
467
use File::Basename qw(basename);
25
19
19
19
33
14
241
use File::Spec;
26
19
19
19
3954
166161
533
use Module::Load::Conditional qw(check_install can_load);
27
19
19
19
45
14
292
use Params::Get;
28
19
19
19
29
158
217
use Params::Validate::Strict 0.30;
29
19
19
19
30
12
252
use Readonly;
30
19
19
19
31
18
855
use Readonly::Values::Boolean;
31
19
19
19
37
14
350
use Scalar::Util qw(looks_like_number);
32
19
19
19
34
14
1162
use re 'regexp_pattern';
33
19
19
19
4149
142051
311
use Template;
34
19
19
19
2509
18951
487
use YAML::XS qw(LoadFile);
35
36
19
19
19
41
11
535
use Exporter 'import';
37
38our @EXPORT_OK = qw(generate);
39
40our $VERSION = '0.36';
41
42use constant {
43
19
620
        DEFAULT_ITERATIONS => 30,
44        DEFAULT_PROPERTY_TRIALS => 1000
45
19
19
45
29
};
46
47
19
19
19
37
18
119454
use constant CONFIG_TYPES => ('test_nuls', 'test_undef', 'test_empty', 'test_non_ascii', 'dedup', 'properties', 'close_stdin', 'test_security');
48
49# --------------------------------------------------
50# Delimiter pairs tried in order when wrapping a
51# string with q{} — bracket forms are preferred as
52# they are most readable in generated test code
53# --------------------------------------------------
54Readonly my @Q_BRACKET_PAIRS => (
55        ['{', '}'],
56        ['(', ')'],
57        ['[', ']'],
58        ['<', '>'],
59);
60
61# --------------------------------------------------
62# Single-character delimiters tried when no bracket
63# pair is usable — each is tried in order and the
64# first one not present in the string is used.
65# The # character is last since it starts comments
66# in many contexts and is least readable
67# --------------------------------------------------
68Readonly my @Q_SINGLE_DELIMITERS => (
69        '~', '!', '%', '^', '=', '+', ':', ',', ';', '|', '/', '#'
70);
71
72# --------------------------------------------------
73# Sentinel returned by index() when the search
74# string is not found — used to make the >= 0
75# boundary check self-documenting and to prevent
76# NumericBoundary mutants from surviving
77# --------------------------------------------------
78Readonly my $INDEX_NOT_FOUND => -1;
79
80# --------------------------------------------------
81# Readonly constants for schema validation
82# --------------------------------------------------
83Readonly my $CONFIG_PROPERTIES_KEY => 'properties';
84Readonly my $LEGACY_PERL_KEY_1     => '$module';
85Readonly my $LEGACY_PERL_KEY_2     => 'our $module';
86Readonly my $SOURCE_KEY            => '_source';
87
88# --------------------------------------------------
89# Readonly constants for render_hash key detection
90# --------------------------------------------------
91Readonly my $KEY_MATCHES => 'matches';
92Readonly my $KEY_NOMATCH => 'nomatch';
93
94# --------------------------------------------------
95# Reserved module name indicating a Perl builtin
96# function rather than a CPAN or user module
97# --------------------------------------------------
98Readonly my $MODULE_BUILTIN => 'builtin';
99
100# --------------------------------------------------
101# Regex pattern matched against transform names to
102# detect the positive/non-negative idempotence
103# heuristic in _detect_transform_properties
104# --------------------------------------------------
105Readonly my $TRANSFORM_POSITIVE_PATTERN => 'positive';
106
107# --------------------------------------------------
108# Default type assumed for schema fields that declare
109# no explicit type — used in generator selection and
110# dominant-type detection
111# --------------------------------------------------
112Readonly my $DEFAULT_FIELD_TYPE => 'string';
113
114# --------------------------------------------------
115# Default range used by the LectroTest float/integer
116# generators when no min or max constraint is given.
117# Chosen to provide a useful spread without producing
118# values so large they overflow downstream arithmetic.
119# --------------------------------------------------
120Readonly my $DEFAULT_GENERATOR_RANGE => 1000;
121
122# --------------------------------------------------
123# Default upper bound on the number of elements in
124# generated arrayrefs and hashrefs when no max is
125# declared in the schema.
126# --------------------------------------------------
127Readonly my $DEFAULT_MAX_COLLECTION_SIZE => 10;
128
129# --------------------------------------------------
130# Default upper bound on generated string length
131# when no max is declared in the schema.
132# --------------------------------------------------
133Readonly my $DEFAULT_MAX_STRING_LEN => 100;
134
135# --------------------------------------------------
136# Sentinel for the zero boundary used in float
137# generator selection — comparing min/max against
138# this constant makes the boundary intent explicit
139# and prevents NumericBoundary mutants from surviving.
140# --------------------------------------------------
141Readonly my $ZERO_BOUNDARY => 0;
142
143# --------------------------------------------------
144# Environment variable names used to control verbose
145# output and optional load validation in
146# _validate_module. Centralised here so they are
147# easy to find and consistent across the codebase.
148# --------------------------------------------------
149Readonly my $ENV_TEST_VERBOSE       => 'TEST_VERBOSE';
150Readonly my $ENV_GENERATOR_VERBOSE  => 'GENERATOR_VERBOSE';
151Readonly my $ENV_VALIDATE_LOAD      => 'GENERATOR_VALIDATE_LOAD';
152
153 - 1534
=head1 NAME

App::Test::Generator - Fuzz Testing, Mutation Testing, LCSAJ Metrics and Test Dashboard for Perl modules

=head1 VERSION

Version 0.36

=head1 SYNOPSIS

C<App::Test::Generator> is a suite to help the testing of CPAN modules.
It consists of 4 modules:

=over 4

=item * Fuzz Tester

=item * Mutation Testing

=item * LCSAJ Metrics

=item * Test Dashboard

=back

From the command line:

  # Takes the formal definition of a routine, creates tests against that routine, and runs the test
  fuzz-harness-generator -r t/conf/add.yml

  # Attempt to create a formal definition from a routine package, then run tests against that formal definition
  # This is the holy grail of automatic test generation, just by looking at the source code
  extract-schemas bin/extract-schemas lib/Sample/Module.pm && fuzz-harness-generator -r schemas/greet.yaml

From Perl:

  use App::Test::Generator qw(generate);

  # Generate to STDOUT
  App::Test::Generator->generate("t/conf/add.yml");

  # Generate directly to a file
  App::Test::Generator->generate('t/conf/add.yml', 't/add_fuzz.t');

  # Holy grail mode - read a Perl file, generate tests, and run them
  # This is a long way away yet, but see t/schema_input.t for a proof of concept
  my $extractor = App::Test::Generator::SchemaExtractor->new(
    input_file => 'Foo.pm',
    output_dir => $dir
  );
  my $schemas = $extractor->extract_all();
  foreach my $schema(keys %{$schemas}) {
    my $tempfile = '/var/tmp/foo.t';    # Use File::Temp in real life
    App::Test::Generator->generate(
      schema => $schemas->{$schema},
      output_file => $tempfile,
    );
    system("$^X -I$dir $tempfile");
    unlink $tempfile;
  }

=head1 OVERVIEW

This module takes a formal input/output specification for a routine or
method and automatically generates test cases. In effect, it allows you
to easily add comprehensive black-box tests in addition to the more
common white-box tests that are typically written for CPAN modules and other
subroutines.

The generated tests combine:

=over 4

=item * Random fuzzing based on input types

=item * Deterministic edge cases for min/max constraints

=item * Static corpus tests defined in Perl or YAML

=back

This approach strengthens your test suite by probing both expected and
unexpected inputs, helping you to catch boundary errors, invalid data
handling, and regressions without manually writing every case.

=head1 DESCRIPTION

This module implements the logic behind L<fuzz-harness-generator>.
It parses configuration files (fuzz and/or corpus YAML), and
produces a ready-to-run F<.t> test script to run through C<prove>.

It reads configuration files in any format,
and optional YAML corpus files.
All of the examples in this documentation are in C<YAML> format,
other formats may not work as they aren't so heavily tested.
It then generates a L<Test::Most>-based fuzzing harness combining:

=over 4

=item * Randomized fuzzing of inputs (with edge cases)

=item * Optional static corpus tests from Perl C<%cases> or YAML file (C<yaml_cases> key)

=item * Functional or OO mode (via C<$new>)

=item * Reproducible runs via C<$seed> and configurable iterations via C<$iterations>

=back

=head1 MUTATION-GUIDED TEST GENERATION

C<App::Test::Generator> includes a pipeline that automatically closes the
feedback loop between mutation testing, schema extraction, and fuzz
testing. The goal is that surviving mutants drive the creation of new
tests that kill them on the next run, without manual intervention.

=head2 The Pipeline

    mutation survivor
        |
        v
    SchemaExtractor extracts the schema for the enclosing sub
        |
        v
    Schema augmented with boundary values from the mutant
        |
        v
    Augmented schema written to t/conf/
        |
        v
    t/fuzz.t picks up the new schema and runs fuzz tests
        |
        v
    Mutation killed on next run

=head2 How to Use It

The pipeline is driven by three flags passed to
C<bin/test-generator-index>, which is invoked automatically by
C<bin/generate-test-dashboard> on each CI push.

=head3 Step 1: Generate TODO stubs for all survivors

    bin/test-generator-index --generate_mutant_tests=t

Produces C<t/mutant_YYYYMMDD_HHMMSS.t> containing:

=over 4

=item * TODO stubs for HIGH and MEDIUM difficulty survivors, with
boundary value suggestions, environment variable hints, and the
enclosing subroutine name for navigation context.

=item * Comment-only hints for LOW difficulty survivors.

=back

Multiple mutations on the same source line are deduplicated into one
stub. One good test kills all variants on that line.

=head3 Step 2: Generate runnable schemas for NUM_BOUNDARY survivors

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant

For each NUM_BOUNDARY survivor, calls
L<App::Test::Generator::SchemaExtractor> to extract the schema for
the enclosing subroutine. If the confidence level is sufficient, the
schema is augmented with the boundary value from the mutant (plus one
value either side) and written to C<t/conf/> as a runnable YAML file.
L<t/fuzz.t> picks it up automatically on the next test run.

Falls back to a TODO stub if:

=over 4

=item * SchemaExtractor cannot parse the file

=item * The enclosing sub cannot be determined

=item * The extracted schema confidence is C<very_low> or C<none>

=back

=head3 Step 3: Augment existing schemas with survivor boundary values

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant \
        --generate_fuzz

Scans C<t/conf/> for existing YAML schema files (hand-written or
previously generated) and writes augmented copies with boundary values
from surviving NUM_BOUNDARY mutants merged in. The original schema is
never modified. Augmented copies are written as
C<t/conf/mutant_fuzz_YYYYMMDD_HHMMSS_FUNCTION.yml> and picked up
automatically by C<t/fuzz.t>.

Schemas whose filename already starts with C<mutant_fuzz_> are skipped
to prevent cascading augmentation. Schemas with no matching survivors
are skipped, with a note if C<--verbose> is active.

=head3 Putting It All Together

The recommended invocation in C<bin/generate-test-dashboard>
Step 7 runs all three stages together:

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant \
        --generate_fuzz

The GitHub Actions workflow in C<.github/workflows/dashboard.yml>
then commits any new C<t/mutant_*.t> and C<t/conf/mutant_*.yml> files
to the repository so they accumulate over time as the test suite
improves.

=head2 Confidence Levels

L<App::Test::Generator::SchemaExtractor> assigns a confidence level
to each extracted schema:

=over 4

=item * C<high> / C<medium> / C<low> - Schema is used for test generation

=item * C<very_low> / C<none> - Falls back to TODO stub

=back

Confidence is based on how much type and constraint information could
be inferred from the source code and its POD documentation. Methods
with explicit parameter validation (L<Params::Validate::Strict>,
L<Params::Get>) or comprehensive POD will produce higher-confidence
schemas.

=head2 Files Produced

=over 4

=item * C<t/mutant_YYYYMMDD_HHMMSS.t>

TODO stub file for all survivors. Committed to the repository by the
GitHub Actions workflow.

=item * C<t/conf/mutant_MODNAME_FUNCTION_YYYYMMDD_HHMMSS.yml>

Runnable YAML schema for a NUM_BOUNDARY survivor where SchemaExtractor
confidence was sufficient. Picked up by C<t/fuzz.t>.

=item * C<t/conf/mutant_fuzz_YYYYMMDD_HHMMSS_FUNCTION.yml>

Augmented copy of an existing schema with survivor boundary values
merged in. Picked up by C<t/fuzz.t>.

=back

=head2 See Also

=over 4

=item * L<App::Test::Generator::SchemaExtractor> - Schema extraction
from Perl source code

=item * L<bin/test-generator-index> - Dashboard generator and
pipeline driver

=item * L<bin/generate-test-dashboard> - Full pipeline runner

=back

=encoding utf8

=head1 CONFIGURATION

The configuration file,
for each set of tests to be produced,
is a file containing a schema that can be read by L<Config::Abstraction>.

=head2 SCHEMA

The schema is split into several sections.

=head3 C<%input> - input params with keys => type/optional specs

When using named parameters

  input:
    name:
      type: string
      optional: false
    age:
      type: integer
      optional: true

Supported basic types used by the fuzzer: C<string>, C<integer>, C<float>, C<number>, C<boolean>, C<arrayref>, C<hashref>.
See also L<Params::Validate::Strict>.
You can add more custom types using properties.

For routines with one unnamed parameter

  input:
    type: string

For routines with more than one named parameter, use the C<position> keyword.

  module: Math::Simple::MinMax
  fuction: max

  input:
    left:
      type: number
      position: 0
    right:
      type: number
      position: 1

  output:
    type: number

The keyword C<undef> is used to indicate that the C<function> takes no arguments.

=head3 C<%output> - output param types for L<Return::Set> checking

  output:
    type: string

If the output hash contains the key _STATUS, and if that key is set to DIES,
the routine should die with the given arguments; otherwise, it should live.
If it's set to WARNS,
the routine should warn with the given arguments.
The output can be set to the string 'undef' if the routine should return the undefined value:

  ---
  module: Scalar::Util
  function: blessed

  input:
    type: string

  output: undef

The keyword C<undef> is used to indicate that the C<function> returns nothing.

=head3 C<%config> - optional hash of configuration.

The current supported variables are

=over 4

=item * C<close_stdin>

Tests should not attempt to read from STDIN (default: 1).
This is ignored on Windows, when never closes STDIN.

=item * C<test_nuls>, inject NUL bytes into strings (default: 1)

With this test enabled, the function is expected to die when a NUL byte is passed in.

=item * C<test_undef>, test with undefined value (default: 1)

=item * C<test_empty>, test with empty strings (default: 1)

=item * C<test_non_ascii>, test with strings that contain non ascii characters (default: 1)

=item * C<timeout>, ensure tests don't hang (default: 10)

Setting this to 0 disables timeout testing.

=item * C<dedup>, fuzzing can create duplicate tests, go some way to remove duplicates (default: 1)

=item * C<properties>, enable L<Test::LectroTest> Property tests (default: 0)

*item * C<test_security>, send some security string based tests (default: 0)

=back

All values default to C<true>.

=head3 C<%accessor> - this is an accessor routine

  accessor:
    property: ua
    type: getset

Has two mandatory elements:

=over 4

=item * C<property>

The name of the property in the object that the routine controls.

=item * C<type>

One of C<getter>, C<setter>, C<getset>.

=back

=head3 C<%transforms> - list of transformations from input sets to output sets

Transforms allow you to define how input data should be transformed into output data.
This is useful for testing functions that convert between formats, normalize data,
or apply business logic transformations on a set of data to different set of data.
It takes a list of subsets of the input and output definitions,
and verifies that data from each input subset is correctly transformed into data from the matching output subset.

=head4 Transform Validation Rules

For each transform:

=over 4

=item 1. Generate test cases using the transform's input schema

=item 2. Call the function with those inputs

=item 3. Validate the output matches the transform's output schema

=item 4. If output has a specific 'value', check exact match

=item 5. If output has constraints (min/max), validate within bounds

=back

=head4 Example 1

  ---
  module: builtin
  function: abs

  config:
    test_undef: no
    test_empty: no
    test_nuls: no
    test_non_ascii: no

  input:
    number:
      type: number
      position: 0

  output:
    type: number
    min: 0

  transforms:
    positive:
      input:
        number:
          type: number
          position: 0
          min: 0
      output:
        type: number
        min: 0
    negative:
      input:
        number:
          type: number
          position: 0
          max: 0
      output:
        type: number
        min: 0
    error:
      input:
        undef
      output:
        _STATUS: DIES

If the output hash contains the key _STATUS, and if that key is set to DIES,
the routine should die with the given arguments; otherwise, it should live.
If it's set to WARNS, the routine should warn with the given arguments.

The keyword C<undef> is used to indicate that the C<function> returns nothing.

=head4 Example 2

  ---
  module: Math::Utils
  function: normalize_number

  input:
    value:
      type: number
      position: 0

  output:
    type: number

  transforms:
    positive_stays_positive:
      input:
        value:
          type: number
          min: 0
          max: 1000
      output:
        type: number
        min: 0
        max: 1

    negative_becomes_zero:
      input:
        value:
          type: number
          max: 0
      output:
        type: number
        value: 0

    preserves_zero:
      input:
        value:
          type: number
          value: 0
      output:
        type: number
        value: 0

=head3 C<$module>

The name of the module (optional).

Using the reserved word C<builtin> means you're testing a Perl builtin function.

If omitted, the generator will guess from the config filename:
C<My-Widget.conf> -> C<My::Widget>.

=head3 C<$function>

The function/method to test.

This defaults to C<run>.

=head3 C<%new>

An optional hashref of args to pass to the module's constructor.

  new:
    api_key: ABC123
    verbose: true

To ensure C<new()> is called with no arguments, you still need to define new, thus:

  module: MyModule
  function: my_function

  new:

=head3 C<%cases>

An optional Perl static corpus, when the output is a simple string (expected => [ args... ]).

Maps the expected output string to the input and _STATUS

  cases:
    ok:
      input: ping
      _STATUS: OK
    error:
      input: ""
      _STATUS: DIES

=head3 C<$yaml_cases> - optional path to a YAML file with the same shape as C<%cases>.

=head3 C<$seed>

An optional integer.
When provided, the generated C<t/fuzz.t> will call C<srand($seed)> so fuzz runs are reproducible.

=head3 C<$iterations>

An optional integer controlling how many fuzz iterations to perform (default 30).

=head3 C<%edge_cases>

An optional hash mapping of extra values to inject.

        # Two named parameters
        edge_cases:
                name: [ '', 'a' x 1024, \"\x{263A}" ]
                age: [ -1, 0, 99999999 ]

        # Takes a string input
        edge_cases: [ 'foo', 'bar' ]

Values can be strings or numbers; strings will be properly quoted.
Note that this only works with routines that take named parameters.

=head3 C<%type_edge_cases>

An optional hash mapping types to arrayrefs of extra values to try for any field of that type:

        type_edge_cases:
                string: [ '', ' ', "\t", "\n", "\0", 'long' x 1024, chr(0x1F600) ]
                number: [ 0, 1.0, -1.0, 1e308, -1e308, 1e-308, -1e-308, 'NaN', 'Infinity' ]
                integer: [ 0, 1, -1, 2**31-1, -(2**31), 2**63-1, -(2**63) ]

=head3 C<%edge_case_array>

Specify edge case values for routines that accept a single unnamed parameter.
This is specifically designed for simple functions that take one argument without a parameter name.
These edge cases supplement the normal random string generation, ensuring specific problematic values are always tested.
During fuzzing iterations, there's a 40% probability that a test case will use a value from edge_case_array instead of randomly generated data.

  ---
  module: Text::Processor
  function: sanitize

  input:
    type: string
    min: 1
    max: 1000

  edge_case_array:
    - "<script>alert('xss')</script>"
    - "'; DROP TABLE users; --"
    - "\0null\0byte"
    - "emoji😊test"
    - ""
    - " "

  seed: 42
  iterations: 30

=head3 Semantic Data Generators

For property-based testing with L<Test::LectroTest>,
you can use semantic generators to create realistic test data.

C<unix_timestamp> is currently fully supported,
other fuzz testing support for C<semantic> entries is being developed.

  input:
    email:
      type: string
      semantic: email

    user_id:
      type: string
      semantic: uuid

    phone:
      type: string
      semantic: phone_us

=head4 Available Semantic Types

=over 4

=item * C<email> - Valid email addresses (user@domain.tld)

=item * C<url> - HTTP/HTTPS URLs

=item * C<uuid> - UUIDv4 identifiers

=item * C<phone_us> - US phone numbers (XXX-XXX-XXXX)

=item * C<phone_e164> - International E.164 format (+XXXXXXXXXXXX)

=item * C<ipv4> - IPv4 addresses (0.0.0.0 - 255.255.255.255)

=item * C<ipv6> - IPv6 addresses

=item * C<username> - Alphanumeric usernames with _ and -

=item * C<slug> - URL slugs (lowercase-with-hyphens)

=item * C<hex_color> - Hex color codes (#RRGGBB)

=item * C<iso_date> - ISO 8601 dates (YYYY-MM-DD)

=item * C<iso_datetime> - ISO 8601 datetimes (YYYY-MM-DDTHH:MM:SSZ)

=item * C<semver> - Semantic version strings (major.minor.patch)

=item * C<jwt> - JWT-like tokens (base64url format)

=item * C<json> - Simple JSON objects

=item * C<base64> - Base64-encoded strings

=item * C<md5> - MD5 hashes (32 hex chars)

=item * C<sha256> - SHA-256 hashes (64 hex chars)

=item * C<unix_timestamp>

=back

=head2 EDGE CASE GENERATION

In addition to purely random fuzz cases, the harness generates
deterministic edge cases for parameters that declare C<min>, C<max> or C<len> in their schema definitions.

For each constraint, three edge cases are added:

=over 4

=item * Just inside the allowable range

This case should succeed, since it lies strictly within the bounds.

=item * Exactly on the boundary

This case should succeed, since it meets the constraint exactly.

=item * Just outside the boundary

This case is annotated with C<_STATUS = 'DIES'> in the corpus and
should cause the harness to fail validation or croak.

=back

Supported constraint types:

=over 4

=item * C<number>, C<integer>, C<float>

Uses numeric values one below, equal to, and one above the boundary.

=item * C<string>

Uses strings of lengths one below, equal to, and one above the boundary.

=item * C<arrayref>

Uses references to arrays of with the number of elements one below, equal to, and one above the boundary.

=item * C<hashref>

Uses hashes with key counts one below, equal to, and one above the
boundary (C<min> = minimum number of keys, C<max> = maximum number
of keys).

=item * C<memberof> - arrayref of allowed values for a parameter

This example is for a routine called C<input()> that takes two arguments: C<status> and C<level>.
C<status> is a string that must have the value C<ok>, C<error> or C<pending>.
The C<level> argument is an integer that must be one of C<1>, C<5> or C<111>.

  ---
  input:
    status:
      type: string
      memberof:
        - ok
        - error
        - pending
    level:
      type: integer
      memberof:
        - 1
        - 5
        - 111

The generator will automatically create test cases for each allowed value (inside the member list),
and at least one value outside the list (which should die or C<croak>, C<_STATUS = 'DIES'>).
This works for strings, integers, and numbers.

=item * C<enum> - synonym of C<memberof>

=item * C<boolean> - automatic boundary tests for boolean fields

  input:
    flag:
      type: boolean

The generator will automatically create test cases for 0 and 1; true and false; off and on, and values that should trigger C<_STATUS = 'DIES'>.

=back

These edge cases are inserted automatically, in addition to the random
fuzzing inputs, so each run will reliably probe boundary conditions
without relying solely on randomness.

=head1 EXAMPLES

See the files in C<t/conf> for examples.

=head2 Adding Scheduled fuzz Testing with GitHub Actions to Your Code

To automatically create and run tests on a regular basis on GitHub Actions,
you need to create a configuration file for each method and subroutine that you're testing,
and a GitHub Actions configuration file.

This example takes you through testing the online_render method of L<HTML::Genealogy::Map>.

=head3 t/conf/online_render.yml

  ---

  module: HTML::Genealogy::Map
  function: onload_render

  input:
    gedcom:
      type: object
      can: individuals
    geocoder:
      type: object
      can: geocode
    debug:
      type: boolean
      optional: true
    google_key:
      type: string
      optional: true
      min: 39
      max: 39
      matches: "^AIza[0-9A-Za-z_-]{35}$"

  config:
    test_undef: 0

=head3 .github/actions/fuzz.t

  ---
  name: Fuzz Testing

  permissions:
    contents: read

  on:
    push:
      branches: [main, master]
    pull_request:
      branches: [main, master]
    schedule:
      - cron: '29 5 14 * *'

  jobs:
    generate-fuzz-tests:
      strategy:
        fail-fast: false
        matrix:
          os:
            - macos-latest
            - ubuntu-latest
            - windows-latest
          perl: ['5.42', '5.40', '5.38', '5.36', '5.34', '5.32', '5.30', '5.28', '5.22']

      runs-on: ${{ matrix.os }}
      name: Fuzz testing with perl ${{ matrix.perl }} on ${{ matrix.os }}

      steps:
        - uses: actions/checkout@v5

        - name: Set up Perl
          uses: shogo82148/actions-setup-perl@v1
          with:
            perl-version: ${{ matrix.perl }}

        - name: Install App::Test::Generator this module's dependencies
          run: |
            cpanm App::Test::Generator
            cpanm --installdeps .

        - name: Make Module
          run: |
            perl Makefile.PL
            make
          env:
            AUTOMATED_TESTING: 1
            NONINTERACTIVE_TESTING: 1

        - name: Generate fuzz tests
          run: |
            mkdir t/fuzz
            find t/conf -name '*.yml' | while read config; do
              test_name=$(basename "$config" .conf)
              fuzz-harness-generator "$config" > "t/fuzz/${test_name}_fuzz.t"
            done

        - name: Run generated fuzz tests
          run: |
            prove -lr t/fuzz/
          env:
            AUTOMATED_TESTING: 1
            NONINTERACTIVE_TESTING: 1

=head2 Fuzz Testing your CPAN Module

Running fuzz tests when you run C<make test> in your CPAN module.

Create a directory <t/conf> which contains the schemas.

Then create this file as <t/fuzz.t>:

  #!/usr/bin/env perl

  use strict;
  use warnings;

  use FindBin qw($Bin);
  use IPC::Run3;
  use IPC::System::Simple qw(system);
  use Test::Needs 'App::Test::Generator';
  use Test::Most;

  my $dirname = "$Bin/conf";

  if((-d $dirname) && opendir(my $dh, $dirname)) {
        while (my $filename = readdir($dh)) {
                # Skip '.' and '..' entries and vi temporary files
                next if ($filename eq '.' || $filename eq '..') || ($filename =~ /\.swp$/);

                my $filepath = "$dirname/$filename";

                if(-f $filepath) {      # Check if it's a regular file
                        my ($stdout, $stderr);
                        run3 ['fuzz-harness-generator', '-r', $filepath], undef, \$stdout, \$stderr;

                        ok($? == 0, 'Generated test script exits successfully');

                        if($? == 0) {
                                ok($stdout =~ /^Result: PASS/ms);
                                if($stdout =~ /Files=1, Tests=(\d+)/ms) {
                                        diag("$1 tests run");
                                }
                        } else {
                                diag("$filepath: STDOUT:\n$stdout");
                                diag($stderr) if(length($stderr));
                                diag("$filepath Failed");
                                last;
                        }
                        diag($stderr) if(length($stderr));
                }
        }
        closedir($dh);
  }

  done_testing();

=head2 Property-Based Testing with Transforms

The generator can create property-based tests using L<Test::LectroTest> when the
C<properties> configuration option is enabled.
This provides more comprehensive
testing by automatically generating thousands of test cases and verifying that
mathematical properties hold across all inputs.

=head3 Basic Property-Based Transform Example

Here's a complete example testing the C<abs> builtin function:

B<t/conf/abs.yml>:

  ---
  module: builtin
  function: abs

  config:
    test_undef: no
    test_empty: no
    test_nuls: no
    properties:
      enable: true
      trials: 1000

  input:
    number:
      type: number
      position: 0

  output:
    type: number
    min: 0

  transforms:
    positive:
      input:
        number:
          type: number
          min: 0
      output:
        type: number
        min: 0

    negative:
      input:
        number:
          type: number
          max: 0
      output:
        type: number
        min: 0

This configuration:

=over 4

=item * Enables property-based testing with 1000 trials per property

=item * Defines two transforms: one for positive numbers, one for negative

=item * Automatically generates properties that verify C<abs()> always returns non-negative numbers

=back

Generate the test:

  fuzz-harness-generator t/conf/abs.yml > t/abs_property.t

The generated test will include:

=over 4

=item * Traditional edge-case tests for boundary conditions

=item * Random fuzzing with 30 iterations (or as configured)

=item * Property-based tests that verify the transforms with 1000 trials each

=back

=head3 What Properties Are Tested?

The generator automatically detects and tests these properties based on your transform specifications:

=over 4

=item * B<Range constraints> - If output has C<min> or C<max>, verifies results stay within bounds

=item * B<Type preservation> - Ensures numeric inputs produce numeric outputs

=item * B<Definedness> - Verifies the function doesn't return C<undef> unexpectedly

=item * B<Specific values> - If output specifies a C<value>, checks exact equality

=back

For the C<abs> example above, the generated properties verify:

  # For the "positive" transform:
  - Given a positive number, abs() returns >= 0
  - The result is a valid number
  - The result is defined

  # For the "negative" transform:
  - Given a negative number, abs() returns >= 0
  - The result is a valid number
  - The result is defined

=head3 Advanced Example: String Normalization

Here's a more complex example testing a string normalization function:

B<t/conf/normalize.yml>:

  ---
  module: Text::Processor
  function: normalize_whitespace

  config:
    properties:
      enable: true
      trials: 500

  input:
    text:
      type: string
      min: 0
      max: 1000
      position: 0

  output:
    type: string
    min: 0
    max: 1000

  transforms:
    empty_preserved:
      input:
        text:
          type: string
          value: ""
      output:
        type: string
        value: ""

    single_space:
      input:
        text:
          type: string
          min: 1
          matches: '^\S+(\s+\S+)*$'
      output:
        type: string
        matches: '^\S+( \S+)*$'

    length_bounded:
      input:
        text:
          type: string
          min: 1
          max: 100
      output:
        type: string
        min: 1
        max: 100

This tests that the normalization function:

=over 4

=item * Preserves empty strings (C<empty_preserved> transform)

=item * Collapses multiple spaces into single spaces (C<single_space> transform)

=item * Maintains length constraints (C<length_bounded> transform)

=back

=head3 Interpreting Property Test Results

When property-based tests run, you'll see output like:

  ok 123 - negative property holds (1000 trials)
  ok 124 - positive property holds (1000 trials)

If a property fails, Test::LectroTest will attempt to find the minimal failing
case and display it:

  not ok 123 - positive property holds (47 trials)
  # Property failed
  # Reason: counterexample found

This helps you quickly identify edge cases that your function doesn't handle correctly.

=head3 Configuration Options for Property-Based Testing

In the C<config> section:

  config:
    properties:
      enable: true     # Enable property-based testing (default: false)
      trials: 1000     # Number of test cases per property (default: 1000)

You can also disable traditional fuzzing and only use property-based tests:

  config:
    properties:
      enable: true
      trials: 5000

  iterations: 0  # Disable random fuzzing, use only property tests

=head3 When to Use Property-Based Testing

Property-based testing with transforms is particularly useful for:

=over 4

=item * Mathematical functions (C<abs>, C<sqrt>, C<min>, C<max>, etc.)

=item * Data transformations (encoding, normalization, sanitization)

=item * Parsers and formatters

=item * Functions with clear input-output relationships

=item * Code that should satisfy mathematical properties (commutativity, associativity, idempotence)

=back

=head3 Requirements

Property-based testing requires L<Test::LectroTest> to be installed:

  cpanm Test::LectroTest

If not installed, the generated tests will automatically skip the property-based
portion with a message.

=head3 Testing Email Validation

  ---
  module: Email::Valid
  function: rfc822

  config:
    properties:
      enable: true
      trials: 200
    close_stdin: true
    test_undef: no
    test_empty: no
    test_nuls: no

  input:
    email:
      type: string
      semantic: email
      position: 0

  output:
    type: boolean

  transforms:
    valid_emails:
      input:
        email:
          type: string
          semantic: email
      output:
        type: boolean

This generates 200 realistic email addresses for testing, rather than random strings.

=head3 Combining Semantic with Regex

You can combine semantic generators with regex validation:

  input:
    corporate_email:
      type: string
      semantic: email
      matches: '@company\.com$'

The semantic generator creates realistic emails, and the regex ensures they match your domain.

=head3 Custom Properties for Transforms

You can define additional properties that should hold for your transforms beyond
the automatically detected ones.

=head4 Using Built-in Properties

  transforms:
    positive:
      input:
        number:
          type: number
          min: 0
      output:
        type: number
        min: 0
      properties:
        - idempotent       # f(f(x)) == f(x)
        - non_negative     # result >= 0
        - positive         # result > 0

Available built-in properties:

=over 4

=item * C<idempotent> - Function is idempotent: f(f(x)) == f(x)

=item * C<non_negative> - Result is always >= 0

=item * C<positive> - Result is always > 0

=item * C<non_empty> - String result is never empty

=item * C<length_preserved> - Output length equals input length

=item * C<uppercase> - Result is all uppercase

=item * C<lowercase> - Result is all lowercase

=item * C<trimmed> - No leading/trailing whitespace

=item * C<sorted_ascending> - Array is sorted ascending

=item * C<sorted_descending> - Array is sorted descending

=item * C<unique_elements> - Array has no duplicates

=item * C<preserves_keys> - Hash has same keys as input

=back

=head4 Custom Property Code

Custom properties allows the definition additional invariants and relationships that should hold for their transforms,
beyond what's auto-detected.
For example:

=over 4

=item * Idempotence: f(f(x)) == f(x)

=item * Commutativity: f(x, y) == f(y, x)

=item * Associativity: f(f(x, y), z) == f(x, f(y, z))

=item * Inverse relationships: decode(encode(x)) == x

=item * Domain-specific invariants: Custom business logic

=back

Define your own properties with custom Perl code:

  transforms:
    normalize:
      input:
        text:
          type: string
      output:
        type: string
      properties:
        - name: single_spaces
          description: "No multiple consecutive spaces"
          code: $result !~ /  /

        - name: no_leading_space
          description: "No space at start"
          code: $result !~ /^\s/

        - name: reversible
          description: "Can be reversed back"
          code: length($result) == length($text)

The code has access to:

=over 4

=item * C<$result> - The function's return value

=item * Input variables - All input parameters (e.g., C<$text>, C<$number>)

=item * The function itself - Can call it again for idempotence checks

=back

=head4 Combining Auto-detected and Custom Properties

The generator automatically detects properties from your output spec, and adds
your custom properties:

  transforms:
    sanitize:
      input:
        html:
          type: string
      output:
        type: string
        min: 0              # Auto-detects: defined, min_length >= 0
        max: 10000
      properties:           # Additional custom checks:
        - name: no_scripts
          code: $result !~ /<script/i
        - name: no_iframes
          code: $result !~ /<iframe/i

=head2 GENERATED OUTPUT

The generated test:

=over 4

=item * Seeds RND (if configured) for reproducible fuzz runs

=item * Uses edge cases (per-field and per-type) with configurable probability

=item * Runs C<$iterations> fuzz cases plus appended edge-case runs

=item * Validates inputs with Params::Get / Params::Validate::Strict

=item * Validates outputs with L<Return::Set>

=item * Runs static C<is(... )> corpus tests from Perl and/or YAML corpus

=item * Runs L<Test::LectroTest> tests

=back

=head1 METHODS

=head2 generate

  generate($schema_file, $test_file)

Takes a schema file and produces a test file (or STDOUT).

=cut
1535
1536sub generate
1537{
1538
86
3113355
        croak 'Usage: generate(schema_file [, outfile])' if(scalar(@_) <= 1);
1539
1540
82
95
        my $class = shift;
1541
82
88
        my $args = $_[0];
1542
1543
82
182
        my ($schema_file, $test_file, $schema);
1544        # Globals loaded from the user's conf (all optional except function maybe)
1545
82
0
        my ($module, $function, $new, $yaml_cases);
1546
82
0
        my ($seed, $iterations);
1547
1548
82
270
        if((ref($args) eq 'HASH') || defined($_[2])) {
1549                # Modern API
1550
11
61
                my $params = Params::Validate::Strict::validate_strict({
1551                        args => Params::Get::get_params(undef, \@_),
1552                        schema => {
1553                                input_file => { type => 'string', optional => 1 },
1554                                schema_file => { type => 'string', optional => 1 },
1555                                output_file => { type => 'string', optional => 1 },
1556                                schema => { type => 'hashref', optional => 1 },
1557                                quiet => { type => 'boolean', optional => 1 }, # Not yet used
1558                        }
1559                });
1560
11
2213
                if($params->{'schema_file'}) {
1561
4
4
                        $schema_file = $params->{'schema_file'};
1562                } elsif($params->{'input_file'}) {
1563
1
2
                        $schema_file = $params->{'input_file'};
1564                } elsif($params->{'schema'}) {
1565
6
12
                        $schema = $params->{'schema'};
1566                } else {
1567
0
0
                        croak(__PACKAGE__, ': Usage: generate(input_file|schema [, output_file]');
1568                }
1569
11
26
                if(defined($schema_file)) {
1570
5
9
                        $schema = _load_schema($schema_file);
1571                }
1572
11
58
                $test_file = $params->{'output_file'};
1573        } else {
1574                # Legacy API
1575
71
100
                ($schema_file, $test_file) = @_;
1576
71
91
                if(defined($schema_file)) {
1577
70
131
                        $schema = _load_schema($schema_file);
1578                } else {
1579
1
5
                        croak 'Usage: generate(schema_file [, outfile])';
1580                }
1581        }
1582
1583        # Parse the schema file and load into our structures
1584
78
78
641
170
        my %input = %{_load_schema_section($schema, 'input', $schema_file)};
1585
77
77
86
84
        my %output = %{_load_schema_section($schema, 'output', $schema_file)};
1586
76
76
95
80
        my %transforms = %{_load_schema_section($schema, 'transforms', $schema_file)};
1587
75
75
82
78
        my %accessor = %{_load_schema_section($schema, 'accessor', $schema_file)};
1588
1589
75
2
129
3
        my %cases = %{$schema->{cases}} if(exists($schema->{cases}));
1590
75
0
113
0
        my %edge_cases = %{$schema->{edge_cases}} if(exists($schema->{edge_cases}));
1591
75
1
111
2
        my %type_edge_cases = %{$schema->{type_edge_cases}} if(exists($schema->{type_edge_cases}));
1592
1593
75
270
        $module = $schema->{module} if(exists($schema->{module}) && length($schema->{module}));
1594
75
128
        $function = $schema->{function} if(exists($schema->{function}));
1595
75
108
        if(exists($schema->{new})) {
1596
10
20
                $new = defined($schema->{'new'}) ? $schema->{new} : '_UNDEF';
1597        }
1598
75
114
        $yaml_cases = $schema->{yaml_cases} if(exists($schema->{yaml_cases}));
1599
75
103
        $seed = $schema->{seed} if(exists($schema->{seed}));
1600
75
91
        $iterations = $schema->{iterations} if(exists($schema->{iterations}));
1601
1602
75
3
99
5
        my @edge_case_array = @{$schema->{edge_case_array}} if(exists($schema->{edge_case_array}));
1603
75
148
        _validate_config($schema);
1604
1605
72
9
101
34
        my %config = %{$schema->{config}} if(exists($schema->{config}));
1606
1607
72
150
        _normalize_config(\%config);
1608
1609        # Guess module name from config file if not set
1610
72
356
        if(!$module) {
1611
0
0
                if($schema_file) {
1612
0
0
                        ($module = basename($schema_file)) =~ s/\.(conf|pl|pm|yml|yaml)$//;
1613
0
0
                        $module =~ s/-/::/g;
1614                        # Guard against Perl builtin function names being mistaken
1615                        # for module names — builtins have no module to load
1616
0
0
                        if(_is_perl_builtin($module)) {
1617
0
0
                                undef $module;
1618                        }
1619                }
1620        } elsif($module eq $MODULE_BUILTIN) {
1621
50
134
                undef $module;
1622        }
1623
1624
72
202
        if($module && length($module) && ($module ne 'builtin')) {
1625
22
39
                _validate_module($module, $schema_file);
1626        }
1627
1628        # sensible defaults
1629
72
96
        $function ||= 'run';
1630
72
149
        $iterations ||= DEFAULT_ITERATIONS;              # default fuzz runs if not specified
1631
72
102
        $seed = undef if defined $seed && $seed eq '';  # treat empty as undef
1632
1633        # --- YAML corpus support (yaml_cases is filename string) ---
1634
72
63
        my %yaml_corpus_data;
1635
72
86
        if (defined $yaml_cases) {
1636
5
54
                croak("$yaml_cases: $!") if(!-f $yaml_cases);
1637
1638
4
13
                my $yaml_data = LoadFile(Encode::decode('utf8', $yaml_cases));
1639
4
243
                if ($yaml_data && ref($yaml_data) eq 'HASH') {
1640                        # Validate that the corpus inputs are arrayrefs
1641                        # e.g: "FooBar":      ["foo_bar"]
1642                        # Skip only invalid entries:
1643
4
4
3
7
                        for my $expected (keys %{$yaml_data}) {
1644
6
8
                                my $outputs = $yaml_data->{$expected};
1645
6
22
                                unless($outputs && (ref $outputs eq 'ARRAY')) {
1646
2
29
                                        carp("$yaml_cases: $expected does not point to an array ref, ignoring");
1647
2
171
                                        next;
1648                                }
1649
4
5
                                $yaml_corpus_data{$expected} = $outputs;
1650                        }
1651                }
1652        }
1653
1654        # Merge Perl %cases and YAML corpus safely
1655        # my %all_cases = (%cases, %yaml_corpus_data);
1656
71
105
        my %all_cases = (%yaml_corpus_data, %cases);
1657
71
112
        for my $k (keys %yaml_corpus_data) {
1658
4
9
                if (exists $cases{$k} && ref($cases{$k}) eq 'ARRAY' && ref($yaml_corpus_data{$k}) eq 'ARRAY') {
1659
1
1
1
1
1
2
                        $all_cases{$k} = [ @{$yaml_corpus_data{$k}}, @{$cases{$k}} ];
1660                }
1661        }
1662
1663
71
127
        if(my $hints = delete $schema->{_yamltest_hints}) {
1664
4
10
                if(my $boundaries = $hints->{boundary_values}) {
1665
4
4
6
7
                        push @edge_case_array, @{$boundaries};
1666                }
1667
4
7
                if(my $invalid = $hints->{invalid}) {
1668
0
0
                        carp('TODO: handle yamltest_hints->invalid');
1669                }
1670        }
1671
1672        # If the schema says the type is numeric, normalize
1673
71
141
        if ($schema->{type} && $schema->{type} =~ /^(integer|number|float)$/) {
1674
0
0
                for (@edge_case_array) {
1675
0
0
                        next unless defined $_;
1676
0
0
                        $_ += 0 if Scalar::Util::looks_like_number($_);
1677                }
1678        }
1679
1680        # Load relationships from the schema if present and well-formed.
1681        # SchemaExtractor may set this to undef or an empty arrayref when
1682        # no relationships were detected, so guard both existence and type.
1683
71
56
        my @relationships;
1684
71
124
        if(exists($schema->{relationships}) && ref($schema->{relationships}) eq 'ARRAY') {
1685
0
0
0
0
                @relationships = @{$schema->{relationships}};
1686        }
1687
1688        # Serialise the relationships array from the schema into Perl source
1689        # code for embedding in the generated test file. Each relationship
1690        # type is rendered as a hashref in the @relationships array.
1691
1692
71
70
        my $relationships_code = '';
1693
1694        # Walk each relationship in the order SchemaExtractor produced them
1695
71
92
        for my $rel (@relationships) {
1696
0
0
                my $type = $rel->{type} // '';
1697
1698                # Mutually exclusive: both params being set should cause the method to die
1699
0
0
                if($type eq 'mutually_exclusive') {
1700                        $relationships_code .= "{ type => 'mutually_exclusive', params => [" .
1701
0
0
0
0
0
0
                                join(', ', map { perl_quote($_) } @{$rel->{params}}) .
1702                                "] },\n";
1703
1704                # Required group: at least one of the params must be present
1705                } elsif($type eq 'required_group') {
1706                        $relationships_code .= "{ type => 'required_group', params => [" .
1707
0
0
0
0
                                join(', ', map { perl_quote($_) } @{$rel->{params}}) .
1708
0
0
                                "], logic => " . perl_quote($rel->{logic} // 'or') . " },\n";
1709
1710                # Conditional requirement: if one param is set, another becomes mandatory
1711                } elsif($type eq 'conditional_requirement') {
1712                        $relationships_code .= "{ type => 'conditional_requirement', if => " .
1713                                perl_quote($rel->{'if'}) . ", then_required => " .
1714
0
0
                                perl_quote($rel->{then_required}) . " },\n";
1715
1716                # Dependency: one param requires another to also be present
1717                } elsif($type eq 'dependency') {
1718                        $relationships_code .= "{ type => 'dependency', param => " .
1719                                perl_quote($rel->{param}) . ", requires => " .
1720
0
0
                                perl_quote($rel->{requires}) . " },\n";
1721
1722                # Value constraint: one param being set forces another to a specific value
1723                } elsif($type eq 'value_constraint') {
1724                        $relationships_code .= "{ type => 'value_constraint', if => " .
1725                                perl_quote($rel->{'if'}) . ", then => " .
1726                                perl_quote($rel->{then}) . ", operator => " .
1727                                perl_quote($rel->{operator}) . ", value => " .
1728
0
0
                                perl_quote($rel->{value}) . " },\n";
1729
1730                # Value conditional: one param equalling a specific value requires another param
1731                } elsif($type eq 'value_conditional') {
1732                        $relationships_code .= "{ type => 'value_conditional', if => " .
1733                                perl_quote($rel->{'if'}) . ", equals => " .
1734                                perl_quote($rel->{equals}) . ", then_required => " .
1735
0
0
                                perl_quote($rel->{then_required}) . " },\n";
1736
1737                # Unknown type — warn and skip rather than emitting broken code
1738                } else {
1739
0
0
                        carp "Unknown relationship type '$type', skipping";
1740                }
1741        }
1742
1743        # Dedup the edge cases
1744
71
46
        my %seen;
1745        @edge_case_array = grep {
1746
71
84
108
138
                my $key = defined($_) ? (Scalar::Util::looks_like_number($_) ? "N:$_" : "S:$_") : 'U';
1747
84
138
                !$seen{$key}++;
1748        } @edge_case_array;
1749
1750        # Sort the edge cases to keep it consistent across runs
1751        @edge_case_array = sort {
1752
71
121
136
96
                return -1 if !defined $a;
1753
121
80
                return 1 if !defined $b;
1754
1755
121
95
                my $na = Scalar::Util::looks_like_number($a);
1756
121
84
                my $nb = Scalar::Util::looks_like_number($b);
1757
1758
121
179
                return $a <=> $b if $na && $nb;
1759
9
6
                return -1 if $na;
1760
9
7
                return 1 if $nb;
1761
9
11
                return $a cmp $b;
1762        } @edge_case_array;
1763
1764        # render edge case maps for inclusion in the .t
1765
71
140
        my $edge_cases_code = render_arrayref_map(\%edge_cases);
1766
71
78
        my $type_edge_cases_code = render_arrayref_map(\%type_edge_cases);
1767
1768
71
64
        my $edge_case_array_code = '';
1769
71
93
        if(scalar(@edge_case_array)) {
1770
17
79
23
77
                $edge_case_array_code = join(', ', map { q_wrap($_) } @edge_case_array);
1771        }
1772
1773        # Render configuration - all the values are integers for now, if that changes, wrap the $config{$key} in single quotes
1774
71
74
        my $config_code = '';
1775
71
231
        foreach my $key (sort keys %config) {
1776                # Skip nested structures like 'properties' - they're used during
1777                # generation but don't need to be in the generated test
1778
568
444
                if(ref($config{$key}) eq 'HASH') {
1779
71
55
                        next;
1780                }
1781
497
549
                if((!defined($config{$key})) || !$config{$key}) {
1782                        # YAML will strip the word 'false'
1783                        # e.g. in 'test_undef: false'
1784
33
24
                        $config_code .= "'$key' => 0,\n";
1785                } else {
1786
464
390
                        $config_code .= "'$key' => $config{$key},\n";
1787                }
1788        }
1789
1790        # Render input/output
1791
71
82
        my $input_code = '';
1792
71
205
        if(((scalar keys %input) == 1) && exists($input{'type'}) && !ref($input{'type'})) {
1793                # %input = ( type => 'string' );
1794
42
48
                foreach my $key (sort keys %input) {
1795
42
55
                        $input_code .= "'$key' => '$input{$key}',\n";
1796                }
1797        } else {
1798                # %input = ( str => { type => 'string' } );
1799
29
45
                $input_code = render_hash(\%input);
1800        }
1801
71
131
        if(defined(my $re = $output{'matches'})) {
1802
0
0
                if(ref($re) ne 'Regexp') {
1803                        # Use eval to compile safely — qr/$re/ would interpolate
1804                        # the string first, corrupting patterns containing [ or \
1805
0
0
0
0
                        my $compiled = eval { qr/$re/ };
1806
0
0
                        if($@) {
1807
0
0
                                carp("Invalid matches pattern '$re': $@");
1808                        } else {
1809
0
0
                                $output{'matches'} = $compiled;
1810                        }
1811                }
1812        }
1813
1814        # Compile nomatch pattern to a Regexp object so it renders
1815        # as qr{} in the generated test rather than a raw string.
1816        # Without this, patterns containing [ or other regex
1817        # metacharacters cause compilation failures in validators
1818
71
91
        if(defined(my $re = $output{'nomatch'})) {
1819
0
0
                if(ref($re) ne 'Regexp') {
1820                        # Use eval to compile safely — qr/$re/ would interpolate
1821                        # the string first, corrupting patterns containing [ or \
1822
0
0
0
0
                        my $compiled = eval { qr/$re/ };
1823
0
0
                        if($@) {
1824
0
0
                                carp("Invalid nomatch pattern '$re': $@");
1825                        } else {
1826
0
0
                                $output{'nomatch'} = $compiled;
1827                        }
1828                }
1829        }
1830
1831
71
117
        my $output_code = render_args_hash(\%output);
1832
71
139
        my $new_code = ($new && (ref $new eq 'HASH')) ? render_args_hash($new) : '';
1833
1834
71
60
        my $transforms_code;
1835
71
87
        if(keys %transforms) {
1836
4
5
                foreach my $transform(keys %transforms) {
1837
7
13
                        my $properties = render_fallback($transforms{$transform}->{'properties'});
1838
1839
7
10
                        if($transforms_code) {
1840
3
3
                                $transforms_code .= "},\n";
1841                        }
1842                        $transforms_code .= "$transform => {\n" .
1843                                "\t'input' => { " .
1844                                render_args_hash($transforms{$transform}->{'input'}) .
1845                                "\t}, 'output' => { " .
1846
7
10
                                render_args_hash($transforms{$transform}->{'output'}) .
1847                                "\t}, 'properties' => $properties\n" .
1848                                "\t,\n";
1849                }
1850
4
6
                $transforms_code .= "}\n";
1851        }
1852
1853
71
68
        my $transform_properties_code = '';
1854
71
52
        my $use_properties = 0;
1855
1856
71
98
        if (keys %transforms && ($config{properties}{enable} // 0)) {
1857
3
2
                $use_properties = 1;
1858
1859                # Generate property-based tests for transforms
1860
3
5
                my $properties = _generate_transform_properties(
1861                        \%transforms,
1862                        $function,
1863                        $module,
1864                        \%input,
1865                        \%config,
1866                        $new
1867                );
1868
1869                # Convert to code for template
1870
3
3
                $transform_properties_code = _render_properties($properties);
1871        }
1872
1873
71
86
        if(keys %accessor) {
1874                # Sanity test
1875
2
3
                my $property = $accessor{property};
1876
2
2
                my $type = $accessor{type};
1877
1878
2
2
                if(!defined($new)) {
1879
0
0
                        croak("BUG: $property: accessor $type can only work on an object, incorrectly tagged as $type");
1880                }
1881
2
4
                if($type eq 'getset') {
1882
0
0
                        if(scalar(keys %input) != 1) {
1883
0
0
                                croak("BUG: $property: getset must take one input argument, incorrectly tagged as getset");
1884                        }
1885
0
0
                        if(scalar(keys %output) == 0) {
1886
0
0
                                croak("BUG: $property: getset must give one output, incorrectly tagged as getset");
1887                        }
1888                }
1889        }
1890
1891        # Setup / call code (always load module)
1892
71
99
        my $setup_code = ($module) ? "BEGIN { use_ok('$module') }" : '';
1893
71
59
        my $call_code;  # Code to call the function being test when used with named arguments
1894        my $position_code;      # Code to call the function being test when used with position arguments
1895
71
109
        my $has_positions = _has_positions(\%input);
1896
71
186
        if(defined($new) && defined($module)) {
1897                # keep use_ok regardless (user found earlier issue)
1898
9
12
                if($new_code eq '') {
1899
8
8
                        $new_code = "new_ok('$module')";
1900                } else {
1901
1
1
                        $new_code = "new_ok('$module' => [ { $new_code } ] )";
1902                }
1903
9
11
                $setup_code .= "\nmy \$obj = $new_code;";
1904
9
13
                if($has_positions) {
1905
4
5
                        $position_code = "\$result = (scalar(\@alist) == 1) ? \$obj->$function(\$alist[0]) : (scalar(\@alist) == 0) ? \$obj->$function() : \$obj->$function(\@alist);";
1906
4
8
                        if(defined($accessor{type})) {
1907
0
0
                                if($accessor{type} eq 'getter') {
1908
0
0
                                        $position_code .= "my \$prev_value = \$obj->{$accessor{property}};";
1909                                } elsif($accessor{type} eq 'getset') {
1910
0
0
                                        $position_code .= 'if(scalar(@alist) == 1) { ';
1911
0
0
                                        $position_code .= "cmp_ok(\$result, 'eq', \$alist[0], 'getset function returns what was put in'); ok(\$obj->$function() eq \$result, 'test getset accessor');";
1912
0
0
                                        $position_code .= '}';
1913                                }
1914
0
0
                                if(($accessor{type} eq 'getset') || ($accessor{type} eq 'getter')) {
1915                                        # Since Perl doesn't support data encapsulation, we can test the getter returns the correct item
1916
0
0
                                        $position_code .= 'if(scalar(@alist) == 1) { ';
1917
0
0
                                        $position_code .= "cmp_ok(\$result, 'eq', \$obj->{$accessor{property}}, 'getset function returns correct item');";
1918
0
0
                                        if($accessor{type} eq 'getter') {
1919
0
0
                                                $position_code .= "if(defined(\$prev_value)) { cmp_ok(\$result, 'eq', \$prev_value, 'getter does not change value'); } ";
1920                                        }
1921
0
0
                                        $position_code .= '}';
1922                                }
1923
0
0
                                if($output{'_returns_self'}) {
1924
0
0
                                        croak("$accessor{type} for $accessor{property} cannot return \$self");
1925                                }
1926                        }
1927                } else {
1928
5
5
                        $call_code = "\$result = \$obj->$function(\$input);";
1929
5
15
                        if($output{'_returns_self'}) {
1930
0
0
                                $call_code .= "ok(defined(\$result)); ok(\$result eq \$obj, '$function returns self')";
1931                        } elsif(defined($accessor{type}) && ($accessor{type} eq 'getset')) {
1932
0
0
                                $call_code .= "ok(\$obj->$function() eq \$result, 'test getset accessor');"
1933                        }
1934
5
9
                        if(scalar(keys %input) == 0) {
1935
2
5
                                if(defined($accessor{type}) && ($accessor{type} eq 'getter')) {
1936
2
3
                                        $call_code .= "cmp_ok(\$result, 'eq', \$obj->{$accessor{property}}, 'getter function returns correct item') if(defined(\$result));";
1937                                }
1938                        }
1939                }
1940        } elsif(defined($module) && length($module)) {
1941
12
17
                if($function eq 'new') {
1942
2
6
                        if($has_positions) {
1943
0
0
                                $position_code = "\$result = (scalar(\@alist) == 1) ? ${module}\->$function(\$alist[0]) : (scalar(\@alist) == 0) ? ${module}\->$function() : ${module}\->$function(\@alist);";
1944                        } else {
1945
2
7
                                $call_code = "\$result = ${module}\->$function(\$input);";
1946                        }
1947                } else {
1948
10
8
                        if($has_positions) {
1949
0
0
                                $position_code = "\$result = (scalar(\@alist) == 1) ? ${module}::$function(\$alist[0]) : (scalar(\@alist) == 0) ? ${module}::$function() : ${module}::$function(\@alist);";
1950                        } else {
1951
10
11
                                $call_code = "\$result = ${module}::$function(\$input);";
1952                        }
1953                }
1954        } else {
1955
50
45
                if($has_positions) {
1956
7
7
                        $position_code = "\$result = $function(\@alist);";
1957                } else {
1958
43
46
                        $call_code = "\$result = $function(\$input);";
1959                }
1960        }
1961
1962        # Build static corpus code
1963
71
78
        my $corpus_code = '';
1964
71
88
        if (%all_cases) {
1965
3
4
                $corpus_code = "\n# --- Static Corpus Tests ---\n" .
1966                        "diag('Running " . scalar(keys %all_cases) . " corpus tests');\n";
1967
1968
3
7
                for my $expected (sort keys %all_cases) {
1969
6
6
                        my $inputs = $all_cases{$expected};
1970
6
7
                        next unless($inputs);
1971
1972
6
5
                        my $expected_str = perl_quote($expected);
1973
6
10
                        my $status = ((ref($inputs) eq 'HASH') && $inputs->{'_STATUS'}) // 'OK';
1974
6
12
                        if($expected_str eq "'_STATUS:DIES'") {
1975
0
0
                                $status = 'DIES';
1976                        } elsif($expected_str eq "'_STATUS:WARNS'") {
1977
0
0
                                $status = 'WARNS';
1978                        }
1979
1980
6
5
                        if(ref($inputs) eq 'HASH') {
1981
0
0
                                $inputs = $inputs->{'input'};
1982                        }
1983
6
6
                        my $input_str;
1984
6
9
                        if(ref($inputs) eq 'ARRAY') {
1985
6
9
6
4
5
5
                                $input_str = join(', ', map { perl_quote($_) } @{$inputs});
1986                        } elsif(ref($inputs) eq 'HASH') {
1987
0
0
                                $input_str = Dumper($inputs);
1988
0
0
                                $input_str =~ s/\$VAR1 =//;
1989
0
0
                                $input_str =~ s/;//;
1990
0
0
                                $input_str =~ s/=> 'undef'/=> undef/gms;
1991                        } else {
1992
0
0
                                $input_str = $inputs;
1993                        }
1994
6
10
                        if(($input_str eq 'undef') && (!$config{'test_undef'})) {
1995
0
0
                                carp('corpus case set to undef, yet test_undef is not set in config');
1996                        }
1997
6
8
                        if($new) {
1998
0
0
                                if($status eq 'DIES') {
1999                                        $corpus_code .= "dies_ok { \$obj->$function($input_str) } " .
2000
0
0
0
0
                                                        "'$function(" . join(', ', map { $_ // '' } @$inputs ) . ") dies';\n";
2001                                } elsif($status eq 'WARNS') {
2002                                        $corpus_code .= "warnings_exist { \$obj->$function($input_str) } qr/./, " .
2003
0
0
0
0
                                                        "'$function(" . join(', ', map { $_ // '' } @$inputs ) . ") warns';\n";
2004                                } else {
2005                                        my $desc = sprintf("$function(%s) returns %s",
2006
0
0
0
0
                                                perl_quote(join(', ', map { $_ // '' } @$inputs )),
2007                                                $expected_str
2008                                        );
2009
0
0
                                        if(($output{'type'} // '') eq 'boolean') {
2010
0
0
                                                if($expected_str eq '1') {
2011
0
0
                                                        $corpus_code .= "ok(\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2012                                                } elsif($expected_str eq '0') {
2013
0
0
                                                        $corpus_code .= "ok(!\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2014                                                } else {
2015
0
0
                                                        croak("Boolean is expected to return $expected_str");
2016                                                }
2017                                        } else {
2018
0
0
                                                $corpus_code .= "is(\$obj->$function($input_str), $expected_str, " . q_wrap($desc) . ");\n";
2019                                        }
2020                                }
2021                        } else {
2022
6
7
                                if($status eq 'DIES') {
2023
0
0
                                        if($module) {
2024
0
0
                                                $corpus_code .= "dies_ok { $module\::$function($input_str) } " .
2025                                                        "'Corpus $expected dies';\n";
2026                                        } else {
2027
0
0
                                                $corpus_code .= "dies_ok { $function($input_str) } " .
2028                                                        "'Corpus $expected dies';\n";
2029                                        }
2030                                } elsif($status eq 'WARNS') {
2031
0
0
                                        if($module) {
2032
0
0
                                                $corpus_code .= "warnings_exist { $module\::$function($input_str) } qr/./, " .
2033                                                        "'Corpus $expected warns';\n";
2034                                        } else {
2035
0
0
                                                $corpus_code .= "warnings_exist { $function($input_str) } qr/./, " .
2036                                                        "'Corpus $expected warns';\n";
2037                                        }
2038                                } else {
2039                                        my $desc = sprintf("$function(%s) returns %s",
2040
6
9
6
10
22
5
                                                perl_quote((ref $inputs eq 'ARRAY') ? (join(', ', map { $_ // '' } @{$inputs})) : $inputs),
2041                                                $expected_str
2042                                        );
2043
6
11
                                        if(($output{'type'} // '') eq 'boolean') {
2044
0
0
                                                if($expected_str eq '1') {
2045
0
0
                                                        $corpus_code .= "ok(\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2046                                                } elsif($expected_str eq '0') {
2047
0
0
                                                        $corpus_code .= "ok(!\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2048                                                } else {
2049
0
0
                                                        croak("Boolean is expected to return $expected_str");
2050                                                }
2051                                        } else {
2052
6
9
                                                $corpus_code .= "is(\$obj->$function($input_str), $expected_str, " . q_wrap($desc) . ");\n";
2053                                        }
2054                                }
2055                        }
2056                }
2057        }
2058
2059        # Prepare seed/iterations code fragment for the generated test
2060
71
71
        my $seed_code = '';
2061
71
75
        if (defined $seed) {
2062                # ensure integer-ish
2063
7
9
                $seed = int($seed);
2064
7
9
                $seed_code = "srand($seed);\n";
2065        }
2066
2067
71
96
        my $determinism_code = 'my $result2;' .
2068                'eval { $result2 = do { ' . (defined($position_code) ? $position_code : $call_code) . " }; };\n" .
2069                'is_deeply($result2, $result, "deterministic result for same input");' .
2070                "\n";
2071
2072        # Generate the test content
2073
71
389
        my $tt = Template->new({ ENCODING => 'utf8', TRIM => 1 });
2074
2075        # Read template from DATA handle
2076
71
126665
        my $template_package = __PACKAGE__ . '::Template';
2077
71
351
        my $template = $template_package->get_data_section('test.tt');
2078
2079        my $vars = {
2080                setup_code => $setup_code,
2081                edge_cases_code => $edge_cases_code,
2082                edge_case_array_code => $edge_case_array_code,
2083                type_edge_cases_code => $type_edge_cases_code,
2084                config_code => $config_code,
2085                seed_code => $seed_code,
2086                input_code => $input_code,
2087                output_code => $output_code,
2088                transforms_code => $transforms_code,
2089                corpus_code => $corpus_code,
2090                call_code => $call_code,
2091                position_code => $position_code,
2092                determinism_code => $determinism_code,
2093                function => $function,
2094                iterations_code => int($iterations),
2095                use_properties => $use_properties,
2096                transform_properties_code => $transform_properties_code,
2097
71
77772
                property_trials => $config{properties}{trials} // DEFAULT_PROPERTY_TRIALS,
2098                relationships_code => $relationships_code,
2099                module => $module
2100        };
2101
2102
71
64
        my $test;
2103
71
181
        $tt->process($template, $vars, \$test) or croak($tt->error());
2104
2105
71
1398838
        if ($test_file) {
2106
30
100
                open my $fh, '>:encoding(UTF-8)', $test_file or croak "Cannot open $test_file: $!";
2107
30
22532
                print $fh "$test\n";
2108
30
100
                close $fh;
2109
30
4783
                if($module) {
2110
17
282
                        print "Generated $test_file for $module\::$function with fuzzing + corpus support\n";
2111                } else {
2112
13
222
                        print "Generated $test_file for $function with fuzzing + corpus support\n";
2113                }
2114        } else {
2115
41
54564
                print "$test\n";
2116        }
2117}
2118
2119# --- Helpers for rendering data structures into Perl code for the generated test ---
2120
2121# --------------------------------------------------
2122# _is_perl_builtin
2123#
2124# Purpose:    Return true if a string is the name of
2125#             a Perl core builtin function, to prevent
2126#             it being used as a module name in
2127#             use_ok() calls in generated tests.
2128#
2129# Entry:      $name - the string to check.
2130# Exit:       Returns 1 if builtin, 0 otherwise.
2131# Side effects: None.
2132# --------------------------------------------------
2133sub _is_perl_builtin {
2134
23
11903
        my $name = $_[0];
2135
23
27
        return 0 unless defined $name;
2136
2137
22
202
16
163
        state %BUILTINS = map { $_ => 1 } qw(
2138                abs accept alarm atan2 bind binmode bless
2139                caller chdir chmod chomp chop chown chr chroot
2140                close closedir connect cos crypt
2141                dbmclose dbmopen defined delete die do dump
2142                each endgrent endhostent endnetent endprotoent endpwent endservent
2143                eof eval exec exists exit exp
2144                fcntl fileno flock fork format formline
2145                getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname
2146                gethostent getlogin getnetbyaddr getnetbyname getnetent
2147                getpeername getpgrp getppid getpriority getprotobyname
2148                getprotobynumber getprotoent getpwent getpwnam getpwuid
2149                getservbyname getservbyport getservent getsockname getsockopt
2150                glob gmtime goto grep
2151                hex
2152                index int ioctl
2153                join
2154                keys kill
2155                last lc lcfirst length link listen local localtime log lstat
2156                map mkdir msgctl msgget msgrcv msgsnd my
2157                next no
2158                oct open opendir ord our
2159                pack pipe pop pos print printf prototype push
2160                quotemeta
2161                rand read readdir readline readlink readpipe recv redo
2162                ref rename require reset return reverse rewinddir rindex rmdir
2163                say scalar seek seekdir select semctl semget semop send
2164                setgrent sethostent setnetent setpgrp setpriority setprotoent
2165                setpwent setservent setsockopt shift shmctl shmget shmread
2166                shmwrite shutdown sin sleep socket socketpair sort splice split
2167                sprintf sqrt srand stat study sub substr symlink syscall
2168                sysopen sysread sysseek system syswrite
2169                tell telldir tie tied time times truncate
2170                uc ucfirst umask undef unlink unpack unshift untie use
2171                utime values vec wait waitpid wantarray warn write
2172        );
2173
22
50
        return $BUILTINS{lc $name} // 0;
2174}
2175
2176# --------------------------------------------------
2177# _load_schema
2178#
2179# Load and parse a schema file using
2180#     Config::Abstraction, returning the
2181#     schema as a hashref.
2182#
2183# Entry:      $schema_file - path to the schema file.
2184#             Must be defined, non-empty, and readable.
2185#
2186# Exit:       Returns a hashref of the parsed schema
2187#             with a '_source' key added containing
2188#             the originating file path.
2189#             Croaks on any error.
2190#
2191# Side effects: Reads from the filesystem.
2192#
2193# Notes:      Legacy Perl-file configs (containing
2194#             '$module' or 'our $module' keys) are
2195#             rejected with a clear error. Config::
2196#             Abstraction is used rather than require()
2197#             to avoid executing arbitrary code from
2198#             user-supplied config files.
2199# --------------------------------------------------
2200sub _load_schema {
2201
83
14428
        my $schema_file = $_[0];
2202
2203        # Validate the argument before touching the filesystem
2204
83
126
        croak(__PACKAGE__, ': Usage: _load_schema($schema_file)') unless defined $schema_file;
2205
2206
81
116
        croak(__PACKAGE__, ': _load_schema given empty filename') unless length($schema_file);
2207
2208        # Confirm the file exists and is readable before attempting
2209        # to load it — gives a clearer error than Config::Abstraction would
2210
79
535
        croak(__PACKAGE__, ": _load_schema($schema_file): $!") unless -r $schema_file;
2211
2212        # Load configuration via Config::Abstraction which supports
2213        # YAML, JSON, and other formats without executing arbitrary code.
2214        # no_fixate prevents automatic type coercion that could alter values
2215
74
409
        if(my $schema = Config::Abstraction->new(
2216                config_dirs  => ['.', ''],
2217                config_file  => $schema_file,
2218                no_fixate    => 1,
2219        )) {
2220
74
76081
                if($schema = $schema->all()) {
2221                        # Detect legacy Perl config files by the presence of
2222                        # variable declaration keys — these are no longer supported
2223
74
576
                        if(exists($schema->{$LEGACY_PERL_KEY_1}) ||
2224                           exists($schema->{$LEGACY_PERL_KEY_2})) {
2225
1
9
                                croak("$schema_file: Loading perl files as configs is no longer supported");
2226                        }
2227
2228                        # Tag the schema with its source path for error messages
2229
73
587
                        $schema->{$SOURCE_KEY} = $schema_file;
2230
73
466
                        return $schema;
2231                }
2232        }
2233
2234
0
0
        croak "Failed to load schema from $schema_file";
2235}
2236
2237# --------------------------------------------------
2238# _load_schema_section
2239#
2240# Purpose:    Extract a named section from a parsed
2241#             schema hashref, validating that it is
2242#             a hashref if present.
2243#
2244# Entry:      $schema      - the full parsed schema hashref.
2245#             $section     - name of the section to extract
2246#                            (e.g. 'input', 'output').
2247#             $schema_file - path of the schema file,
2248#                            used in error messages only.
2249#
2250# Exit:       Returns the section hashref if present,
2251#             or an empty hashref {} if absent.
2252#             Croaks if the section exists but is not
2253#             a hashref (and not the string 'undef').
2254#
2255# Side effects: None.
2256#
2257# Notes:      The string 'undef' is treated as an
2258#             absent section — callers that set a
2259#             section to 'undef' in YAML get the same
2260#             result as omitting it entirely.
2261# --------------------------------------------------
2262sub _load_schema_section {
2263
314
6379
        my ($schema, $section, $schema_file) = @_;
2264
2265        # Section absent — return empty hash as the safe default
2266
314
395
        return {} unless exists $schema->{$section};
2267
2268        # Section present and is a hashref — return it directly
2269        return $schema->{$section}
2270
158
404
                if ref($schema->{$section}) eq 'HASH';
2271
2272        # Treat the YAML scalar 'undef' as equivalent to absent
2273        return {}
2274                if defined($schema->{$section}) &&
2275
8
31
                   $schema->{$section} eq 'undef';
2276
2277        # Section present but wrong type — croak with a clear message
2278        # showing what type was found so the user can fix their schema
2279        croak(
2280                "$schema_file: $section should be a hash, not ",
2281
5
39
                ref($schema->{$section}) || $schema->{$section}
2282        );
2283}
2284
2285# --------------------------------------------------
2286# _validate_config
2287#
2288# Purpose:    Validate the top-level schema hashref
2289#             loaded from a schema file, checking that
2290#             required fields are present and that all
2291#             input parameters, types, positions, and
2292#             transform properties are well-formed.
2293#
2294# Entry:      $schema - the full parsed schema hashref
2295#             as returned by _load_schema().
2296#
2297# Exit:       Returns nothing on success.
2298#             Croaks on any structural error.
2299#             Carps on non-fatal warnings (unknown
2300#             semantic types, position gaps, missing
2301#             input/output definitions).
2302#
2303# Side effects: May delete $schema->{input} if its
2304#               value is the string 'undef'.
2305#
2306# Notes:      The parameter is named $schema throughout
2307#             to distinguish the top-level schema from
2308#             the nested config sub-hash. _validate_config
2309#             is called before _normalize_config so config
2310#             boolean normalisation has not yet occurred.
2311# --------------------------------------------------
2312sub _validate_config {
2313
87
17296
        my $schema = $_[0];
2314
2315        # At least one of module or function must be present —
2316        # without these we cannot generate any meaningful test
2317
87
149
        if(!defined($schema->{'module'}) && !defined($schema->{'function'})) {
2318
4
19
                croak('At least one of function and module must be defined');
2319        }
2320
2321        # Warn if neither input nor output is defined — a few
2322        # generic tests can still be generated but it is unusual
2323
83
129
        if(!defined($schema->{'input'}) && !defined($schema->{'output'})) {
2324
7
30
                carp('Neither input nor output is defined, only a few tests will be generated');
2325        }
2326
2327        # Normalise input: the string 'undef' means no input defined
2328
83
3528
        if($schema->{'input'} && ref($schema->{input}) ne 'HASH') {
2329
3
4
                if($schema->{'input'} eq 'undef') {
2330
1
1
                        delete $schema->{'input'};
2331                } else {
2332
2
9
                        croak("Invalid input specification: expected hash, got '$schema->{'input'}'");
2333                }
2334        }
2335
2336        # Validate each input parameter if input is defined
2337
81
118
        if($schema->{input}) {
2338
69
148
                _validate_input_params($schema);
2339
68
131
                _validate_input_positions($schema);
2340
68
104
                _validate_input_semantics($schema);
2341        }
2342
2343        # Validate transform property definitions if present
2344
80
162
        if(exists($schema->{transforms}) && ref($schema->{transforms}) eq 'HASH') {
2345
8
47
                _validate_transform_properties($schema);
2346        }
2347
2348        # Validate any nested config sub-hash keys against known types
2349
80
153
        if(ref($schema->{config}) eq 'HASH') {
2350
11
11
14
22
                for my $k (keys %{$schema->{'config'}}) {
2351                        # CONFIG_TYPES is the authoritative list of valid keys
2352                        croak "unknown config setting '$k'"
2353
48
384
59
295
                                unless grep { $_ eq $k } CONFIG_TYPES;
2354                }
2355        }
2356}
2357
2358# --------------------------------------------------
2359# _validate_input_params
2360#
2361# Purpose:    Validate type specifications for each
2362#             named input parameter.
2363#
2364# Entry:      $schema - the full parsed schema hashref.
2365#             $schema->{input} must be a hashref.
2366#
2367# Exit:       Returns nothing. Croaks on invalid type.
2368# Side effects: None.
2369# --------------------------------------------------
2370sub _validate_input_params {
2371
73
7954
        my $schema = $_[0];
2372
2373
73
73
62
130
        for my $param (keys %{$schema->{input}}) {
2374                # Catch empty parameter names — these would produce
2375                # broken Perl variable names in the generated test
2376
76
98
                croak 'Empty input parameter name'
2377                        unless length($param);
2378
2379
75
94
                my $spec = $schema->{input}{$param};
2380
2381                # Validate the type field — required for all parameters
2382
75
92
                if(ref($spec)) {
2383                        croak("Missing type for parameter '$param'")
2384
32
118
                                unless defined $spec->{type};
2385                        croak("Invalid type '$spec->{type}' for parameter '$param'")
2386
31
64
                                unless _valid_type($spec->{type});
2387                } else {
2388
43
68
                        croak("Invalid type '$spec' for parameter '$param'")
2389                                unless _valid_type($spec);
2390                }
2391        }
2392}
2393
2394# --------------------------------------------------
2395# _validate_input_positions
2396#
2397# Purpose:    Validate positional argument declarations
2398#             in the input schema — positions must be
2399#             non-negative integers with no duplicates,
2400#             and either all or no parameters must have
2401#             positions.
2402#
2403# Entry:      $schema - the full parsed schema hashref.
2404#             $schema->{input} must be a hashref.
2405#
2406# Exit:       Returns nothing. Croaks on invalid or
2407#             duplicate positions. Carps on gaps.
2408# Side effects: None.
2409# --------------------------------------------------
2410sub _validate_input_positions {
2411
77
12397
        my $schema = $_[0];
2412
2413
77
78
        my $has_positions = 0;
2414
77
66
        my %positions;
2415
2416
77
77
70
115
        for my $param (keys %{$schema->{input}}) {
2417
86
89
                my $spec = $schema->{input}{$param};
2418
2419                # Only process params that explicitly declare a position
2420
86
204
                next unless ref($spec) eq 'HASH' && defined($spec->{position});
2421
2422
27
17
                $has_positions = 1;
2423
27
32
                my $pos = $spec->{position};
2424
2425                # Position must be a non-negative integer
2426
27
83
                croak "Position for '$param' must be a non-negative integer"
2427                        unless $pos =~ /^\d+$/;
2428
2429                # Duplicate positions would produce ambiguous generated tests
2430                croak "Duplicate position $pos for parameters '$positions{$pos}' and '$param'"
2431
26
38
                        if exists $positions{$pos};
2432
2433
24
38
                $positions{$pos} = $param;
2434        }
2435
2436        # If any param has a position, all params must have one
2437
74
113
        if($has_positions) {
2438
16
16
14
24
                for my $param (keys %{$schema->{input}}) {
2439
24
24
                        my $spec = $schema->{input}{$param};
2440
24
83
                        unless(ref($spec) eq 'HASH' && defined($spec->{position})) {
2441
2
9
                                croak "Parameter '$param' missing position " .
2442                                        '(all params must have positions if any do)';
2443                        }
2444                }
2445
2446                # Check for gaps — positions must be a contiguous sequence
2447                # starting at 0, otherwise the generated test will be wrong
2448
14
6
42
12
                my @sorted = sort { $a <=> $b } keys %positions;
2449
14
63
                for my $i (0 .. $#sorted) {
2450
20
42
                        if($sorted[$i] != $i) {
2451
1
5
                                carp "Position sequence has gaps (positions: @sorted)";
2452
1
171
                                last;
2453                        }
2454                }
2455        }
2456}
2457
2458# --------------------------------------------------
2459# _validate_input_semantics
2460#
2461# Purpose:    Validate semantic type annotations and
2462#             enum/memberof constraints on input params.
2463#
2464# Entry:      $schema - the full parsed schema hashref.
2465#             $schema->{input} must be a hashref.
2466#
2467# Exit:       Returns nothing. Croaks on conflicting
2468#             or malformed enum/memberof. Carps on
2469#             unknown semantic types.
2470# Side effects: None.
2471# --------------------------------------------------
2472sub _validate_input_semantics {
2473
79
16028
        my $schema = $_[0];
2474
2475
79
165
        my $semantic_generators = _get_semantic_generators();
2476
2477
79
79
67
134
        for my $param (keys %{$schema->{input}}) {
2478
81
81
                my $spec = $schema->{input}{$param};
2479
81
258
                next unless ref($spec) eq 'HASH';
2480
2481                # Warn on unknown semantic types rather than croaking —
2482                # new semantic types may be added without updating this list
2483
37
59
                if(defined($spec->{semantic})) {
2484
4
3
                        my $semantic = $spec->{semantic};
2485
4
7
                        unless(exists $semantic_generators->{$semantic}) {
2486                                carp "Unknown semantic type '$semantic' for parameter '$param'. " .
2487                                        'Available types: ' .
2488
2
2
4
20
                                        join(', ', sort keys %{$semantic_generators});
2489                        }
2490                }
2491
2492                # enum and memberof are mutually exclusive representations
2493                # of the same concept — having both is always a schema error
2494
37
278
                if($spec->{'enum'} && $spec->{'memberof'}) {
2495
2
8
                        croak "$param: has both enum and memberof";
2496                }
2497
2498                # Both enum and memberof must be arrayrefs when present
2499
35
37
                for my $type ('enum', 'memberof') {
2500
69
178
                        if(exists $spec->{$type}) {
2501                                croak "$type must be an arrayref"
2502
4
15
                                        unless ref($spec->{$type}) eq 'ARRAY';
2503                        }
2504                }
2505        }
2506}
2507
2508# --------------------------------------------------
2509# _validate_transform_properties
2510#
2511# Purpose:    Validate the properties array in each
2512#             transform definition, checking that each
2513#             property is either a known builtin name
2514#             or a custom hashref with name and code.
2515#
2516# Entry:      $schema - the full parsed schema hashref.
2517#             $schema->{transforms} must be a hashref.
2518#
2519# Exit:       Returns nothing. Croaks on invalid property
2520#             definitions. Carps on unknown builtins.
2521# Side effects: None.
2522# --------------------------------------------------
2523sub _validate_transform_properties {
2524
15
10199
        my $schema = $_[0];
2525
2526
15
33
        my $builtin_props = _get_builtin_properties();
2527
2528
15
15
18
83
        for my $transform_name (keys %{$schema->{transforms}}) {
2529
14
11
                my $transform = $schema->{transforms}{$transform_name};
2530
2531                # properties is optional — skip transforms that don't define it
2532
14
81
                next unless exists $transform->{properties};
2533
2534                croak "Transform '$transform_name': properties must be an array"
2535
6
11
                        unless ref($transform->{properties}) eq 'ARRAY';
2536
2537
5
5
3
4
                for my $prop (@{$transform->{properties}}) {
2538
5
8
                        if(!ref($prop)) {
2539                                # Plain string — must be a known builtin property name
2540
2
15
                                unless(exists $builtin_props->{$prop}) {
2541                                        carp "Transform '$transform_name': unknown built-in property '$prop'. " .
2542                                                'Available: ' .
2543
1
1
2
8
                                                join(', ', sort keys %{$builtin_props});
2544                                }
2545                        } elsif(ref($prop) eq 'HASH') {
2546                                # Custom property — must have both name and code fields
2547
2
15
                                unless($prop->{name} && $prop->{code}) {
2548
1
4
                                        croak "Transform '$transform_name': " .
2549                                                "custom properties must have 'name' and 'code' fields";
2550                                }
2551                        } else {
2552
1
5
                                croak "Transform '$transform_name': invalid property definition";
2553                        }
2554                }
2555        }
2556}
2557
2558# --------------------------------------------------
2559# _normalize_config
2560#
2561# Purpose:    Normalise boolean string values in the
2562#             config sub-hash to Perl integers (1/0),
2563#             and default absent boolean fields to 1
2564#             (enabled). The 'properties' field is a
2565#             hashref not a boolean and is handled
2566#             separately.
2567#
2568# Entry:      $config - the config sub-hash extracted
2569#             from the schema (i.e. $schema->{config}).
2570#             May be empty.
2571#
2572# Exit:       Returns nothing. Modifies $config in place.
2573#
2574# Side effects: Modifies the caller's config hashref.
2575#
2576# Notes:      String-to-boolean conversion is delegated
2577#             to %Readonly::Values::Boolean::booleans
2578#             which handles 'yes'/'no', 'on'/'off',
2579#             'true'/'false' etc. Fields not present in
2580#             the config hash are defaulted to 1 so
2581#             that test generation is maximally thorough
2582#             unless the schema explicitly disables a
2583#             feature.
2584# --------------------------------------------------
2585sub _normalize_config {
2586
81
12477
        my $config = $_[0];
2587
2588
81
132
        for my $field (CONFIG_TYPES) {
2589                # The properties field is a hashref not a boolean —
2590                # it is handled at the end of this function separately
2591
648
780
                next if $field eq $CONFIG_PROPERTIES_KEY;
2592
2593
567
1501
                if(exists($config->{$field}) && defined($config->{$field})) {
2594                        # Convert string boolean representations to integers
2595                        # using the lookup table from Readonly::Values::Boolean
2596
418
678
                        if(defined(my $b = $Readonly::Values::Boolean::booleans{$config->{$field}})) {
2597
418
1392
                                $config->{$field} = $b;
2598                        }
2599                } else {
2600                        # Default absent boolean fields to enabled (1) so that
2601                        # test generation is comprehensive unless explicitly disabled
2602
149
156
                        $config->{$field} = 1;
2603                }
2604        }
2605
2606        # Ensure properties is always a hashref — if absent or set to
2607        # a non-hash value, replace with a disabled default so that
2608        # downstream code can safely dereference it without checking ref()
2609
81
91
        $config->{$CONFIG_PROPERTIES_KEY} = { enable => 0 } unless ref($config->{$CONFIG_PROPERTIES_KEY}) eq 'HASH';
2610}
2611
2612# --------------------------------------------------
2613# _valid_type
2614#
2615# Determine whether a string is a
2616#     recognised schema field type accepted
2617#     by the generator.
2618#
2619# Entry:      $type - the type string to validate.
2620#             May be undef.
2621#
2622# Exit:       Returns 1 if the type is known,
2623#             0 if the type is unknown or undef.
2624#
2625# Side effects: None.
2626#
2627# Notes:      The lookup hash is declared with
2628#             'state' so it is built only once per
2629#             process rather than on every call —
2630#             important since _valid_type is called
2631#             in a loop over all input parameters.
2632#
2633#             'int' and 'bool' are accepted as
2634#             aliases for 'integer' and 'boolean'
2635#             respectively, for compatibility with
2636#             schemas generated by external tools
2637#             that use the shorter forms.
2638# --------------------------------------------------
2639sub _valid_type {
2640
134
19083
        my $type = $_[0];
2641
2642        # Undef is never a valid type
2643
134
197
        return 0 unless defined($type);
2644
2645        # Build the lookup table once and cache it for
2646        # the lifetime of the process via 'state'
2647
131
130
109
164
        state %VALID = map { $_ => 1 } qw(
2648                string boolean integer number float
2649                hashref arrayref object int bool
2650        );
2651
2652
131
340
        return($VALID{$type} // 0);
2653}
2654
2655# --------------------------------------------------
2656# _validate_module
2657#
2658# Purpose:    Check whether the module named in a
2659#             schema can be found in @INC during
2660#             test generation. Optionally also
2661#             attempts to load it if the
2662#             GENERATOR_VALIDATE_LOAD environment
2663#             variable is set.
2664#
2665# Entry:      $module      - the module name to
2666#                            check. If undef or
2667#                            empty, returns 1
2668#                            immediately (builtin
2669#                            functions need no
2670#                            module).
2671#             $schema_file - path to the schema
2672#                            file, used in warning
2673#                            messages only.
2674#
2675# Exit:       Returns 1 if the module was found
2676#             (and loaded, if validation was
2677#             requested).
2678#             Returns 0 if the module was not
2679#             found or failed to load — this is
2680#             non-fatal; generation continues.
2681#             Returns 1 immediately for undef or
2682#             empty $module.
2683#
2684# Side effects: Prints to STDERR when TEST_VERBOSE
2685#               or GENERATOR_VERBOSE is set.
2686#               Carps (non-fatally) when the module
2687#               cannot be found or loaded.
2688#               May attempt to load the module into
2689#               the current process when
2690#               GENERATOR_VALIDATE_LOAD is set —
2691#               this can have side effects depending
2692#               on the module.
2693#
2694# Notes:      Not finding a module during generation
2695#             is intentionally non-fatal — the module
2696#             may be available on the target machine
2697#             even if not on the generation machine.
2698#             Verbose output goes to STDERR via
2699#             print rather than carp since it is
2700#             informational, not a warning.
2701# --------------------------------------------------
2702sub _validate_module {
2703
26
6141
        my ($module, $schema_file) = @_;
2704
2705        # Builtin functions have no module to validate
2706
26
36
        return 1 unless $module;
2707
2708        # Check whether the module is findable in @INC
2709
24
77
        my $mod_info = check_install(module => $module);
2710
2711
24
42947
        if($schema_file && !$mod_info) {
2712                # Non-fatal — emit a single consolidated warning so
2713                # the caller sees one message rather than four
2714
6
168
                carp(
2715                        "Module '$module' not found in \@INC during generation.\n" .
2716                        "  Config file: $schema_file\n" .
2717                        "  This is OK if the module will be available when tests run.\n" .
2718                        '  If unexpected, check your module name and installation.'
2719                );
2720
6
4226
                return 0;
2721        }
2722
2723        # Check once and reuse — avoids evaluating two env vars twice
2724
18
37
        my $verbose = $ENV{$ENV_TEST_VERBOSE} || $ENV{$ENV_GENERATOR_VERBOSE};
2725
2726
18
113
        if($verbose) {
2727                print STDERR "Found module '$module' at: $mod_info->{'file'}\n",
2728
0
0
                        '  Version: ', ($mod_info->{'version'} || 'unknown'), "\n";
2729        }
2730
2731        # Optional load validation — disabled by default because
2732        # loading a module can have side effects (e.g. BEGIN blocks,
2733        # database connections, file I/O) that are undesirable
2734        # during generation
2735
18
37
        if($ENV{$ENV_VALIDATE_LOAD}) {
2736
0
0
                my $loaded = can_load(modules => { $module => undef }, verbose => 0);
2737
2738
0
0
                if(!$loaded) {
2739
0
0
                        my $err = $Module::Load::Conditional::ERROR || 'unknown error';
2740
0
0
                        carp(
2741                                "Module '$module' found but failed to load: $err\n" .
2742                                '  This might indicate a broken installation or missing dependencies.'
2743                        );
2744
0
0
                        return 0;
2745                }
2746
2747
0
0
                if($verbose) {
2748
0
0
                        print STDERR "Successfully loaded module '$module'\n";
2749                }
2750        }
2751
2752
18
64
        return 1;
2753}
2754
2755 - 2801
=head2 render_fallback

Render any Perl value into a compact Perl source-code string using
L<Data::Dumper>. Used as a catch-all when no more specific renderer
applies.

    my $code = render_fallback({ key => 'value' });
    # returns: "{'key' => 'value'}"

=head3 Arguments

=over 4

=item * C<$v>

Any Perl value, including undef, scalars, refs, and blessed objects.

=back

=head3 Returns

A string of Perl source code that reproduces the value when evaluated.
Returns the string C<'undef'> when C<$v> is undef.

=head3 Side effects

Temporarily sets C<$Data::Dumper::Terse> and C<$Data::Dumper::Indent>
to produce compact single-line output. Both are restored on return via
C<local>.

=head3 Notes

The output is always a single line with no trailing newline. Suitable
for embedding in generated test code where readability is secondary to
correctness.

=head3 API specification

=head4 input

    { v => { type => SCALAR|REF, optional => 1 } }

=head4 output

    { type => SCALAR }

=cut
2802
2803sub render_fallback {
2804
30
8639
        my $v = $_[0];
2805
2806        # Handle undef explicitly rather than letting Dumper produce
2807        # 'undef' without the localised settings applied
2808
30
41
        return 'undef' unless defined $v;
2809
2810        # Use Terse+Indent=0 to produce compact single-line output
2811        # suitable for embedding in generated test code
2812
21
28
        local $Data::Dumper::Terse  = 1;
2813
21
77
        local $Data::Dumper::Indent = 0;
2814
2815
21
58
        my $s = Dumper($v);
2816
2817        # Remove trailing newline that Dumper always appends
2818
21
790
        chomp $s;
2819
21
46
        return $s;
2820}
2821
2822 - 2875
=head2 render_hash

Render a two-level hashref (parameter name => spec hashref) into Perl
source code suitable for embedding in a generated test file as the
input specification passed to L<Params::Validate::Strict>.

    my $code = render_hash(\%input);

=head3 Arguments

=over 4

=item * C<$href>

A hashref whose values are themselves hashrefs containing field
specifications. Keys whose values are not hashrefs are skipped with
a warning.

=back

=head3 Returns

A string of comma-separated Perl source-code lines, one per key, of
the form:

    'key' => { subkey => value, ... }

Returns an empty string if C<$href> is undef, empty, or not a hashref.

=head3 Side effects

None. Does not modify C<$href>.

=head3 Notes

The C<matches> and C<nomatch> sub-keys are treated specially — their
values are compiled to C<Regexp> objects via C<eval { qr/.../ }> and
then rendered using C<perl_quote> so they appear as C<qr{...}> in the
generated test. This prevents unmatched bracket characters in the
pattern from causing compilation failures.

Other sub-keys are rendered via C<perl_quote>.

=head3 API specification

=head4 input

    { href => { type => HASHREF, optional => 1 } }

=head4 output

    { type => SCALAR }

=cut
2876
2877sub render_hash {
2878
47
18187
        my $href = $_[0];
2879
2880        # Return empty string for absent or non-hash input — callers
2881        # treat '' as "no input specification" in the generated test
2882
47
136
        return '' unless $href && ref($href) eq 'HASH';
2883
2884
42
36
        my @lines;
2885
2886
42
42
33
74
        for my $k (sort keys %{$href}) {
2887
35
36
                my $def = $href->{$k};
2888
2889                # Handle scalar shorthand — 'arg1: string' is equivalent to
2890                # 'arg1: { type: string }' and is explicitly supported by the
2891                # validation layer in _validate_input_params
2892
35
88
                unless(defined($def) && ref($def) eq 'HASH') {
2893
5
20
                        if(defined($def) && !ref($def) && _valid_type($def)) {
2894                                # Expand scalar type shorthand to a full spec hashref
2895
4
7
                                $def = { type => $def };
2896                        } else {
2897
1
7
                                carp "render_hash: skipping key '$k' — value is not a hashref or recognised type string";
2898
1
96
                                next;
2899                        }
2900                }
2901
2902
34
25
                my @pairs;
2903
2904
34
34
30
59
                for my $subk (sort keys %{$def}) {
2905                        # Skip undef sub-values — they contribute nothing to the spec
2906
70
70
                        next unless defined $def->{$subk};
2907
2908                        # Validate that reference types are ones we can render —
2909                        # nested hashrefs are not yet supported
2910
69
71
                        if(ref($def->{$subk})) {
2911
0
0
                                unless((ref($def->{$subk}) eq 'ARRAY') ||
2912                                       (ref($def->{$subk}) eq 'Regexp')) {
2913                                        croak(
2914                                                __PACKAGE__,
2915                                                ": $subk is a nested element, not yet supported (",
2916
0
0
                                                ref($def->{$subk}), ')'
2917                                        );
2918                                }
2919                        }
2920
2921                        # matches and nomatch values must be Regexp objects in the
2922                        # generated test — compile raw strings safely via eval so
2923                        # patterns containing [ or \ don't cause compile failures
2924
69
94
                        if(($subk eq $KEY_MATCHES) || ($subk eq $KEY_NOMATCH)) {
2925                                my $re = ref($def->{$subk}) eq 'Regexp'
2926                                        ? $def->{$subk}
2927
3
3
15
75
                                        : eval { qr/$def->{$subk}/ };
2928
3
10
                                if($@ || !defined($re)) {
2929
0
0
                                        carp "render_hash: invalid $subk pattern '$def->{$subk}': $@";
2930
0
0
                                        next;
2931                                }
2932
3
6
                                push @pairs, "$subk => " . perl_quote($re);
2933                        } else {
2934                                # All other sub-keys are rendered via perl_quote which
2935                                # handles scalars, arrayrefs, and Regexp objects correctly
2936
66
341
                                push @pairs, "$subk => " . perl_quote($def->{$subk});
2937                        }
2938                }
2939
2940                # Use "\t" rather than a literal tab for clarity and grep-ability
2941
34
42
                push @lines, "\t" . perl_quote($k) . ' => { ' . join(', ', @pairs) . ' }';
2942        }
2943
2944
42
71
        return join(",\n", @lines);
2945}
2946
2947 - 2993
=head2 render_args_hash

Render a flat hashref into a Perl source-code argument list of the
form C<'key' => value, ...>, suitable for embedding in a function call
in a generated test file.

    my $code = render_args_hash({ type => 'string', min => 1 });
    # returns: "'min' => 1, 'type' => 'string'"

=head3 Arguments

=over 4

=item * C<$href>

A flat hashref of key-value pairs. Values may be scalars, arrayrefs,
or Regexp objects — all are handled by C<perl_quote>.

=back

=head3 Returns

A comma-separated string of C<key => value> pairs sorted by key.
Returns an empty string if C<$href> is undef, empty, or not a hashref.

=head3 Side effects

None.

=head3 Notes

Keys and values are both rendered via C<perl_quote>. In particular,
C<Regexp> values are rendered as C<qr{...}> which is correct for
L<Params::Validate::Strict> and L<Return::Set> schema arguments in
the generated test.

=head3 API specification

=head4 input

    { href => { type => HASHREF, optional => 1 } }

=head4 output

    { type => SCALAR }

=cut
2994
2995sub render_args_hash {
2996
101
13415
        my $href = $_[0];
2997
2998        # Return empty string for absent or non-hash input
2999
101
228
        return '' unless $href && ref($href) eq 'HASH';
3000
3001        # Sort keys for deterministic output across runs — important for
3002        # generated test files that are committed to version control
3003        my @pairs = map {
3004
123
142
                perl_quote($_) . ' => ' . perl_quote($href->{$_})
3005
98
98
78
144
        } sort keys %{$href};
3006
3007
98
172
        return join(', ', @pairs);
3008}
3009
3010 - 3056
=head2 render_arrayref_map

Render a hashref whose values are arrayrefs into a Perl source-code
fragment suitable for use as a hash literal in a generated test file.

    my $code = render_arrayref_map({ name => ['', 'a' x 100] });

=head3 Arguments

=over 4

=item * C<$href>

A hashref whose values are arrayrefs. Keys whose values are not
arrayrefs are silently skipped.

=back

=head3 Returns

A comma-separated string of C<'key' => [ val, ... ]> entries, one per
qualifying key, sorted alphabetically. Returns the string C<'()'> if
C<$href> is undef, empty, or not a hashref — this produces an empty
hash assignment in the generated test rather than a syntax error.

=head3 Side effects

None.

=head3 Notes

Array element values are rendered via C<perl_quote> which handles
scalars, arrayrefs, and Regexp objects. Non-arrayref values are
skipped without warning — this is intentional since callers may pass
mixed-value hashes and only want the arrayref entries rendered.

=head3 API specification

=head4 input

    { href => { type => HASHREF, optional => 1 } }

=head4 output

    { type => SCALAR }

=cut
3057
3058sub render_arrayref_map {
3059
155
14290
        my $href = $_[0];
3060
3061        # Return '()' rather than '' so callers get a valid empty hash
3062        # literal rather than a syntax error in the generated test
3063
155
303
        return '()' unless $href && ref($href) eq 'HASH';
3064
3065
152
103
        my @entries;
3066
3067
152
152
128
197
        for my $k (sort keys %{$href}) {
3068
10
10
                my $aref = $href->{$k};
3069
3070                # Skip non-arrayref values — mixed hashes are allowed by callers
3071
10
16
                next unless ref($aref) eq 'ARRAY';
3072
3073                # Render each array element via perl_quote so strings are
3074                # properly quoted and numbers are left unquoted
3075
7
12
7
5
10
8
                my $vals = join(', ', map { perl_quote($_) } @{$aref});
3076
3077                # Use "\t" rather than a literal tab for clarity
3078
7
8
                push @entries, "\t" . perl_quote($k) . " => [ $vals ]";
3079        }
3080
3081
152
228
        return join(",\n", @entries);
3082}
3083
3084# --------------------------------------------------
3085# _has_positions
3086#
3087# Purpose:    Determine whether any field in an input
3088#             spec hashref declares a positional argument
3089#             via the 'position' key.
3090#
3091# Entry:      $input_spec - the input section of a parsed
3092#             schema, expected to be a hashref whose values
3093#             are themselves hashrefs containing field specs.
3094#             May be undef or a non-hash ref.
3095#
3096# Exit:       Returns 1 if any field has a defined
3097#             'position' key, 0 otherwise.
3098#
3099# Side effects: None.
3100#
3101# Notes:      Returns 0 immediately for undef or non-hash
3102#             input rather than throwing — callers use the
3103#             return value as a boolean and do not expect
3104#             exceptions from this function.
3105# --------------------------------------------------
3106sub _has_positions {
3107
98
15463
        my $input_spec = $_[0];
3108
3109        # Guard against undef or non-hash input — keys %$undef would throw
3110
98
203
        return 0 unless defined($input_spec) && ref($input_spec) eq 'HASH';
3111
3112
94
94
76
115
        for my $field (keys %{$input_spec}) {
3113                # Only examine fields whose spec is a hashref — scalar specs
3114                # (e.g. input: { type: string }) cannot have positions
3115
86
133
                next unless ref($input_spec->{$field}) eq 'HASH';
3116
3117                # Return immediately on first match — no need to scan further
3118
40
73
                return 1 if defined $input_spec->{$field}{position};
3119        }
3120
3121        # No positional arguments found in any field
3122
69
76
        return 0;
3123}
3124
3125# --------------------------------------------------
3126# q_wrap
3127#
3128# Purpose:    Wrap a string in the most readable
3129#             q{} form that does not require escaping,
3130#             falling back to single-quoted form with
3131#             escaped apostrophes if no delimiter is
3132#             available.
3133#
3134# Entry:      $s - the string to wrap. May be undef.
3135# Exit:       Returns a Perl source-code fragment that
3136#             evaluates to the original string value,
3137#             or the string 'undef' if $s is undef.
3138#
3139# Side effects: None.
3140#
3141# Notes:      index() returns -1 when not found and
3142#             any value >= 0 when found, including 0
3143#             for a delimiter at the start of the
3144#             string. We compare against $INDEX_NOT_FOUND
3145#             to make this boundary explicit and to
3146#             prevent off-by-one mutation survivors.
3147#             See GitHub issue #1.
3148# --------------------------------------------------
3149sub q_wrap {
3150
103
21474
        my $s = $_[0];
3151
3152        # Return empty string for undef — this function is a low-level
3153        # string quoter only. Callers that need the Perl literal 'undef'
3154        # for undefined values should use perl_quote() instead, which
3155        # handles the undef -> 'undef' semantic conversion correctly.
3156        # Returning '' here preserves the original behaviour and avoids
3157        # injecting the bare word 'undef' into contexts that expect a
3158        # quoted string value.
3159
103
100
        return "''" unless defined $s;
3160
3161        # Try bracket-form q{} delimiters first — most readable
3162
100
162
        for my $p (@Q_BRACKET_PAIRS) {
3163
114
114
271
125
                my ($l, $r) = @{$p};
3164
3165                # Only use this bracket pair if neither bracket
3166                # appears in the string — both must be checked
3167
114
1498
                return "q$l$s$r" unless $s =~ /\Q$l\E|\Q$r\E/;
3168        }
3169
3170        # Try single-character delimiters in preference order
3171
3
10
        for my $d (@Q_SINGLE_DELIMITERS) {
3172                # index() returns $INDEX_NOT_FOUND (-1) when not found.
3173                # Must use != $INDEX_NOT_FOUND rather than > 0 since
3174                # the delimiter may legitimately appear at position 0
3175
14
87
                return "q$d$s$d" if index($s, $d) == $INDEX_NOT_FOUND;
3176        }
3177
3178        # Last resort — single-quoted string with escaped apostrophes
3179
1
6
        (my $esc = $s) =~ s/'/\\'/g;
3180
1
1
        return "'$esc'";
3181}
3182
3183# --------------------------------------------------
3184# perl_sq
3185#
3186# Purpose:    Escape a string for safe inclusion
3187#             inside a single-quoted Perl string
3188#             literal in generated test code.
3189#
3190# Entry:      $s - the string to escape.
3191# Exit:       Returns the escaped string, or an
3192#             empty string if $s is undef.
3193#
3194# Side effects: None.
3195#
3196# Notes:      NUL byte replacement produces the
3197#             two-character sequence \0 which is
3198#             only correct when the result is used
3199#             inside a double-quoted string context
3200#             in the generated test.
3201#
3202#             The \b substitution (backspace) is
3203#             intentionally omitted — in Perl regex
3204#             context \b means word boundary, not
3205#             backspace, so substituting it here
3206#             would corrupt strings containing word
3207#             boundaries.
3208# --------------------------------------------------
3209sub perl_sq {
3210
347
192010
        my $s = $_[0];
3211
3212        # Return empty string for undef — callers that need
3213        # 'undef' literal should use perl_quote instead
3214
347
323
        return '' unless defined $s;
3215
3216        # Escape backslashes first so later substitutions
3217        # don't double-escape already-escaped sequences
3218
345
278
        $s =~ s/\\/\\\\/g;
3219
3220        # Escape apostrophes so they don't terminate the
3221        # surrounding single-quoted string literal
3222
345
251
        $s =~ s/'/\\'/g;
3223
3224        # Escape common control characters to their
3225        # printable two-character escape sequences
3226
345
206
        $s =~ s/\n/\\n/g;
3227
345
240
        $s =~ s/\r/\\r/g;
3228
345
213
        $s =~ s/\t/\\t/g;
3229
345
260
        $s =~ s/\f/\\f/g;
3230
3231        # Replace NUL bytes with \0 — valid only in
3232        # double-quoted string context in generated code
3233
345
199
        $s =~ s/\0/\\0/g;
3234
3235
345
618
        return $s;
3236}
3237
3238# --------------------------------------------------
3239# perl_quote
3240#
3241# Purpose:    Convert a Perl value into a source-code
3242#             fragment that reproduces that value when
3243#             evaluated in a generated test file.
3244#
3245# Entry:      $v - the value to quote. May be undef,
3246#             a scalar, an arrayref, a Regexp, or any
3247#             other reference type.
3248#
3249# Exit:       Returns a string of Perl source code.
3250#             Undef produces the literal 'undef'.
3251#             Numbers are returned unquoted.
3252#             Strings are returned single-quoted via
3253#             perl_sq(). Arrays are recursively quoted.
3254#             Regexps are rendered as qr{...}.
3255#             Other refs fall through to render_fallback.
3256#
3257# Side effects: None.
3258#
3259# Notes:      The boolean string literals 'true' and
3260#             'false' are converted to Perl boolean
3261#             constants !!1 and !!0 respectively so
3262#             that YAML boolean values round-trip
3263#             correctly into generated tests.
3264# --------------------------------------------------
3265sub perl_quote {
3266
459
267474
        my $v = $_[0];
3267
3268        # Undef produces the Perl literal 'undef'
3269
459
418
        return 'undef' unless defined $v;
3270
3271        # Convert YAML boolean string literals to Perl
3272        # boolean constants so they survive round-tripping
3273
454
404
        return '!!1' if $v eq 'true';
3274
451
393
        return '!!0' if $v eq 'false';
3275
3276
448
347
        if(ref($v)) {
3277                # Recursively quote each element of an arrayref
3278
35
57
                if(ref($v) eq 'ARRAY') {
3279
10
24
10
8
26
10
                        my @quoted_v = map { perl_quote($_) } @{$v};
3280
10
26
                        return '[ ' . join(', ', @quoted_v) . ' ]';
3281                }
3282
3283                # Render Regexp objects as qr{} with modifiers
3284
25
32
                if(ref($v) eq 'Regexp') {
3285
12
26
                        my ($pat, $mods) = regexp_pattern($v);
3286
12
15
                        my $re = "qr{$pat}";
3287
3288                        # Append modifiers (e.g. 'i', 'x') if present
3289
12
80
                        $re .= $mods if $mods;
3290
12
37
                        return $re;
3291                }
3292
3293                # Hashrefs and other reference types fall through
3294                # to render_fallback which uses Data::Dumper
3295
13
21
                return render_fallback($v);
3296        }
3297
3298        # Numeric values are emitted unquoted so the generated
3299        # test performs numeric rather than string comparison
3300
413
664
        return looks_like_number($v) ? $v : "'" . perl_sq($v) . "'";
3301}
3302
3303# --------------------------------------------------
3304# _generate_transform_properties
3305#
3306# Convert a hashref of transform
3307#     specifications into an arrayref of
3308#     LectroTest property definition hashrefs,
3309#     one per transform. Each hashref contains
3310#     all the information needed by
3311#     _render_properties to emit a runnable
3312#     Test::LectroTest property block.
3313#
3314# Entry:      $transforms  - hashref of transform name
3315#                            => transform spec, as
3316#                            loaded from the schema.
3317#             $function    - name of the function under
3318#                            test.
3319#             $module      - module name, or undef for
3320#                            builtin functions.
3321#             $input       - the top-level input spec
3322#                            hashref from the schema
3323#                            (used for position sorting).
3324#             $config      - the normalised config
3325#                            hashref, used to read
3326#                            properties.trials.
3327#             $new         - defined if the function is
3328#                            an object method; the value
3329#                            is not used here since
3330#                            property tests always
3331#                            construct a fresh object
3332#                            via new_ok() with no args.
3333#                            Presence vs absence is the
3334#                            only signal used.
3335#
3336# Exit:       Returns an arrayref of property hashrefs.
3337#             Returns an empty arrayref if no transforms
3338#             produce any testable properties.
3339#             Never returns undef.
3340#
3341# Side effects: None. Does not modify any argument.
3342#
3343# Notes:      Transforms whose input is the string
3344#             'undef' or whose input spec is not a
3345#             hashref are silently skipped — they
3346#             represent error-case transforms that have
3347#             no meaningful generator.
3348#
3349#             The 'WARN' vs 'WARNS' distinction in
3350#             _STATUS: the schema convention uses
3351#             'WARNS' throughout. This function checks
3352#             for 'WARNS' to match that convention.
3353# --------------------------------------------------
3354sub _generate_transform_properties {
3355
10
10219
        my ($transforms, $function, $module, $input, $config, $new) = @_;
3356
3357
10
13
        my @properties;
3358
3359
10
10
5
13
        for my $transform_name (sort keys %{$transforms}) {
3360
12
10
                my $transform   = $transforms->{$transform_name};
3361
3362
12
11
                my $input_spec  = $transform->{input};
3363
3364                # Guard: skip transforms with no input or with the
3365                # YAML scalar 'undef' as their input — these have no
3366                # generator and cannot produce meaningful properties
3367
12
30
                if(!defined($input_spec) ||
3368                   (!ref($input_spec) && $input_spec eq 'undef')) {
3369
2
2
                        next;
3370                }
3371
3372                # Guard: skip transforms whose input is not a hashref —
3373                # must come before the helper calls below so we never
3374                # pass a non-hash to _detect_transform_properties or
3375                # _process_custom_properties
3376
10
15
                next unless ref($input_spec) eq 'HASH';
3377
3378                # Default output spec to empty hash so _STATUS lookups
3379                # below are always safe regardless of schema content
3380
9
11
                my $output_spec = $transform->{output} // {};
3381
3382                # Detect automatic properties from the transform spec
3383                # (range constraints, type preservation, definedness)
3384
9
22
                my @detected_props = _detect_transform_properties(
3385                        $transform_name,
3386                        $input_spec,
3387                        $output_spec
3388                );
3389
3390                # Process any custom properties defined in the schema
3391
9
8
                my @custom_props = ();
3392
9
12
                if(exists($transform->{properties}) &&
3393                   ref($transform->{properties}) eq 'ARRAY') {
3394                        @custom_props = _process_custom_properties(
3395                                $transform->{properties},
3396
0
0
                                $function,
3397                                $module,
3398                                $input_spec,
3399                                $output_spec,
3400                                $new
3401                        );
3402                }
3403
3404                # Combine auto-detected and custom properties into one list
3405
9
11
                my @all_props = (@detected_props, @custom_props);
3406
3407                # Skip this transform if no properties were produced —
3408                # nothing useful to render into the generated test
3409
9
9
                next unless @all_props;
3410
3411                # Build the LectroTest generator specification string,
3412                # one entry per input field that has a generator
3413
9
6
                my @generators;
3414                my @var_names;
3415
3416
9
9
11
9
                for my $field (sort keys %{$input_spec}) {
3417
9
8
                        my $spec = $input_spec->{$field};
3418
3419                        # Skip non-hashref field specs — scalar types
3420                        # like 'string' have no generator sub-structure
3421
9
9
                        next unless ref($spec) eq 'HASH';
3422
3423
9
11
                        my $gen = _schema_to_lectrotest_generator($field, $spec);
3424
9
32
                        if(defined($gen) && length($gen)) {
3425
9
6
                                push @generators, $gen;
3426
9
9
                                push @var_names, $field;
3427                        }
3428                }
3429
3430
9
13
                my $gen_spec = join(', ', @generators);
3431
3432                # Build the call expression for the function under test.
3433                # Note: property tests always construct a fresh object
3434                # via new_ok() with no constructor arguments, regardless
3435                # of what $new holds in the caller — the intent here is
3436                # to test the method in isolation, not with specific
3437                # construction state.
3438
9
8
                my $call_code;
3439
9
22
                if($module && defined($new)) {
3440                        # OO mode — construct a fresh object for each trial
3441
1
1
                        $call_code  = "my \$obj = new_ok('$module');";
3442
1
1
                        $call_code .= "\$obj->$function";
3443                } elsif($module && $module ne $MODULE_BUILTIN) {
3444                        # Functional mode with a named module
3445
0
0
                        $call_code = "$module\::$function";
3446                } else {
3447                        # Builtin or unqualified function call
3448
8
7
                        $call_code = $function;
3449                }
3450
3451                # Build the argument list, respecting positional order
3452                # if the input spec declares positions
3453
9
7
                my @args;
3454
9
11
                if(_has_positions($input_spec)) {
3455                        # Sort fields by declared position so the generated
3456                        # call passes arguments in the correct order
3457                        my @sorted = sort {
3458                                $input_spec->{$a}{position} <=>
3459                                $input_spec->{$b}{position}
3460
9
0
9
8
0
12
                        } keys %{$input_spec};
3461
9
9
9
12
                        @args = map { "\$$_" } @sorted;
3462                } else {
3463                        # No positions — use alphabetical order from @var_names
3464
0
0
0
0
                        @args = map { "\$$_" } @var_names;
3465                }
3466
3467
9
12
                my $args_str = join(', ', @args);
3468
3469                # Concatenate all property check expressions with &&
3470                # so the generated property block passes only when
3471                # every check holds
3472
9
29
7
25
                my @checks = map { $_->{code} } @all_props;
3473
9
13
                my $property_checks = join(" &&\n\t", @checks);
3474
3475                # Determine expected behaviour from output _STATUS.
3476                # Note: the schema convention uses 'WARNS' not 'WARN'
3477
9
19
                my $should_die  = ($output_spec->{'_STATUS'} // '') eq 'DIES';
3478
9
13
                my $should_warn = ($output_spec->{'_STATUS'} // '') eq 'WARNS';
3479
3480                push @properties, {
3481                        name             => $transform_name,
3482                        generator_spec   => $gen_spec,
3483                        call_code        => "$call_code($args_str)",
3484                        property_checks  => $property_checks,
3485                        should_die       => $should_die,
3486                        should_warn      => $should_warn,
3487
9
42
                        trials           => $config->{'properties'}{'trials'} // DEFAULT_PROPERTY_TRIALS,
3488                };
3489        }
3490
3491
10
14
        return \@properties;
3492}
3493
3494# --------------------------------------------------
3495# _get_semantic_generators
3496#
3497# Return a hashref of named semantic
3498#     generator definitions for use in
3499#     LectroTest property-based tests.
3500#     Each entry contains a 'code' key
3501#     holding a Gen {} block string and a
3502#     'description' key for documentation
3503#     and validation messages.
3504#
3505# Entry:      None.
3506#
3507# Exit:       Returns a hashref keyed by semantic
3508#             type name. Each value is a hashref
3509#             with 'code' and 'description' keys.
3510#
3511# Side effects: None.
3512#
3513# Notes:      The returned hashref is built fresh
3514#             on every call — callers that need it
3515#             repeatedly should cache the result.
3516#             The 'code' strings are multi-line
3517#             Gen {} blocks; callers are responsible
3518#             for compressing whitespace before
3519#             embedding them in generated test files.
3520# --------------------------------------------------
3521sub _get_semantic_generators {
3522        return {
3523
84
6506
                email => {
3524                        code => q{
3525                                Gen {
3526                                        my $len = 5 + int(rand(10));
3527                                        my @addr;
3528                                        my @tlds = qw(com org net edu gov io co uk de fr);
3529
3530                                        for(my $i = 0; $i < $len; $i++) {
3531                                                push @addr, pack('c', (int(rand 26))+97);
3532                                        }
3533                                        push @addr, '@';
3534                                        $len = 5 + int(rand(10));
3535                                        for(my $i = 0; $i < $len; $i++) {
3536                                                push @addr, pack('c', (int(rand 26))+97);
3537                                        }
3538                                        push @addr, '.';
3539                                        $len = rand($#tlds+1);
3540                                        push @addr, $tlds[$len];
3541                                        return join('', @addr);
3542                                }
3543                        },
3544                        description => 'Valid email addresses',
3545                },
3546
3547                url => {
3548                        code => q{
3549                                Gen {
3550                                        my @schemes = qw(http https);
3551                                        my @tlds = qw(com org net io);
3552                                        my $scheme = $schemes[int(rand(@schemes))];
3553                                        my $domain = join('', map { ('a'..'z')[int(rand(26))] } 1..(5 + int(rand(10))));
3554                                        my $tld = $tlds[int(rand(@tlds))];
3555                                        my $path = join('', map { ('a'..'z', '0'..'9', '-', '_')[int(rand(38))] } 1..int(rand(20)));
3556
3557                                        return "$scheme://$domain.$tld" . ($path ? "/$path" : '');
3558                                }
3559                        },
3560                        description => 'Valid HTTP/HTTPS URLs',
3561                },
3562
3563                uuid => {
3564                        code => q{
3565                                Gen {
3566                                        require UUID::Tiny;
3567                                        UUID::Tiny::create_uuid_as_string(UUID::Tiny::UUID_V4());
3568                                }
3569                        },
3570                        description => 'Valid UUIDv4 identifiers',
3571                },
3572
3573                phone_us => {
3574                        code => q{
3575                                Gen {
3576                                        my $area = 200 + int(rand(800));
3577                                        my $exchange = 200 + int(rand(800));
3578                                        my $subscriber = int(rand(10000));
3579                                        sprintf('%03d-%03d-%04d', $area, $exchange, $subscriber);
3580                                }
3581                        },
3582                        description => 'US phone numbers (XXX-XXX-XXXX format)',
3583                },
3584
3585                phone_e164 => {
3586                        code => q{
3587                                Gen {
3588                                        my $country = 1 + int(rand(999));
3589                                        my $area = 100 + int(rand(900));
3590                                        my $number = int(rand(10000000));
3591                                        sprintf('+%d%03d%07d', $country, $area, $number);
3592                                }
3593                        },
3594                        description => 'E.164 international phone numbers',
3595                },
3596
3597                ipv4 => {
3598                        code => q{
3599                                Gen {
3600                                        join('.', map { int(rand(256)) } 1..4);
3601                                }
3602                        },
3603                        description => 'IPv4 addresses',
3604                },
3605
3606                ipv6 => {
3607                        code => q{
3608                                Gen {
3609                                        join(':', map { sprintf('%04x', int(rand(0x10000))) } 1..8);
3610                                }
3611                        },
3612                        description => 'IPv6 addresses',
3613                },
3614
3615                username => {
3616                        code => q{
3617                                Gen {
3618                                        my $len = 3 + int(rand(13));
3619                                        my @chars = ('a'..'z', '0'..'9', '_', '-');
3620                                        my $first = ('a'..'z')[int(rand(26))];
3621                                        $first . join('', map { $chars[int(rand(@chars))] } 1..($len-1));
3622                                }
3623                        },
3624                        description => 'Valid usernames (alphanumeric with _ and -)',
3625                },
3626
3627                slug => {
3628                        code => q{
3629                                Gen {
3630                                        my @words = qw(quick brown fox jumps over lazy dog hello world test data);
3631                                        my $count = 1 + int(rand(4));
3632                                        join('-', map { $words[int(rand(@words))] } 1..$count);
3633                                }
3634                        },
3635                        description => 'URL slugs (lowercase words separated by hyphens)',
3636                },
3637
3638                hex_color => {
3639                        code => q{
3640                                Gen {
3641                                        sprintf('#%06x', int(rand(0x1000000)));
3642                                }
3643                        },
3644                        description => 'Hex color codes (#RRGGBB)',
3645                },
3646
3647                iso_date => {
3648                        code => q{
3649                                Gen {
3650                                        my $year = 2000 + int(rand(25));
3651                                        my $month = 1 + int(rand(12));
3652                                        my $day = 1 + int(rand(28));
3653                                        sprintf('%04d-%02d-%02d', $year, $month, $day);
3654                                }
3655                        },
3656                        description => 'ISO 8601 date format (YYYY-MM-DD)',
3657                },
3658
3659                iso_datetime => {
3660                        code => q{
3661                                Gen {
3662                                        my $year = 2000 + int(rand(25));
3663                                        my $month = 1 + int(rand(12));
3664                                        my $day = 1 + int(rand(28));
3665                                        my $hour = int(rand(24));
3666                                        my $minute = int(rand(60));
3667                                        my $second = int(rand(60));
3668                                        sprintf('%04d-%02d-%02dT%02d:%02d:%02dZ',
3669                                                $year, $month, $day, $hour, $minute, $second);
3670                                }
3671                        },
3672                        description => 'ISO 8601 datetime format (YYYY-MM-DDTHH:MM:SSZ)',
3673                },
3674
3675                semver => {
3676                        code => q{
3677                                Gen {
3678                                        my $major = int(rand(10));
3679                                        my $minor = int(rand(20));
3680                                        my $patch = int(rand(50));
3681                                        "$major.$minor.$patch";
3682                                }
3683                        },
3684                        description => 'Semantic version strings (major.minor.patch)',
3685                },
3686
3687                jwt => {
3688                        code => q{
3689                                Gen {
3690                                        my @chars = ('A'..'Z', 'a'..'z', '0'..'9', '-', '_');
3691                                        my $header    = join('', map { $chars[int(rand(@chars))] } 1..20);
3692                                        my $payload   = join('', map { $chars[int(rand(@chars))] } 1..40);
3693                                        my $signature = join('', map { $chars[int(rand(@chars))] } 1..30);
3694                                        "$header.$payload.$signature";
3695                                }
3696                        },
3697                        description => 'JWT-like tokens (base64url format)',
3698                },
3699
3700                json => {
3701                        code => q{
3702                                Gen {
3703                                        my @keys = qw(id name value status count);
3704                                        my $key = $keys[int(rand(@keys))];
3705                                        my $value = 1 + int(rand(1000));
3706                                        qq({"$key":$value});
3707                                }
3708                        },
3709                        description => 'Simple JSON objects',
3710                },
3711
3712                base64 => {
3713                        code => q{
3714                                Gen {
3715                                        my @chars = ('A'..'Z', 'a'..'z', '0'..'9', '+', '/');
3716                                        my $len = 12 + int(rand(20));
3717                                        my $str = join('', map { $chars[int(rand(@chars))] } 1..$len);
3718                                        $str .= '=' x (4 - ($len % 4)) if $len % 4;
3719                                        $str;
3720                                }
3721                        },
3722                        description => 'Base64-encoded strings',
3723                },
3724
3725                md5 => {
3726                        code => q{
3727                                Gen {
3728                                        join('', map { sprintf('%x', int(rand(16))) } 1..32);
3729                                }
3730                        },
3731                        description => 'MD5 hashes (32 hex characters)',
3732                },
3733
3734                sha256 => {
3735                        code => q{
3736                                Gen {
3737                                        join('', map { sprintf('%x', int(rand(16))) } 1..64);
3738                                }
3739                        },
3740                        description => 'SHA-256 hashes (64 hex characters)',
3741                },
3742
3743                unix_timestamp => {
3744                        code => q{
3745                                Gen {
3746                                        time;
3747                                }
3748                        },
3749                        description => 'Unix timestamps (seconds since epoch)',
3750                },
3751        };
3752}
3753
3754# --------------------------------------------------
3755# _get_builtin_properties
3756#
3757# Purpose:    Return a hashref of named built-in
3758#             property templates that can be
3759#             referenced by name in a transform's
3760#             'properties' list in the schema.
3761#             Each entry contains a 'description'
3762#             string, a 'code_template' coderef, and
3763#             an 'applicable_to' arrayref.
3764#
3765# Entry:      None.
3766#
3767# Exit:       Returns a hashref keyed by property
3768#             name. Each value is a hashref with
3769#             'description', 'code_template', and
3770#             'applicable_to' keys.
3771#
3772# Side effects: None.
3773#
3774# Notes:      'applicable_to' lists the types for
3775#             which each property is meaningful. It
3776#             is stored for documentation purposes
3777#             and potential future filtering — it is
3778#             not currently enforced by any caller.
3779#
3780#             Each 'code_template' coderef receives
3781#             three arguments: ($function, $call_code,
3782#             $input_vars). Most templates use only
3783#             $call_code; $function and $input_vars
3784#             are provided for templates that need
3785#             them (e.g. idempotent, length_preserved,
3786#             preserves_keys).
3787#
3788#             'monotonic_increasing' has been
3789#             intentionally omitted. A correct
3790#             implementation requires calling the
3791#             function twice with ordered inputs,
3792#             which the current single-call property
3793#             framework does not support. A
3794#             placeholder that unconditionally returns
3795#             true would give false confidence and has
3796#             therefore been removed.
3797# --------------------------------------------------
3798sub _get_builtin_properties {
3799        return {
3800                idempotent => {
3801                        description   => 'Function is idempotent: f(f(x)) == f(x)',
3802                        code_template => sub {
3803
2
13238
                                my ($function, $call_code, $input_vars) = @_;
3804
3805                                # String comparison works for all scalar types in Perl —
3806                                # numeric values stringify consistently for eq
3807
2
4
                                return "do { my \$tmp = $call_code; \$result eq \$tmp }";
3808                        },
3809                        applicable_to => ['all'],
3810                },
3811
3812                non_negative => {
3813                        description   => 'Result is always non-negative',
3814                        code_template => sub {
3815
3
136
                                my ($function, $call_code, $input_vars) = @_;
3816
3
4
                                return '$result >= 0';
3817                        },
3818                        applicable_to => ['number', 'integer', 'float'],
3819                },
3820
3821                positive => {
3822                        description   => 'Result is always positive (> 0)',
3823                        code_template => sub {
3824
2
255
                                my ($function, $call_code, $input_vars) = @_;
3825
2
4
                                return '$result > 0';
3826                        },
3827                        applicable_to => ['number', 'integer', 'float'],
3828                },
3829
3830                non_empty => {
3831                        description   => 'Result is never empty',
3832                        code_template => sub {
3833
2
259
                                my ($function, $call_code, $input_vars) = @_;
3834
2
3
                                return 'length($result) > 0';
3835                        },
3836                        applicable_to => ['string'],
3837                },
3838
3839                length_preserved => {
3840                        description   => 'Output length equals input length',
3841                        code_template => sub {
3842
2
273
                                my ($function, $call_code, $input_vars) = @_;
3843
2
3
                                my $first_var = $input_vars->[0];
3844
2
2
                                return "length(\$result) == length(\$$first_var)";
3845                        },
3846                        applicable_to => ['string'],
3847                },
3848
3849                uppercase => {
3850                        description   => 'Result is all uppercase',
3851                        code_template => sub {
3852
2
286
                                my ($function, $call_code, $input_vars) = @_;
3853
2
4
                                return '$result eq uc($result)';
3854                        },
3855                        applicable_to => ['string'],
3856                },
3857
3858                lowercase => {
3859                        description   => 'Result is all lowercase',
3860                        code_template => sub {
3861
2
260
                                my ($function, $call_code, $input_vars) = @_;
3862
2
2
                                return '$result eq lc($result)';
3863                        },
3864                        applicable_to => ['string'],
3865                },
3866
3867                trimmed => {
3868                        description   => 'Result has no leading or trailing whitespace',
3869                        code_template => sub {
3870
2
278
                                my ($function, $call_code, $input_vars) = @_;
3871
2
2
                                return '$result !~ /^\s/ && $result !~ /\s$/';
3872                        },
3873                        applicable_to => ['string'],
3874                },
3875
3876                sorted_ascending => {
3877                        description   => 'Array is sorted in ascending order',
3878                        code_template => sub {
3879
2
279
                                my ($function, $call_code, $input_vars) = @_;
3880
2
3
                                return 'do { my @arr = @$result; my $sorted = 1; ' .
3881                                        'for my $i (1..$#arr) { $sorted = 0 if $arr[$i] < $arr[$i-1]; } ' .
3882                                        '$sorted }';
3883                        },
3884                        applicable_to => ['arrayref'],
3885                },
3886
3887                sorted_descending => {
3888                        description   => 'Array is sorted in descending order',
3889                        code_template => sub {
3890
2
260
                                my ($function, $call_code, $input_vars) = @_;
3891
2
4
                                return 'do { my @arr = @$result; my $sorted = 1; ' .
3892                                        'for my $i (1..$#arr) { $sorted = 0 if $arr[$i] > $arr[$i-1]; } ' .
3893                                        '$sorted }';
3894                        },
3895                        applicable_to => ['arrayref'],
3896                },
3897
3898                unique_elements => {
3899                        description   => 'Array has no duplicate elements',
3900                        code_template => sub {
3901
2
256
                                my ($function, $call_code, $input_vars) = @_;
3902
2
2
                                return 'do { my @arr = @$result; my %seen; !grep { $seen{$_}++ } @arr }';
3903                        },
3904                        applicable_to => ['arrayref'],
3905                },
3906
3907                preserves_keys => {
3908                        description   => 'Hash has same keys as input',
3909                        code_template => sub {
3910
2
256
                                my ($function, $call_code, $input_vars) = @_;
3911
2
2
                                my $first_var = $input_vars->[0];
3912
2
4
                                return 'do { my @in  = sort keys %{$' . $first_var . '}; ' .
3913                                        'my @out = sort keys %$result; ' .
3914                                        'join(",", @in) eq join(",", @out) }';
3915                        },
3916
26
32459
                        applicable_to => ['hashref'],
3917                },
3918        };
3919}
3920
3921# --------------------------------------------------
3922# _schema_to_lectrotest_generator
3923#
3924# Purpose:    Convert a single schema field spec
3925#             hashref into a LectroTest generator
3926#             declaration string of the form
3927#             '$field <- Generator(...)'.
3928#             Used to build the ##[ ... ]## generator
3929#             block inside a Property definition.
3930#
3931# Entry:      $field_name - the parameter name as it
3932#                           will appear in the
3933#                           generated test code.
3934#             $spec       - hashref containing at
3935#                           minimum a 'type' key.
3936#                           May also contain 'min',
3937#                           'max', 'semantic', and
3938#                           'matches' keys depending
3939#                           on type.
3940#
3941# Exit:       Returns a string of the form
3942#             '$field <- Generator(...)' on success.
3943#             Returns undef if the spec is not a
3944#             hashref or if range constraints are
3945#             invalid (min >= max for numeric types).
3946#             Returns a String generator with a carp
3947#             warning for unknown types.
3948#
3949# Side effects: Carps on unknown semantic types,
3950#               invalid numeric ranges, and unknown
3951#               field types.
3952#
3953# Notes:      Semantic generators are checked first
3954#             for string fields and take precedence
3955#             over the regular string generator.
3956#             The $input_spec parameter in the type-
3957#             detection helpers is reserved for future
3958#             use and is currently unused.
3959# --------------------------------------------------
3960sub _schema_to_lectrotest_generator {
3961
33
16962
        my ($field_name, $spec) = @_;
3962
3963        # Guard: must be a hashref to dereference safely
3964
33
77
        return unless defined($spec) && ref($spec) eq 'HASH';
3965
3966        # Default to string when no type is declared
3967
30
34
        my $type = $spec->{'type'} || $DEFAULT_FIELD_TYPE;
3968
3969        # --------------------------------------------------
3970        # Semantic generators take precedence for string
3971        # fields — they produce realistic domain-specific
3972        # values rather than random character sequences
3973        # --------------------------------------------------
3974
30
45
        if($type eq 'string' && defined($spec->{'semantic'})) {
3975
1
1
                my $semantic_type = $spec->{'semantic'};
3976
1
1
                my $generators    = _get_semantic_generators();
3977
3978
1
2
                if(exists($generators->{$semantic_type})) {
3979
1
1
                        my $gen_code = $generators->{$semantic_type}{'code'};
3980
3981                        # Compress the multi-line generator code into a
3982                        # single line for embedding in the ##[ ]## block
3983
1
2
                        $gen_code =~ s/^\s+//;
3984
1
7
                        $gen_code =~ s/\s+$//;
3985
1
5
                        $gen_code =~ s/\n\s+/ /g;
3986
3987
1
5
                        return "$field_name <- $gen_code";
3988                } else {
3989
0
0
                        carp "Unknown semantic type '$semantic_type', " .
3990                                "falling back to regular string generator";
3991                        # Fall through to regular string generation below
3992                }
3993        }
3994
3995        # --------------------------------------------------
3996        # Integer generator
3997        # --------------------------------------------------
3998
29
30
        if($type eq 'integer') {
3999
6
6
                my $min = $spec->{'min'};
4000
6
4
                my $max = $spec->{'max'};
4001
4002
6
16
                if(!defined($min) && !defined($max)) {
4003                        # Unconstrained — use LectroTest's built-in Int
4004
3
6
                        return "$field_name <- Int";
4005                } elsif(!defined($min)) {
4006                        # Only max defined — generate 0 to max
4007
0
0
                        return "$field_name <- Int(sized => sub { int(rand($max + 1)) })";
4008                } elsif(!defined($max)) {
4009                        # Only min defined — generate min to min + range
4010
0
0
                        return "$field_name <- Int(sized => sub { $min + int(rand($DEFAULT_GENERATOR_RANGE)) })";
4011                } else {
4012                        # Both defined — generate within [min, max]
4013
3
3
                        my $range = $max - $min;
4014
3
10
                        return "$field_name <- Int(sized => sub { $min + int(rand($range + 1)) })";
4015                }
4016        }
4017
4018        # --------------------------------------------------
4019        # Float / number generator
4020        # --------------------------------------------------
4021
23
38
        if($type eq 'number' || $type eq 'float') {
4022
12
11
                my $min = $spec->{'min'};
4023
12
11
                my $max = $spec->{'max'};
4024
4025
12
31
                if(!defined($min) && !defined($max)) {
4026                        # Unconstrained — symmetric range around zero
4027
3
8
                        return "$field_name <- Float(sized => sub { rand($DEFAULT_GENERATOR_RANGE) - $DEFAULT_GENERATOR_RANGE / 2 })";
4028
4029                } elsif(!defined($min)) {
4030                        # Only max defined — choose range based on sign of max
4031
4
7
                        if($max == $ZERO_BOUNDARY) {
4032                                # max=0: negative numbers only
4033
4
13
                                return "$field_name <- Float(sized => sub { -rand($DEFAULT_GENERATOR_RANGE) })";
4034                        } elsif($max > $ZERO_BOUNDARY) {
4035                                # Positive max: generate 0 to max
4036
0
0
                                return "$field_name <- Float(sized => sub { rand($max) })";
4037                        } else {
4038                                # Negative max: generate from (max - range) to max
4039
0
0
                                return "$field_name <- Float(sized => sub { ($max - $DEFAULT_GENERATOR_RANGE) + rand($DEFAULT_GENERATOR_RANGE + $max) })";
4040                        }
4041
4042                } elsif(!defined($max)) {
4043                        # Only min defined — choose range based on sign of min
4044
3
3
                        if($min == $ZERO_BOUNDARY) {
4045                                # min=0: positive numbers only
4046
3
9
                                return "$field_name <- Float(sized => sub { rand($DEFAULT_GENERATOR_RANGE) })";
4047                        } elsif($min > $ZERO_BOUNDARY) {
4048                                # Positive min: generate min to min + range
4049
0
0
                                return "$field_name <- Float(sized => sub { $min + rand($DEFAULT_GENERATOR_RANGE) })";
4050                        } else {
4051                                # Negative min: generate from min to min + range
4052
0
0
                                return "$field_name <- Float(sized => sub { $min + rand(-$min + $DEFAULT_GENERATOR_RANGE) })";
4053                        }
4054
4055                } else {
4056                        # Both min and max defined — validate then generate
4057
2
3
                        my $range = $max - $min;
4058
2
4
                        if($range <= $ZERO_BOUNDARY) {
4059
2
21
                                carp "Invalid range for '$field_name': min=$min, max=$max";
4060                                # Return undef rather than emitting a degenerate
4061                                # generator that would silently produce wrong values
4062
2
215
                                return;
4063                        }
4064
0
0
                        return "$field_name <- Float(sized => sub { $min + rand($range) })";
4065                }
4066        }
4067
4068        # --------------------------------------------------
4069        # String generator
4070        # --------------------------------------------------
4071
11
11
        if($type eq 'string') {
4072
6
13
                my $min_len = $spec->{'min'} // 0;
4073
6
13
                my $max_len = $spec->{'max'} // $DEFAULT_MAX_STRING_LEN;
4074
4075                # If a regex pattern is declared, delegate to
4076                # Data::Random::String::Matches for pattern-aware generation
4077
6
22
                if(defined($spec->{'matches'})) {
4078
2
3
                        my $pattern = $spec->{'matches'};
4079
4080
2
5
                        if(defined($spec->{'max'})) {
4081
0
0
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => qr/$pattern/, length => $spec->{'max'} }) }";
4082                        } elsif(defined($spec->{'min'})) {
4083
0
0
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => qr/$pattern/, length => $spec->{'min'} }) }";
4084                        } else {
4085
2
4
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => qr/$pattern/ }) }";
4086                        }
4087                }
4088
4089
4
10
                return "$field_name <- String(length => [$min_len, $max_len])";
4090        }
4091
4092        # --------------------------------------------------
4093        # Boolean generator
4094        # --------------------------------------------------
4095
5
7
        if($type eq 'boolean') {
4096
2
3
                return "$field_name <- Bool";
4097        }
4098
4099        # --------------------------------------------------
4100        # Arrayref generator
4101        # --------------------------------------------------
4102
3
4
        if($type eq 'arrayref') {
4103
2
5
                my $min_size = $spec->{'min'} // 0;
4104
2
6
                my $max_size = $spec->{'max'} // $DEFAULT_MAX_COLLECTION_SIZE;
4105
2
8
                return "$field_name <- List(Int, length => [$min_size, $max_size])";
4106        }
4107
4108        # --------------------------------------------------
4109        # Hashref generator
4110        # LectroTest has no built-in Hash generator so we
4111        # use Elements over a pre-built list of hashrefs
4112        # --------------------------------------------------
4113
1
1
        if($type eq 'hashref') {
4114
1
1
                my $min_keys = $spec->{'min'} // 0;
4115
1
4
                my $max_keys = $spec->{'max'} // $DEFAULT_MAX_COLLECTION_SIZE;
4116
1
2
                return "$field_name <- Elements(map { my \%h; for (1..\$_) { \$h{'key'.\$_} = \$_ }; \\\%h } $min_keys..$max_keys)";
4117        }
4118
4119        # --------------------------------------------------
4120        # Unknown type — fall back to String with a warning
4121        # --------------------------------------------------
4122
0
0
        carp "Unknown type '$type' for '$field_name' LectroTest generator, using String";
4123
0
0
        return "$field_name <- String";
4124}
4125
4126# --------------------------------------------------
4127# _is_numeric_transform
4128#
4129# Determine whether a transform's output
4130#     spec declares a numeric type, indicating
4131#     that numeric range properties should be
4132#     generated for it.
4133#
4134# Entry:      $input_spec  - the transform's input
4135#                            spec hashref. Currently
4136#                            unused; reserved for
4137#                            future input-type checks.
4138#             $output_spec - the transform's output
4139#                            spec hashref.
4140#
4141# Exit:       Returns 1 if the output type is one of
4142#             'number', 'integer', or 'float'.
4143#             Returns 0 otherwise.
4144#
4145# Side effects: None.
4146# --------------------------------------------------
4147sub _is_numeric_transform {
4148
37
4228
        my ($input_spec, $output_spec) = @_;
4149
4150        # $input_spec is currently unused — reserved for future
4151        # input-side type checking when detecting mixed transforms
4152
37
61
        my $out_type = ($output_spec // {})->{'type'} // '';
4153
4154
37
100
        return($out_type eq 'number' || $out_type eq 'integer' || $out_type eq 'float');
4155}
4156
4157# --------------------------------------------------
4158# _is_string_transform
4159#
4160# Purpose:    Determine whether a transform's output
4161#             spec declares a string type, indicating
4162#             that string length and pattern properties
4163#             should be generated for it.
4164#
4165# Entry:      $input_spec  - the transform's input
4166#                            spec hashref. Currently
4167#                            unused; reserved for
4168#                            future input-type checks.
4169#             $output_spec - the transform's output
4170#                            spec hashref.
4171#
4172# Exit:       Returns 1 if the output type is 'string'.
4173#             Returns 0 otherwise.
4174#
4175# Side effects: None.
4176# --------------------------------------------------
4177sub _is_string_transform {
4178
31
3650
        my ($input_spec, $output_spec) = @_;
4179
4180        # $input_spec is currently unused — reserved for future
4181        # input-side type checking when detecting mixed transforms
4182
31
61
        my $out_type = ($output_spec // {})->{'type'} // '';
4183
4184
31
44
        return($out_type eq 'string');
4185}
4186
4187# --------------------------------------------------
4188# _same_type
4189#
4190# Purpose:    Determine whether the dominant type of
4191#             a transform's input and output specs
4192#             match, indicating that type-preservation
4193#             properties are meaningful.
4194#
4195# Entry:      $input_spec  - the transform's input
4196#                            spec hashref, or a nested
4197#                            multi-field hashref.
4198#             $output_spec - the transform's output
4199#                            spec hashref.
4200#
4201# Exit:       Returns 1 if the dominant input and
4202#             output types are identical strings.
4203#             Returns 0 otherwise.
4204#
4205# Side effects: None.
4206#
4207# Notes:      Uses _get_dominant_type for both sides.
4208#             For multi-field input specs, dominant
4209#             type is the type of the first field
4210#             encountered — this is a simplification.
4211#             TODO: extend to handle mixed-type inputs
4212#             by checking all fields, not just the
4213#             first one found.
4214# --------------------------------------------------
4215sub _same_type {
4216
31
4582
        my ($input_spec, $output_spec) = @_;
4217
4218        # Guard: treat missing specs as untyped — two untyped
4219        # specs both default to $DEFAULT_FIELD_TYPE and would
4220        # compare equal, which is intentionally conservative
4221
31
49
        my $in_type  = _get_dominant_type($input_spec  // {});
4222
31
50
        my $out_type = _get_dominant_type($output_spec // {});
4223
4224
31
50
        return($in_type eq $out_type);
4225}
4226
4227# --------------------------------------------------
4228# _get_dominant_type
4229#
4230# Purpose:    Extract the most representative type
4231#             string from a spec hashref. For flat
4232#             output specs this is simply the 'type'
4233#             key. For multi-field input specs it is
4234#             the type of the first sub-field found
4235#             that declares one.
4236#
4237# Entry:      $spec - a spec hashref. May be a flat
4238#                     output spec ({ type => '...' })
4239#                     or a multi-field input spec
4240#                     ({ field => { type => '...' } }).
4241#                     May be undef or empty.
4242#
4243# Exit:       Returns a type string. Returns
4244#             $DEFAULT_FIELD_TYPE ('string') if no
4245#             type can be determined.
4246#
4247# Side effects: None.
4248# --------------------------------------------------
4249sub _get_dominant_type {
4250
93
6634
        my $spec = $_[0];
4251
4252        # Guard: return default for undef or non-hash input
4253
93
136
        return $DEFAULT_FIELD_TYPE
4254                unless defined($spec) && ref($spec) eq 'HASH';
4255
4256        # Flat spec — type declared directly
4257
91
94
        return $spec->{'type'} if defined($spec->{'type'});
4258
4259        # Multi-field spec — return the type of the first
4260        # sub-field that declares one
4261
36
36
25
33
        for my $field (keys %{$spec}) {
4262
30
37
                next unless ref($spec->{$field}) eq 'HASH';
4263                return $spec->{$field}{'type'}
4264
28
46
                        if defined($spec->{$field}{'type'});
4265        }
4266
4267        # No type found anywhere — return the safe default
4268
8
22
        return $DEFAULT_FIELD_TYPE;
4269}
4270
4271# --------------------------------------------------
4272# _render_properties
4273#
4274# Purpose:    Render an arrayref of property definition
4275#             hashrefs (as produced by
4276#             _generate_transform_properties) into a
4277#             string of Perl source code suitable for
4278#             embedding in a generated test file.
4279#             The output uses Test::LectroTest::Compat
4280#             to run each property as a holds() check.
4281#
4282# Entry:      $properties - arrayref of property
4283#             hashrefs, each containing: name,
4284#             generator_spec, call_code,
4285#             property_checks, should_die,
4286#             should_warn, trials.
4287#             May be undef or an empty arrayref.
4288#
4289# Exit:       Returns a string of Perl source code.
4290#             Returns an empty string if $properties
4291#             is undef, not an arrayref, or empty.
4292#
4293# Side effects: None.
4294#
4295# Notes:      The generated code uses 4-space
4296#             indentation deliberately — this is the
4297#             indentation style of the generated test
4298#             file, not of this module. Tabs are used
4299#             in this module's own source; spaces are
4300#             emitted into generated output for
4301#             readability of the produced test files.
4302# --------------------------------------------------
4303sub _render_properties {
4304
12
9796
        my $properties = $_[0];
4305
4306        # Return empty string for absent or non-array input —
4307        # callers treat '' as no property block to emit
4308
12
55
        return '' unless defined($properties) && ref($properties) eq 'ARRAY';
4309
9
9
6
14
        return '' unless @{$properties};
4310
4311
7
8
        my $code = "use_ok('Test::LectroTest::Compat');\n\n";
4312
4313
7
7
5
7
        for my $prop (@{$properties}) {
4314                # Emit a labelled Property block for each transform property
4315
10
10
                $code .= "# Transform property: $prop->{'name'}\n";
4316
10
11
                $code .= "my \$$prop->{'name'} = Property {\n";
4317
10
9
                $code .= "    ##[ $prop->{'generator_spec'} ]##\n";
4318
10
7
                $code .= "    \n";
4319
10
8
                $code .= "    my \$result = eval { $prop->{'call_code'} };\n";
4320
4321
10
11
                if($prop->{'should_die'}) {
4322                        # For transforms that expect death, pass if the
4323                        # eval caught an exception
4324
2
3
                        $code .= "    my \$died = defined(\$\@) && \$\@;\n";
4325
2
2
                        $code .= "    \$died;\n";
4326                } else {
4327                        # For normal transforms, pass only if no exception
4328                        # was thrown and all property checks hold
4329
8
7
                        $code .= "    my \$error = \$\@;\n";
4330
8
4
                        $code .= "    \n";
4331
8
8
                        $code .= "    !\$error && (\n";
4332
8
12
                        $code .= "        $prop->{'property_checks'}\n";
4333
8
8
                        $code .= "    );\n";
4334                }
4335
4336
10
11
                $code .= "}, name => '$prop->{'name'}', trials => $prop->{'trials'};\n\n";
4337
10
9
                $code .= "holds(\$$prop->{'name'});\n";
4338        }
4339
4340
7
12
        return $code;
4341}
4342
4343# --------------------------------------------------
4344# _detect_transform_properties
4345#
4346# Purpose:    Automatically derive a list of testable
4347#             LectroTest property hashrefs from a
4348#             transform's input and output specs.
4349#             Detects numeric range constraints, exact
4350#             value matches, string length constraints,
4351#             type preservation, and definedness.
4352#
4353# Entry:      $transform_name - string name of the
4354#                               transform, used for
4355#                               heuristic matching
4356#                               (e.g. 'positive').
4357#             $input_spec     - the transform's input
4358#                               hashref, or the string
4359#                               'undef'.
4360#             $output_spec    - the transform's output
4361#                               hashref, or undef if
4362#                               absent.
4363#
4364# Exit:       Returns a list of property hashrefs,
4365#             each containing 'name' and 'code' keys.
4366#             Returns an empty list if no properties
4367#             can be detected or if $input_spec is
4368#             undef or the string 'undef'.
4369#
4370# Side effects: None.
4371#
4372# Notes:      The 'positive' heuristic checks the
4373#             transform name case-insensitively against
4374#             $TRANSFORM_POSITIVE_PATTERN and adds a
4375#             non-negative constraint if matched.
4376#             This is intentionally a rough heuristic
4377#             rather than a precise semantic check.
4378# --------------------------------------------------
4379sub _detect_transform_properties {
4380
28
15070
        my ($transform_name, $input_spec, $output_spec) = @_;
4381
4382
28
21
        my @properties;
4383
4384        # Guard: skip undef input and the YAML scalar 'undef'
4385
28
33
        return @properties unless defined($input_spec);
4386
26
35
        return @properties if(!ref($input_spec) && $input_spec eq 'undef');
4387
4388        # Default output spec to empty hash so all key lookups
4389        # below are safe regardless of what the schema provides
4390
24
45
        $output_spec //= {};
4391
4392        # --------------------------------------------------
4393        # Property 1: Output range constraints (numeric)
4394        # --------------------------------------------------
4395
24
26
        if(_is_numeric_transform($input_spec, $output_spec)) {
4396
15
24
                if(defined($output_spec->{'min'})) {
4397
11
12
                        my $min = $output_spec->{'min'};
4398
11
21
                        push @properties, {
4399                                name => 'min_constraint',
4400                                code => "defined(\$result) && looks_like_number(\$result) && \$result >= $min",
4401                        };
4402                }
4403
4404
15
21
                if(defined($output_spec->{'max'})) {
4405
2
3
                        my $max = $output_spec->{'max'};
4406
2
4
                        push @properties, {
4407                                name => 'max_constraint',
4408                                code => "defined(\$result) && looks_like_number(\$result) && \$result <= $max",
4409                        };
4410                }
4411
4412                # Heuristic: transforms named 'positive' (case-insensitive)
4413                # imply a non-negative result constraint
4414
15
30
                if($transform_name =~ /$TRANSFORM_POSITIVE_PATTERN/i) {
4415
6
29
                        push @properties, {
4416                                name => 'non_negative',
4417                                code => "defined(\$result) && looks_like_number(\$result) && \$result >= 0",
4418                        };
4419                }
4420        }
4421
4422        # --------------------------------------------------
4423        # Property 2: Specific value output
4424        # --------------------------------------------------
4425
24
71
        if(defined($output_spec->{'value'})) {
4426
2
3
                my $expected = $output_spec->{'value'};
4427
4428                # Numeric refs use == for comparison; scalars use eq
4429                # via perl_quote to produce the correct quoted literal
4430
2
6
                push @properties, {
4431                        name => 'exact_value',
4432                        code => ref($expected)
4433                                ? "\$result == $expected"
4434                                : "\$result eq " . perl_quote($expected),
4435                };
4436        }
4437
4438        # --------------------------------------------------
4439        # Property 3: String length constraints
4440        # --------------------------------------------------
4441
24
28
        if(_is_string_transform($input_spec, $output_spec)) {
4442
6
5
                if(defined($output_spec->{'min'})) {
4443
2
4
                        push @properties, {
4444                                name => 'min_length',
4445                                code => "length(\$result) >= $output_spec->{'min'}",
4446                        };
4447                }
4448
4449
6
6
                if(defined($output_spec->{'max'})) {
4450
0
0
                        push @properties, {
4451                                name => 'max_length',
4452                                code => "length(\$result) <= $output_spec->{'max'}",
4453                        };
4454                }
4455
4456
6
9
                if(defined($output_spec->{'matches'})) {
4457
0
0
                        my $pattern = $output_spec->{'matches'};
4458
0
0
                        push @properties, {
4459                                name => 'pattern_match',
4460                                code => "\$result =~ qr/$pattern/",
4461                        };
4462                }
4463        }
4464
4465        # --------------------------------------------------
4466        # Property 4: Type preservation
4467        # --------------------------------------------------
4468
24
28
        if(_same_type($input_spec, $output_spec)) {
4469
22
17
                my $type = _get_dominant_type($output_spec);
4470
4471                # Only emit a numeric_type check for numeric types —
4472                # string and other types have no equivalent simple check
4473
22
40
                if($type eq 'number' || $type eq 'integer' || $type eq 'float') {
4474
15
19
                        push @properties, {
4475                                name => 'numeric_type',
4476                                code => 'looks_like_number($result)',
4477                        };
4478                }
4479        }
4480
4481        # --------------------------------------------------
4482        # Property 5: Definedness
4483        # --------------------------------------------------
4484        # Emit a defined() check for all transforms except those
4485        # whose output type is explicitly 'undef' — those are
4486        # expected to return nothing
4487
24
39
        unless(($output_spec->{'type'} // '') eq 'undef') {
4488
22
23
                push @properties, {
4489                        name => 'defined',
4490                        code => 'defined($result)',
4491                };
4492        }
4493
4494
24
33
        return @properties;
4495}
4496
4497# --------------------------------------------------
4498# _process_custom_properties
4499#
4500# Purpose:    Process the 'properties' array from a
4501#             transform definition, resolving each
4502#             entry to either a named builtin property
4503#             (looked up from _get_builtin_properties)
4504#             or a custom property with inline code.
4505#
4506# Entry:      $properties_spec - arrayref of property
4507#                                definitions from the
4508#                                schema. Each element
4509#                                is either a string
4510#                                (builtin name) or a
4511#                                hashref with 'name'
4512#                                and 'code' fields.
4513#             $function        - name of the function
4514#                                under test.
4515#             $module          - module name, or undef
4516#                                for builtins.
4517#             $input_spec      - the transform's input
4518#                                spec hashref.
4519#             $output_spec     - the transform's output
4520#                                spec hashref.
4521#             $new             - defined if the function
4522#                                is an OO method; value
4523#                                is not used, only
4524#                                presence is checked.
4525#
4526# Exit:       Returns a list of property hashrefs,
4527#             each containing 'name', 'code', and
4528#             'description' keys.
4529#             Invalid or unrecognised entries are
4530#             skipped with a carp warning.
4531#
4532# Side effects: Carps on unrecognised builtin names,
4533#               missing code fields, and invalid
4534#               property definition types.
4535#
4536# Notes:      The sixth argument is $new (the OO
4537#             constructor signal), not the full schema
4538#             hashref. It is used only to determine
4539#             whether to emit OO-style call code for
4540#             builtin property templates.
4541# --------------------------------------------------
4542sub _process_custom_properties {
4543
7
10814
        my ($properties_spec, $function, $module, $input_spec, $output_spec, $new) = @_;
4544
4545
7
6
        my @properties;
4546
7
8
        my $builtin_properties = _get_builtin_properties();
4547
4548
7
7
8
6
        for my $prop_def (@{$properties_spec}) {
4549
6
7
                my $prop_name;
4550                my $prop_code;
4551
6
0
                my $prop_desc;
4552
4553
6
10
                if(!ref($prop_def)) {
4554                        # Plain string — look up as a named builtin property
4555
2
3
                        $prop_name = $prop_def;
4556
4557
2
3
                        unless(exists($builtin_properties->{$prop_name})) {
4558
1
10
                                carp "Unknown built-in property '$prop_name', skipping";
4559
1
117
                                next;
4560                        }
4561
4562
1
1
                        my $builtin = $builtin_properties->{$prop_name};
4563
4564                        # Build the argument list, respecting positional order
4565
1
1
1
2
                        my @var_names = sort keys %{$input_spec};
4566
1
1
                        my @args;
4567
1
1
                        if(_has_positions($input_spec)) {
4568
1
0
2
0
                                my @sorted = sort { $input_spec->{$a}{'position'} <=> $input_spec->{$b}{'position'} } @var_names;
4569
1
1
2
2
                                @args = map { "\$$_" } @sorted;
4570                        } else {
4571
0
0
0
0
                                @args = map { "\$$_" } @var_names;
4572                        }
4573
4574                        # Build the call expression for the builtin template.
4575                        # $new here is the raw OO signal from the caller —
4576                        # defined means OO mode, undef means functional
4577
1
2
                        my $call_code;
4578
1
5
                        if($module && defined($new)) {
4579                                # OO mode — fresh object per trial
4580
0
0
                                $call_code  = "my \$obj = new_ok('$module');";
4581
0
0
                                $call_code .= "\$obj->$function";
4582                        } elsif($module && $module ne $MODULE_BUILTIN) {
4583                                # Functional mode with a named module
4584
0
0
                                $call_code = "$module\::$function";
4585                        } else {
4586                                # Builtin or unqualified function call
4587
1
1
                                $call_code = $function;
4588                        }
4589
1
2
                        $call_code .= '(' . join(', ', @args) . ')';
4590
4591                        # Instantiate the builtin's code template with the
4592                        # call expression and input variable list
4593
1
2
                        $prop_code = $builtin->{'code_template'}->($function, $call_code, \@var_names);
4594
1
1
                        $prop_desc = $builtin->{'description'};
4595
4596                } elsif(ref($prop_def) eq 'HASH') {
4597                        # Hashref — custom property with inline Perl code
4598
3
5
                        $prop_name = $prop_def->{'name'} || 'custom_property';
4599
3
3
                        $prop_code = $prop_def->{'code'};
4600
3
6
                        $prop_desc = $prop_def->{'description'} || "Custom property: $prop_name";
4601
4602
3
4
                        unless($prop_code) {
4603
1
5
                                carp "Custom property '$prop_name' missing 'code' field, skipping";
4604
1
85
                                next;
4605                        }
4606
4607                        # Sanity-check: code must contain at least a variable
4608                        # reference or a word character to be meaningful
4609
2
4
                        unless($prop_code =~ /\$/ || $prop_code =~ /\w+/) {
4610
0
0
                                carp "Custom property '$prop_name' code looks invalid: $prop_code";
4611
0
0
                                next;
4612                        }
4613
4614                } else {
4615                        # Neither string nor hashref — unrecognised definition type
4616
1
2
                        carp 'Invalid property definition: ', render_fallback($prop_def);
4617
1
104
                        next;
4618                }
4619
4620
3
7
                push @properties, {
4621                        name        => $prop_name,
4622                        code        => $prop_code,
4623                        description => $prop_desc,
4624                };
4625        }
4626
4627
7
74
        return @properties;
4628}
4629
4630 - 4707
=head1 NOTES

C<seed> and C<iterations> really should be within C<config>.

=head1 SEE ALSO

=over 4

=item * L<Test Coverage Report|https://nigelhorne.github.io/App-Test-Generator/coverage/>

=item * L<App::Test::Generator::Template> - Template of the file of tests created by C<App::Test::Generator>

=item * L<App::Test::Generator::SchemaExtractor> - Create schemas from Perl programs

=item * L<Params::Validate::Strict>: Schema Definition

=item * L<Params::Get>: Input validation

=item * L<Return::Set>: Output validation

=item * L<Test::LectroTest>

=item * L<Test::Most>

=item * L<YAML::XS>

=back

=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

Portions of this module's initial design and documentation were created with the
assistance of AI.

=head1 SUPPORT

This module is provided as-is without any warranty.

You can find documentation for this module with the perldoc command.

    perldoc App::Test::Generator

You can also look for information at:

=over 4

=item * MetaCPAN

L<https://metacpan.org/release/App-Test-Generator>

=item * GitHub

L<https://github.com/nigelhorne/App-Test-Generator>

=item * CPANTS

L<http://cpants.cpanauthors.org/dist/App-Test-Generator>

=item * CPAN Testers' Matrix

L<http://matrix.cpantesters.org/?dist=App-Test-Generator>

=item * CPAN Testers Dependencies

L<http://deps.cpantesters.org/?module=App::Test::Generator>

=back

=head1 LICENCE AND COPYRIGHT

Copyright 2025-2026 Nigel Horne.

Usage is subject to the terms of GPL2.
If you use it,
please let me know.

=cut
4708
47091;