File Coverage

File:blib/lib/App/Test/Generator.pm
Coverage:78.1%

linestmtbrancondsubtimecode
1package App::Test::Generator;
2
3# TODO: Test validator from Params::Validate::Strict 0.16
4# TODO: $seed should be passed to Data::Random::String::Matches
5# TODO: positional args - when config_undef is set, see what happens when not all args are given
6
7
20
20
1129412
37
use 5.036;
8
9
20
20
20
32
17
163
use strict;
10
20
20
20
32
12
362
use warnings;
11
20
20
20
2363
81874
40
use autodie qw(:all);
12
13
20
20
20
118226
2099
44
use utf8;
14binmode STDOUT, ':utf8';
15binmode STDERR, ':utf8';
16
17
20
20
20
3669
9873
532
use open qw(:std :encoding(UTF-8));
18
19
20
20
20
113802
24
331
use App::Test::Generator::Template;
20
20
20
20
42
17
459
use Carp qw(carp croak);
21
20
20
20
4173
503210
293
use Config::Abstraction 0.36;
22
20
20
20
2190
25932
458
use Data::Dumper;
23
20
20
20
39
16
274
use Data::Section::Simple;
24
20
20
20
63
16
437
use File::Basename qw(basename);
25
20
20
20
33
13
185
use File::Spec;
26
20
20
20
3959
168683
555
use Module::Load::Conditional qw(check_install can_load);
27
20
20
20
46
12
300
use Params::Get;
28
20
20
20
30
141
232
use Params::Validate::Strict 0.30;
29
20
20
20
29
11
274
use Readonly;
30
20
20
20
28
13
816
use Readonly::Values::Boolean;
31
20
20
20
40
14
335
use Scalar::Util qw(looks_like_number);
32
20
20
20
34
21
1178
use re 'regexp_pattern';
33
20
20
20
4116
146651
332
use Template;
34
20
20
20
2742
19547
518
use YAML::XS qw(LoadFile);
35
36
20
20
20
43
14
536
use Exporter 'import';
37
38our @EXPORT_OK = qw(generate);
39
40our $VERSION = '0.41';
41
42use constant {
43
20
662
        DEFAULT_ITERATIONS => 30,
44        DEFAULT_PROPERTY_TRIALS => 1000
45
20
20
66
21
};
46
47
20
20
20
57
15
43608
use constant CONFIG_TYPES => ('test_nuls', 'test_undef', 'test_empty', 'test_non_ascii', 'dedup', 'properties', 'close_stdin', 'test_security', 'timeout');
48
49# --------------------------------------------------
50# Delimiter pairs tried in order when wrapping a
51# string with q{} — bracket forms are preferred as
52# they are most readable in generated test code
53# --------------------------------------------------
54Readonly my @Q_BRACKET_PAIRS => (
55        ['{', '}'],
56        ['(', ')'],
57        ['[', ']'],
58        ['<', '>'],
59);
60
61# --------------------------------------------------
62# Single-character delimiters tried when no bracket
63# pair is usable — each is tried in order and the
64# first one not present in the string is used.
65# The # character is last since it starts comments
66# in many contexts and is least readable
67# --------------------------------------------------
68Readonly my @Q_SINGLE_DELIMITERS => (
69        '~', '!', '%', '^', '=', '+', ':', ',', ';', '|', '/', '#'
70);
71
72# --------------------------------------------------
73# Sentinel returned by index() when the search
74# string is not found — used to make the >= 0
75# boundary check self-documenting and to prevent
76# NumericBoundary mutants from surviving
77# --------------------------------------------------
78Readonly my $INDEX_NOT_FOUND => -1;
79
80# --------------------------------------------------
81# Readonly constants for schema validation
82# --------------------------------------------------
83Readonly my $CONFIG_PROPERTIES_KEY => 'properties';
84Readonly my $LEGACY_PERL_KEY_1     => '$module';
85Readonly my $LEGACY_PERL_KEY_2     => 'our $module';
86Readonly my $SOURCE_KEY            => '_source';
87
88# --------------------------------------------------
89# Readonly constants for render_hash key detection
90# --------------------------------------------------
91Readonly my $KEY_MATCHES => 'matches';
92Readonly my $KEY_NOMATCH => 'nomatch';
93
94# --------------------------------------------------
95# Reserved module name indicating a Perl builtin
96# function rather than a CPAN or user module
97# --------------------------------------------------
98Readonly my $MODULE_BUILTIN => 'builtin';
99
100# --------------------------------------------------
101# Regex pattern matched against transform names to
102# detect the positive/non-negative idempotence
103# heuristic in _detect_transform_properties
104# --------------------------------------------------
105Readonly my $TRANSFORM_POSITIVE_PATTERN => 'positive';
106
107# --------------------------------------------------
108# Default type assumed for schema fields that declare
109# no explicit type — used in generator selection and
110# dominant-type detection
111# --------------------------------------------------
112Readonly my $DEFAULT_FIELD_TYPE => 'string';
113
114# --------------------------------------------------
115# Default range used by the LectroTest float/integer
116# generators when no min or max constraint is given.
117# Chosen to provide a useful spread without producing
118# values so large they overflow downstream arithmetic.
119# --------------------------------------------------
120Readonly my $DEFAULT_GENERATOR_RANGE => 1000;
121
122# --------------------------------------------------
123# Default upper bound on the number of elements in
124# generated arrayrefs and hashrefs when no max is
125# declared in the schema.
126# --------------------------------------------------
127Readonly my $DEFAULT_MAX_COLLECTION_SIZE => 10;
128
129# --------------------------------------------------
130# Default upper bound on generated string length
131# when no max is declared in the schema.
132# --------------------------------------------------
133Readonly my $DEFAULT_MAX_STRING_LEN => 100;
134
135# --------------------------------------------------
136# Sentinel for the zero boundary used in float
137# generator selection — comparing min/max against
138# this constant makes the boundary intent explicit
139# and prevents NumericBoundary mutants from surviving.
140# --------------------------------------------------
141Readonly my $ZERO_BOUNDARY => 0;
142
143# --------------------------------------------------
144# Environment variable names used to control verbose
145# output and optional load validation in
146# _validate_module. Centralised here so they are
147# easy to find and consistent across the codebase.
148# --------------------------------------------------
149Readonly my $ENV_TEST_VERBOSE       => 'TEST_VERBOSE';
150Readonly my $ENV_GENERATOR_VERBOSE  => 'GENERATOR_VERBOSE';
151Readonly my $ENV_VALIDATE_LOAD      => 'GENERATOR_VALIDATE_LOAD';
152
153 - 1533
=head1 NAME

App::Test::Generator - Fuzz Testing, Mutation Testing, LCSAJ Metrics and Test Dashboard for Perl modules

=head1 VERSION

Version 0.41

=head1 SYNOPSIS

C<App::Test::Generator> is a suite to help the testing of CPAN modules.
It consists of 4 modules:

=over 4

=item * Fuzz Tester

=item * Mutation Testing

=item * LCSAJ Metrics

=item * Test Dashboard

=back

From the command line:

  # Takes the formal definition of a routine, creates tests against that routine, and runs the test
  fuzz-harness-generator -r t/conf/add.yml

  # Attempt to create a formal definition from a routine package, then run tests against that formal definition
  # This is the holy grail of automatic test generation, just by looking at the source code
  extract-schemas lib/App/Test/Generator/Sample/Module.pm && fuzz-harness-generator -r schemas/greet.yml

From Perl:

  use App::Test::Generator qw(generate);

  # Generate to STDOUT
  App::Test::Generator->generate("t/conf/add.yml");

  # Generate directly to a file
  App::Test::Generator->generate('t/conf/add.yml', 't/add_fuzz.t');

  # Holy grail mode - read a Perl file, generate tests, and run them
  # This is a long way away yet, but see t/schema_input.t for a proof of concept
  my $extractor = App::Test::Generator::SchemaExtractor->new(
    input_file => 'Foo.pm',
    output_dir => $dir
  );
  my $schemas = $extractor->extract_all();
  foreach my $schema(keys %{$schemas}) {
    my $tempfile = '/var/tmp/foo.t';    # Use File::Temp in real life
    App::Test::Generator->generate(
      schema => $schemas->{$schema},
      output_file => $tempfile,
    );
    system("$^X -I$dir $tempfile");
    unlink $tempfile;
  }

=head1 OVERVIEW

This module takes a formal input/output specification for a routine or
method and automatically generates test cases. In effect, it allows you
to easily add comprehensive black-box tests in addition to the more
common white-box tests that are typically written for CPAN modules and other
subroutines.

The generated tests combine:

=over 4

=item * Random fuzzing based on input types

=item * Deterministic edge cases for min/max constraints

=item * Static corpus tests defined in Perl or YAML

=back

This approach strengthens your test suite by probing both expected and
unexpected inputs, helping you to catch boundary errors, invalid data
handling, and regressions without manually writing every case.

=head1 DESCRIPTION

This module implements the logic behind L<fuzz-harness-generator>.
It parses configuration files (fuzz and/or corpus YAML), and
produces a ready-to-run F<.t> test script to run through C<prove>.

It reads configuration files in any format,
and optional YAML corpus files.
All of the examples in this documentation are in C<YAML> format,
other formats may not work as they aren't so heavily tested.
It then generates a L<Test::Most>-based fuzzing harness combining:

=over 4

=item * Randomized fuzzing of inputs (with edge cases)

=item * Optional static corpus tests from Perl C<%cases> or YAML file (C<yaml_cases> key)

=item * Functional or OO mode (via C<$new>)

=item * Reproducible runs via C<$seed> and configurable iterations via C<$iterations>

=back

=head1 MUTATION-GUIDED TEST GENERATION

C<App::Test::Generator> includes a pipeline that automatically closes the
feedback loop between mutation testing, schema extraction, and fuzz
testing. The goal is that surviving mutants drive the creation of new
tests that kill them on the next run, without manual intervention.

=head2 The Pipeline

    mutation survivor
        |
        v
    SchemaExtractor extracts the schema for the enclosing sub
        |
        v
    Schema augmented with boundary values from the mutant
        |
        v
    Augmented schema written to t/conf/
        |
        v
    t/fuzz.t picks up the new schema and runs fuzz tests
        |
        v
    Mutation killed on next run

=head2 How to Use It

The pipeline is driven by three flags passed to
C<bin/test-generator-index>, which is invoked automatically by
C<bin/generate-test-dashboard> on each CI push.

=head3 Step 1: Generate TODO stubs for all survivors

    bin/test-generator-index --generate_mutant_tests=t

Produces C<t/mutant_YYYYMMDD_HHMMSS.t> containing:

=over 4

=item * TODO stubs for HIGH and MEDIUM difficulty survivors, with
boundary value suggestions, environment variable hints, and the
enclosing subroutine name for navigation context.

=item * Comment-only hints for LOW difficulty survivors.

=back

Multiple mutations on the same source line are deduplicated into one
stub. One good test kills all variants on that line.

=head3 Step 2: Generate runnable schemas for NUM_BOUNDARY survivors

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant

For each NUM_BOUNDARY survivor, calls
L<App::Test::Generator::SchemaExtractor> to extract the schema for
the enclosing subroutine. If the confidence level is sufficient, the
schema is augmented with the boundary value from the mutant (plus one
value either side) and written to C<t/conf/> as a runnable YAML file.
L<t/fuzz.t> picks it up automatically on the next test run.

Falls back to a TODO stub if:

=over 4

=item * SchemaExtractor cannot parse the file

=item * The enclosing sub cannot be determined

=item * The extracted schema confidence is C<very_low> or C<none>

=back

=head3 Step 3: Augment existing schemas with survivor boundary values

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant \
        --generate_fuzz

Scans C<t/conf/> for existing YAML schema files (hand-written or
previously generated) and writes augmented copies with boundary values
from surviving NUM_BOUNDARY mutants merged in. The original schema is
never modified. Augmented copies are written as
C<t/conf/mutant_fuzz_YYYYMMDD_HHMMSS_FUNCTION.yml> and picked up
automatically by C<t/fuzz.t>.

Schemas whose filename already starts with C<mutant_fuzz_> are skipped
to prevent cascading augmentation. Schemas with no matching survivors
are skipped, with a note if C<--verbose> is active.

=head3 Putting It All Together

The recommended invocation in C<bin/generate-test-dashboard>
Step 7 runs all three stages together:

    bin/test-generator-index \
        --generate_mutant_tests=t \
        --generate_test=mutant \
        --generate_fuzz

The GitHub Actions workflow in C<.github/workflows/dashboard.yml>
then commits any new C<t/mutant_*.t> and C<t/conf/mutant_*.yml> files
to the repository so they accumulate over time as the test suite
improves.

=head2 Confidence Levels

L<App::Test::Generator::SchemaExtractor> assigns a confidence level
to each extracted schema:

=over 4

=item * C<high> / C<medium> / C<low> - Schema is used for test generation

=item * C<very_low> / C<none> - Falls back to TODO stub

=back

Confidence is based on how much type and constraint information could
be inferred from the source code and its POD documentation. Methods
with explicit parameter validation (L<Params::Validate::Strict>,
L<Params::Get>) or comprehensive POD will produce higher-confidence
schemas.

=head2 Files Produced

=over 4

=item * C<t/mutant_YYYYMMDD_HHMMSS.t>

TODO stub file for all survivors. Committed to the repository by the
GitHub Actions workflow.

=item * C<t/conf/mutant_MODNAME_FUNCTION_YYYYMMDD_HHMMSS.yml>

Runnable YAML schema for a NUM_BOUNDARY survivor where SchemaExtractor
confidence was sufficient. Picked up by C<t/fuzz.t>.

=item * C<t/conf/mutant_fuzz_YYYYMMDD_HHMMSS_FUNCTION.yml>

Augmented copy of an existing schema with survivor boundary values
merged in. Picked up by C<t/fuzz.t>.

=back

=head2 See Also

=over 4

=item * L<App::Test::Generator::SchemaExtractor> - Schema extraction
from Perl source code

=item * L<bin/test-generator-index> - Dashboard generator and
pipeline driver

=item * L<bin/generate-test-dashboard> - Full pipeline runner

=back

=encoding utf8

=head1 CONFIGURATION

The configuration file,
for each set of tests to be produced,
is a file containing a schema that can be read by L<Config::Abstraction>.

=head2 SCHEMA

The schema is split into several sections.

=head3 C<%input> - input params with keys => type/optional specs

When using named parameters

  input:
    name:
      type: string
      optional: false
    age:
      type: integer
      optional: true

Supported basic types used by the fuzzer: C<string>, C<integer>, C<float>, C<number>, C<boolean>, C<arrayref>, C<hashref>.
See also L<Params::Validate::Strict>.
You can add more custom types using properties.

For routines with one unnamed parameter

  input:
    type: string

For routines with more than one named parameter, use the C<position> keyword.

  module: Math::Simple::MinMax
  fuction: max

  input:
    left:
      type: number
      position: 0
    right:
      type: number
      position: 1

  output:
    type: number

The keyword C<undef> is used to indicate that the C<function> takes no arguments.

=head3 C<%output> - output param types for L<Return::Set> checking

  output:
    type: string

If the output hash contains the key _STATUS, and if that key is set to DIES,
the routine should die with the given arguments; otherwise, it should live.
If it's set to WARNS,
the routine should warn with the given arguments.
The output can be set to the string 'undef' if the routine should return the undefined value:

  ---
  module: Scalar::Util
  function: blessed

  input:
    type: string

  output: undef

The keyword C<undef> is used to indicate that the C<function> returns nothing.

For methods that return a list (rather than a reference), use C<type: array>.
The generated test captures the result in list context and validates it as an
arrayref, which requires L<Test::Returns> 0.03 or later:

  output:
    type: array

=head3 C<%config> - optional hash of configuration.

The current supported variables are

=over 4

=item * C<close_stdin>

Tests should not attempt to read from STDIN (default: 1).
This is ignored on Windows, when never closes STDIN.

=item * C<test_nuls>, inject NUL bytes into strings (default: 1)

With this test enabled, the function is expected to die when a NUL byte is passed in.

=item * C<test_undef>, test with undefined value (default: 1)

=item * C<test_empty>, test with empty strings (default: 1)

=item * C<test_non_ascii>, test with strings that contain non ascii characters (default: 1)

=item * C<timeout>, ensure tests don't hang (default: 10)

Setting this to 0 disables timeout testing.

=item * C<dedup>, fuzzing can create duplicate tests, go some way to remove duplicates (default: 1)

=item * C<properties>, enable L<Test::LectroTest> Property tests (default: 0)

*item * C<test_security>, send some security string based tests (default: 0)

=back

All values default to C<true>.

=head3 C<%accessor> - this is an accessor routine

  accessor:
    property: ua
    type: getset

Has two mandatory elements:

=over 4

=item * C<property>

The name of the property in the object that the routine controls.

=item * C<type>

One of C<getter>, C<setter>, C<getset>.

=back

=head3 C<%transforms> - list of transformations from input sets to output sets

Transforms allow you to define how input data should be transformed into output data.
This is useful for testing functions that convert between formats, normalize data,
or apply business logic transformations on a set of data to different set of data.
It takes a list of subsets of the input and output definitions,
and verifies that data from each input subset is correctly transformed into data from the matching output subset.

=head4 Transform Validation Rules

For each transform:

=over 4

=item 1. Generate test cases using the transform's input schema

=item 2. Call the function with those inputs

=item 3. Validate the output matches the transform's output schema

=item 4. If output has a specific 'value', check exact match

=item 5. If output has constraints (min/max), validate within bounds

=back

=head4 Example 1

  ---
  module: builtin
  function: abs

  config:
    test_undef: no
    test_empty: no
    test_nuls: no
    test_non_ascii: no

  input:
    number:
      type: number
      position: 0

  output:
    type: number
    min: 0

  transforms:
    positive:
      input:
        number:
          type: number
          position: 0
          min: 0
      output:
        type: number
        min: 0
    negative:
      input:
        number:
          type: number
          position: 0
          max: 0
      output:
        type: number
        min: 0
    error:
      input:
        undef
      output:
        _STATUS: DIES

If the output hash contains the key _STATUS, and if that key is set to DIES,
the routine should die with the given arguments; otherwise, it should live.
If it's set to WARNS, the routine should warn with the given arguments.

The keyword C<undef> is used to indicate that the C<function> returns nothing.

=head4 Example 2

  ---
  module: Math::Utils
  function: normalize_number

  input:
    value:
      type: number
      position: 0

  output:
    type: number

  transforms:
    positive_stays_positive:
      input:
        value:
          type: number
          min: 0
          max: 1000
      output:
        type: number
        min: 0
        max: 1

    negative_becomes_zero:
      input:
        value:
          type: number
          max: 0
      output:
        type: number
        value: 0

    preserves_zero:
      input:
        value:
          type: number
          value: 0
      output:
        type: number
        value: 0

=head3 C<$module>

The name of the module (optional).

Using the reserved word C<builtin> means you're testing a Perl builtin function.

If omitted, the generator will guess from the config filename:
C<My-Widget.conf> -> C<My::Widget>.

=head3 C<$function>

The function/method to test.

This defaults to C<run>.

=head3 C<%new>

An optional hashref of args to pass to the module's constructor.

  new:
    api_key: ABC123
    verbose: true

To ensure C<new()> is called with no arguments, you still need to define new, thus:

  module: MyModule
  function: my_function

  new:

=head3 C<%cases>

An optional Perl static corpus, when the output is a simple string (expected => [ args... ]).

Maps the expected output string to the input and _STATUS

  cases:
    ok:
      input: ping
      _STATUS: OK
    error:
      input: ""
      _STATUS: DIES

=head3 C<$yaml_cases> - optional path to a YAML file with the same shape as C<%cases>.

=head3 C<$seed>

An optional integer.
When provided, the generated C<t/fuzz.t> will call C<srand($seed)> so fuzz runs are reproducible.

=head3 C<$iterations>

An optional integer controlling how many fuzz iterations to perform (default 30).

=head3 C<%edge_cases>

An optional hash mapping of extra values to inject.

        # Two named parameters
        edge_cases:
                name: [ '', 'a' x 1024, \"\x{263A}" ]
                age: [ -1, 0, 99999999 ]

        # Takes a string input
        edge_cases: [ 'foo', 'bar' ]

Values can be strings or numbers; strings will be properly quoted.
Note that this only works with routines that take named parameters.

=head3 C<%type_edge_cases>

An optional hash mapping types to arrayrefs of extra values to try for any field of that type:

        type_edge_cases:
                string: [ '', ' ', "\t", "\n", "\0", 'long' x 1024, chr(0x1F600) ]
                number: [ 0, 1.0, -1.0, 1e308, -1e308, 1e-308, -1e-308, 'NaN', 'Infinity' ]
                integer: [ 0, 1, -1, 2**31-1, -(2**31), 2**63-1, -(2**63) ]

=head3 C<%edge_case_array>

Specify edge case values for routines that accept a single unnamed parameter.
This is specifically designed for simple functions that take one argument without a parameter name.
These edge cases supplement the normal random string generation, ensuring specific problematic values are always tested.
During fuzzing iterations, there's a 40% probability that a test case will use a value from edge_case_array instead of randomly generated data.

  ---
  module: Text::Processor
  function: sanitize

  input:
    type: string
    min: 1
    max: 1000

  edge_case_array:
    - "<script>alert('xss')</script>"
    - "'; DROP TABLE users; --"
    - "\0null\0byte"
    - "emoji😊test"
    - ""
    - " "

  seed: 42
  iterations: 30

=head3 Semantic Data Generators

For property-based testing with L<Test::LectroTest>,
you can use semantic generators to create realistic test data.

C<unix_timestamp> is currently fully supported,
other fuzz testing support for C<semantic> entries is being developed.

  input:
    email:
      type: string
      semantic: email

    user_id:
      type: string
      semantic: uuid

    phone:
      type: string
      semantic: phone_us

=head4 Available Semantic Types

=over 4

=item * C<email> - Valid email addresses (user@domain.tld)

=item * C<url> - HTTP/HTTPS URLs

=item * C<uuid> - UUIDv4 identifiers

=item * C<phone_us> - US phone numbers (XXX-XXX-XXXX)

=item * C<phone_e164> - International E.164 format (+XXXXXXXXXXXX)

=item * C<ipv4> - IPv4 addresses (0.0.0.0 - 255.255.255.255)

=item * C<ipv6> - IPv6 addresses

=item * C<username> - Alphanumeric usernames with _ and -

=item * C<slug> - URL slugs (lowercase-with-hyphens)

=item * C<hex_color> - Hex color codes (#RRGGBB)

=item * C<iso_date> - ISO 8601 dates (YYYY-MM-DD)

=item * C<iso_datetime> - ISO 8601 datetimes (YYYY-MM-DDTHH:MM:SSZ)

=item * C<semver> - Semantic version strings (major.minor.patch)

=item * C<jwt> - JWT-like tokens (base64url format)

=item * C<json> - Simple JSON objects

=item * C<base64> - Base64-encoded strings

=item * C<md5> - MD5 hashes (32 hex chars)

=item * C<sha256> - SHA-256 hashes (64 hex chars)

=item * C<unix_timestamp>

=back

=head2 EDGE CASE GENERATION

In addition to purely random fuzz cases, the harness generates
deterministic edge cases for parameters that declare C<min>, C<max> or C<len> in their schema definitions.

For each constraint, three edge cases are added:

=over 4

=item * Just inside the allowable range

This case should succeed, since it lies strictly within the bounds.

=item * Exactly on the boundary

This case should succeed, since it meets the constraint exactly.

=item * Just outside the boundary

This case is annotated with C<_STATUS = 'DIES'> in the corpus and
should cause the harness to fail validation or croak.

=back

Supported constraint types:

=over 4

=item * C<number>, C<integer>, C<float>

Uses numeric values one below, equal to, and one above the boundary.

=item * C<string>

Uses strings of lengths one below, equal to, and one above the boundary.

=item * C<arrayref>

Uses references to arrays of with the number of elements one below, equal to, and one above the boundary.

=item * C<hashref>

Uses hashes with key counts one below, equal to, and one above the
boundary (C<min> = minimum number of keys, C<max> = maximum number
of keys).

=item * C<memberof> - arrayref of allowed values for a parameter

This example is for a routine called C<input()> that takes two arguments: C<status> and C<level>.
C<status> is a string that must have the value C<ok>, C<error> or C<pending>.
The C<level> argument is an integer that must be one of C<1>, C<5> or C<111>.

  ---
  input:
    status:
      type: string
      memberof:
        - ok
        - error
        - pending
    level:
      type: integer
      memberof:
        - 1
        - 5
        - 111

The generator will automatically create test cases for each allowed value (inside the member list),
and at least one value outside the list (which should die or C<croak>, C<_STATUS = 'DIES'>).
This works for strings, integers, and numbers.

=item * C<enum> - synonym of C<memberof>

=item * C<boolean> - automatic boundary tests for boolean fields

  input:
    flag:
      type: boolean

The generator will automatically create test cases for 0 and 1; true and false; off and on, and values that should trigger C<_STATUS = 'DIES'>.

=back

These edge cases are inserted automatically, in addition to the random
fuzzing inputs, so each run will reliably probe boundary conditions
without relying solely on randomness.

=head1 EXAMPLES

See the files in C<t/conf> for examples.

=head2 Adding Scheduled fuzz Testing with GitHub Actions to Your Code

To automatically create and run tests on a regular basis on GitHub Actions,
you need to create a configuration file for each method and subroutine that you're testing,
and a GitHub Actions configuration file.

This example takes you through testing the online_render method of L<HTML::Genealogy::Map>.

=head3 t/conf/online_render.yml

  ---

  module: HTML::Genealogy::Map
  function: onload_render

  input:
    gedcom:
      type: object
      can: individuals
    geocoder:
      type: object
      can: geocode
    debug:
      type: boolean
      optional: true
    google_key:
      type: string
      optional: true
      min: 39
      max: 39
      matches: "^AIza[0-9A-Za-z_-]{35}$"

  config:
    test_undef: 0

=head3 .github/actions/fuzz.t

  ---
  name: Fuzz Testing

  permissions:
    contents: read

  on:
    push:
      branches: [main, master]
    pull_request:
      branches: [main, master]
    schedule:
      - cron: '29 5 14 * *'

  jobs:
    generate-fuzz-tests:
      strategy:
        fail-fast: false
        matrix:
          os:
            - macos-latest
            - ubuntu-latest
            - windows-latest
          perl: ['5.42', '5.40', '5.38', '5.36', '5.34', '5.32', '5.30', '5.28', '5.22']

      runs-on: ${{ matrix.os }}
      name: Fuzz testing with perl ${{ matrix.perl }} on ${{ matrix.os }}

      steps:
        - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6

        - name: Set up Perl
          uses: shogo82148/actions-setup-perl@a198315ec4e9244f206879ea7b63078003aec8a6 # v1.41.1
          with:
            perl-version: ${{ matrix.perl }}

        - name: Install App::Test::Generator this module's dependencies
          run: |
            cpanm App::Test::Generator
            cpanm --installdeps .

        - name: Make Module
          run: |
            perl Makefile.PL
            make
          env:
            AUTOMATED_TESTING: 1
            NONINTERACTIVE_TESTING: 1

        - name: Generate fuzz tests
          run: |
            mkdir t/fuzz
            find t/conf -name '*.yml' | while read config; do
              test_name=$(basename "$config" .conf)
              fuzz-harness-generator "$config" > "t/fuzz/${test_name}_fuzz.t"
            done

        - name: Run generated fuzz tests
          run: |
            prove -lr t/fuzz/
          env:
            AUTOMATED_TESTING: 1
            NONINTERACTIVE_TESTING: 1

=head2 Fuzz Testing your CPAN Module

Running fuzz tests when you run C<make test> in your CPAN module.

Create a directory <t/conf> which contains the schemas.

Then create this file as <t/fuzz.t>:

  #!/usr/bin/env perl

  use strict;
  use warnings;

  use FindBin qw($Bin);
  use IPC::Run3;
  use IPC::System::Simple qw(system);
  use Test::Needs 'App::Test::Generator';
  use Test::Most;

  my $dirname = "$Bin/conf";

  if((-d $dirname) && opendir(my $dh, $dirname)) {
        while (my $filename = readdir($dh)) {
                # Skip '.' and '..' entries and vi temporary files
                next if ($filename eq '.' || $filename eq '..') || ($filename =~ /\.swp$/);

                my $filepath = "$dirname/$filename";

                if(-f $filepath) {      # Check if it's a regular file
                        my ($stdout, $stderr);
                        run3 ['fuzz-harness-generator', '-r', $filepath], undef, \$stdout, \$stderr;

                        ok($? == 0, 'Generated test script exits successfully');

                        if($? == 0) {
                                ok($stdout =~ /^Result: PASS/ms);
                                if($stdout =~ /Files=1, Tests=(\d+)/ms) {
                                        diag("$1 tests run");
                                }
                        } else {
                                diag("$filepath: STDOUT:\n$stdout");
                                diag($stderr) if(length($stderr));
                                diag("$filepath Failed");
                                last;
                        }
                        diag($stderr) if(length($stderr));
                }
        }
        closedir($dh);
  }

  done_testing();

=head2 Property-Based Testing with Transforms

The generator can create property-based tests using L<Test::LectroTest> when the
C<properties> configuration option is enabled.
This provides more comprehensive
testing by automatically generating thousands of test cases and verifying that
mathematical properties hold across all inputs.

=head3 Basic Property-Based Transform Example

Here's a complete example testing the C<abs> builtin function:

B<t/conf/abs.yml>:

  ---
  module: builtin
  function: abs

  config:
    test_undef: no
    test_empty: no
    test_nuls: no
    properties:
      enable: true
      trials: 1000

  input:
    number:
      type: number
      position: 0

  output:
    type: number
    min: 0

  transforms:
    positive:
      input:
        number:
          type: number
          min: 0
      output:
        type: number
        min: 0

    negative:
      input:
        number:
          type: number
          max: 0
      output:
        type: number
        min: 0

This configuration:

=over 4

=item * Enables property-based testing with 1000 trials per property

=item * Defines two transforms: one for positive numbers, one for negative

=item * Automatically generates properties that verify C<abs()> always returns non-negative numbers

=back

Generate the test:

  fuzz-harness-generator t/conf/abs.yml > t/abs_property.t

The generated test will include:

=over 4

=item * Traditional edge-case tests for boundary conditions

=item * Random fuzzing with 30 iterations (or as configured)

=item * Property-based tests that verify the transforms with 1000 trials each

=back

=head3 What Properties Are Tested?

The generator automatically detects and tests these properties based on your transform specifications:

=over 4

=item * B<Range constraints> - If output has C<min> or C<max>, verifies results stay within bounds

=item * B<Type preservation> - Ensures numeric inputs produce numeric outputs

=item * B<Definedness> - Verifies the function doesn't return C<undef> unexpectedly

=item * B<Specific values> - If output specifies a C<value>, checks exact equality

=back

For the C<abs> example above, the generated properties verify:

  # For the "positive" transform:
  - Given a positive number, abs() returns >= 0
  - The result is a valid number
  - The result is defined

  # For the "negative" transform:
  - Given a negative number, abs() returns >= 0
  - The result is a valid number
  - The result is defined

=head3 Advanced Example: String Normalization

Here's a more complex example testing a string normalization function:

B<t/conf/normalize.yml>:

  ---
  module: Text::Processor
  function: normalize_whitespace

  config:
    properties:
      enable: true
      trials: 500

  input:
    text:
      type: string
      min: 0
      max: 1000
      position: 0

  output:
    type: string
    min: 0
    max: 1000

  transforms:
    empty_preserved:
      input:
        text:
          type: string
          value: ""
      output:
        type: string
        value: ""

    single_space:
      input:
        text:
          type: string
          min: 1
          matches: '^\S+(\s+\S+)*$'
      output:
        type: string
        matches: '^\S+( \S+)*$'

    length_bounded:
      input:
        text:
          type: string
          min: 1
          max: 100
      output:
        type: string
        min: 1
        max: 100

This tests that the normalization function:

=over 4

=item * Preserves empty strings (C<empty_preserved> transform)

=item * Collapses multiple spaces into single spaces (C<single_space> transform)

=item * Maintains length constraints (C<length_bounded> transform)

=back

=head3 Interpreting Property Test Results

When property-based tests run, you'll see output like:

  ok 123 - negative property holds (1000 trials)
  ok 124 - positive property holds (1000 trials)

If a property fails, Test::LectroTest will attempt to find the minimal failing
case and display it:

  not ok 123 - positive property holds (47 trials)
  # Property failed
  # Reason: counterexample found

This helps you quickly identify edge cases that your function doesn't handle correctly.

=head3 Configuration Options for Property-Based Testing

In the C<config> section:

  config:
    properties:
      enable: true     # Enable property-based testing (default: false)
      trials: 1000     # Number of test cases per property (default: 1000)

You can also disable traditional fuzzing and only use property-based tests:

  config:
    properties:
      enable: true
      trials: 5000

  iterations: 0  # Disable random fuzzing, use only property tests

=head3 When to Use Property-Based Testing

Property-based testing with transforms is particularly useful for:

=over 4

=item * Mathematical functions (C<abs>, C<sqrt>, C<min>, C<max>, etc.)

=item * Data transformations (encoding, normalization, sanitization)

=item * Parsers and formatters

=item * Functions with clear input-output relationships

=item * Code that should satisfy mathematical properties (commutativity, associativity, idempotence)

=back

=head3 Requirements

Property-based testing requires L<Test::LectroTest> to be installed:

  cpanm Test::LectroTest

If not installed, the generated tests will automatically skip the property-based
portion with a message.

=head3 Testing Email Validation

  ---
  module: Email::Valid
  function: rfc822

  config:
    properties:
      enable: true
      trials: 200
    close_stdin: true
    test_undef: no
    test_empty: no
    test_nuls: no

  input:
    email:
      type: string
      semantic: email
      position: 0

  output:
    type: boolean

  transforms:
    valid_emails:
      input:
        email:
          type: string
          semantic: email
      output:
        type: boolean

This generates 200 realistic email addresses for testing, rather than random strings.

=head3 Combining Semantic with Regex

You can combine semantic generators with regex validation:

  input:
    corporate_email:
      type: string
      semantic: email
      matches: '@company\.com$'

The semantic generator creates realistic emails, and the regex ensures they match your domain.

=head3 Custom Properties for Transforms

You can define additional properties that should hold for your transforms beyond
the automatically detected ones.

=head4 Using Built-in Properties

  transforms:
    positive:
      input:
        number:
          type: number
          min: 0
      output:
        type: number
        min: 0
      properties:
        - idempotent       # f(f(x)) == f(x)
        - non_negative     # result >= 0
        - positive         # result > 0

Available built-in properties:

=over 4

=item * C<idempotent> - Function is idempotent: f(f(x)) == f(x)

=item * C<non_negative> - Result is always >= 0

=item * C<positive> - Result is always > 0

=item * C<non_empty> - String result is never empty

=item * C<length_preserved> - Output length equals input length

=item * C<uppercase> - Result is all uppercase

=item * C<lowercase> - Result is all lowercase

=item * C<trimmed> - No leading/trailing whitespace

=item * C<sorted_ascending> - Array is sorted ascending

=item * C<sorted_descending> - Array is sorted descending

=item * C<unique_elements> - Array has no duplicates

=item * C<preserves_keys> - Hash has same keys as input

=back

=head4 Custom Property Code

Custom properties allows the definition additional invariants and relationships that should hold for their transforms,
beyond what's auto-detected.
For example:

=over 4

=item * Idempotence: f(f(x)) == f(x)

=item * Commutativity: f(x, y) == f(y, x)

=item * Associativity: f(f(x, y), z) == f(x, f(y, z))

=item * Inverse relationships: decode(encode(x)) == x

=item * Domain-specific invariants: Custom business logic

=back

Define your own properties with custom Perl code:

  transforms:
    normalize:
      input:
        text:
          type: string
      output:
        type: string
      properties:
        - name: single_spaces
          description: "No multiple consecutive spaces"
          code: $result !~ /  /

        - name: no_leading_space
          description: "No space at start"
          code: $result !~ /^\s/

        - name: reversible
          description: "Can be reversed back"
          code: length($result) == length($text)

The code has access to:

=over 4

=item * C<$result> - The function's return value

=item * Input variables - All input parameters (e.g., C<$text>, C<$number>)

=item * The function itself - Can call it again for idempotence checks

=back

=head4 Combining Auto-detected and Custom Properties

The generator automatically detects properties from your output spec, and adds
your custom properties:

  transforms:
    sanitize:
      input:
        html:
          type: string
      output:
        type: string
        min: 0              # Auto-detects: defined, min_length >= 0
        max: 10000
      properties:           # Additional custom checks:
        - name: no_scripts
          code: $result !~ /<script/i
        - name: no_iframes
          code: $result !~ /<iframe/i

=head2 GENERATED OUTPUT

The generated test:

=over 4

=item * Seeds RND (if configured) for reproducible fuzz runs

=item * Uses edge cases (per-field and per-type) with configurable probability

=item * Runs C<$iterations> fuzz cases plus appended edge-case runs

=item * Validates inputs with Params::Get / Params::Validate::Strict

=item * Validates outputs with L<Return::Set>

=item * Runs static C<is(... )> corpus tests from Perl and/or YAML corpus

=item * Runs L<Test::LectroTest> tests

=back

=cut
1534
1535 - 1566
=head1 METHODS

=head2 generate

Takes a schema file and produces a test file (or STDOUT).

  # Modern named API
  App::Test::Generator->generate(
      schema_file => 'schemas/foo.yml',
      output_file => 'test/foo.t',
  );

  # Legacy positional API
  App::Test::Generator->generate($schema_file, $test_file);

=head3 API Specification

=head4 Input

    {
        schema_file => { type => 'string', optional => 1 },
        input_file  => { type => 'string', optional => 1 },
        output_file => { type => 'string', optional => 1 },
        schema      => { type => 'hashref', optional => 1 },
        quiet       => { type => 'boolean', optional => 1 },   # accepted but not yet implemented; has no effect
    }

=head4 Output

    { type => 'string' }

=cut
1567
1568sub generate
1569{
1570
96
3082864
        croak 'Usage: generate(schema_file [, outfile])' if(scalar(@_) == 0);
1571
1572        # Accept both class-method call (App::Test::Generator->generate(...))
1573        # and plain-function call with a hashref (generate({...})).
1574        # In the method form the first arg is the class name (a plain string);
1575        # in the function form with a hashref the first arg IS the hashref.
1576
96
170
        my $class = (ref($_[0]) ne 'HASH') ? shift : undef;
1577
96
198
        my ($schema_file, $test_file, $schema);
1578        # Globals loaded from the user's conf (all optional except function maybe)
1579
96
0
        my ($module, $function, $new, $yaml_cases);
1580
96
0
        my ($seed, $iterations);
1581
1582
96
240
        if((ref($_[0]) eq 'HASH') || defined($_[2])) {
1583                # Modern API
1584
14
63
                my $params = Params::Validate::Strict::validate_strict({
1585                        args => Params::Get::get_params(undef, \@_),
1586                        schema => {
1587                                input_file => { type => 'string', optional => 1 },
1588                                schema_file => { type => 'string', optional => 1 },
1589                                output_file => { type => 'string', optional => 1 },
1590                                schema => { type => 'hashref', optional => 1 },
1591                                quiet => { type => 'boolean', optional => 1 }, # Not yet used
1592                        }
1593                });
1594
14
2666
                if($params->{'schema_file'}) {
1595
5
6
                        $schema_file = $params->{'schema_file'};
1596                } elsif($params->{'input_file'}) {
1597
1
1
                        $schema_file = $params->{'input_file'};
1598                } elsif($params->{'schema'}) {
1599
8
13
                        $schema = $params->{'schema'};
1600                } else {
1601
0
0
                        croak(__PACKAGE__, ': Usage: generate(input_file|schema [, output_file]');
1602                }
1603
14
27
                if(defined($schema_file)) {
1604
6
9
                        $schema = _load_schema($schema_file);
1605                }
1606
14
65
                $test_file = $params->{'output_file'};
1607        } else {
1608                # Legacy API
1609
82
93
                ($schema_file, $test_file) = @_;
1610
82
108
                if(defined($schema_file)) {
1611
77
150
                        $schema = _load_schema($schema_file);
1612                } else {
1613
5
28
                        croak 'Usage: generate(schema_file [, outfile])';
1614                }
1615        }
1616
1617        # Parse the schema file and load into our structures
1618
88
88
587
128
        my %input = %{_load_schema_section($schema, 'input', $schema_file)};
1619
87
87
83
76
        my %output = %{_load_schema_section($schema, 'output', $schema_file)};
1620
86
86
93
76
        my %transforms = %{_load_schema_section($schema, 'transforms', $schema_file)};
1621
85
85
76
77
        my %accessor = %{_load_schema_section($schema, 'accessor', $schema_file)};
1622
1623
85
2
128
4
        my %cases = %{$schema->{cases}} if(exists($schema->{cases}));
1624
85
0
95
0
        my %edge_cases = %{$schema->{edge_cases}} if(exists($schema->{edge_cases}));
1625
85
1
98
2
        my %type_edge_cases = %{$schema->{type_edge_cases}} if(exists($schema->{type_edge_cases}));
1626
1627
85
227
        $module = $schema->{module} if(exists($schema->{module}) && length($schema->{module}));
1628
85
112
        $function = $schema->{function} if(exists($schema->{function}));
1629
85
124
        if(exists($schema->{new})) {
1630
14
26
                $new = defined($schema->{'new'}) ? $schema->{new} : '_UNDEF';
1631        }
1632
85
90
        $yaml_cases = $schema->{yaml_cases} if(exists($schema->{yaml_cases}));
1633
85
98
        $seed = $schema->{seed} if(exists($schema->{seed}));
1634
85
120
        $iterations = $schema->{iterations} if(exists($schema->{iterations}));
1635
1636
85
3
120
6
        my @edge_case_array = @{$schema->{edge_case_array}} if(exists($schema->{edge_case_array}));
1637
85
135
        _validate_config($schema);
1638
1639
82
10
96
32
        my %config = %{$schema->{config}} if(exists($schema->{config}));
1640
1641
82
142
        _normalize_config(\%config);
1642
1643        # Guess module name from config file if not set
1644
82
346
        if(!$module) {
1645
1
2
                if($schema_file) {
1646
0
0
                        ($module = basename($schema_file)) =~ s/\.(conf|pl|pm|yml|yaml)$//;
1647
0
0
                        $module =~ s/-/::/g;
1648                        # Guard against Perl builtin function names being mistaken
1649                        # for module names — builtins have no module to load
1650
0
0
                        if(_is_perl_builtin($module)) {
1651
0
0
                                undef $module;
1652                        }
1653                }
1654        } elsif($module eq $MODULE_BUILTIN) {
1655
54
125
                undef $module;
1656        }
1657
1658
82
205
        if($module && length($module) && ($module ne 'builtin')) {
1659
27
42
                _validate_module($module, $schema_file);
1660        }
1661
1662        # $module/$function are spliced unescaped into generated test
1663        # source below (use_ok, new_ok, ->$function, $module::$function)
1664        # — reject anything that isn't identifier-shaped before that happens.
1665
82
163
        _assert_identifier($module, 'module', package => 1) if defined($module) && length($module);
1666
1667        # sensible defaults
1668
81
101
        $function ||= 'run';
1669        # package => 1: fully-qualified sub names (e.g. DB::DB, a debugger
1670        # hook installed into the DB:: package regardless of its source
1671        # package) are legitimate function names, not just bare identifiers
1672
81
118
        _assert_identifier($function, 'function', package => 1);
1673
79
175
        $iterations ||= DEFAULT_ITERATIONS;              # default fuzz runs if not specified
1674
79
107
        $seed = undef if defined $seed && $seed eq '';  # treat empty as undef
1675
1676        # --- YAML corpus support (yaml_cases is filename string) ---
1677
79
53
        my %yaml_corpus_data;
1678
79
76
        if (defined $yaml_cases) {
1679
5
48
                croak("$yaml_cases: $!") if(!-f $yaml_cases);
1680
1681
4
12
                my $yaml_data = LoadFile(Encode::decode('utf8', $yaml_cases));
1682
4
269
                if ($yaml_data && ref($yaml_data) eq 'HASH') {
1683                        # Validate that the corpus inputs are arrayrefs
1684                        # e.g: "FooBar":      ["foo_bar"]
1685                        # Skip only invalid entries:
1686
4
4
4
10
                        for my $expected (keys %{$yaml_data}) {
1687
6
5
                                my $outputs = $yaml_data->{$expected};
1688
6
14
                                unless($outputs && (ref $outputs eq 'ARRAY')) {
1689
2
13
                                        carp("$yaml_cases: $expected does not point to an array ref, ignoring");
1690
2
181
                                        next;
1691                                }
1692
4
7
                                $yaml_corpus_data{$expected} = $outputs;
1693                        }
1694                }
1695        }
1696
1697        # Merge Perl %cases and YAML corpus safely
1698        # my %all_cases = (%cases, %yaml_corpus_data);
1699
78
94
        my %all_cases = (%yaml_corpus_data, %cases);
1700
78
114
        for my $k (keys %yaml_corpus_data) {
1701
4
13
                if (exists $cases{$k} && ref($cases{$k}) eq 'ARRAY' && ref($yaml_corpus_data{$k}) eq 'ARRAY') {
1702
1
1
1
1
1
2
                        $all_cases{$k} = [ @{$yaml_corpus_data{$k}}, @{$cases{$k}} ];
1703                }
1704        }
1705
1706
78
112
        if(my $hints = delete $schema->{_yamltest_hints}) {
1707
5
10
                if(my $boundaries = $hints->{boundary_values}) {
1708
5
5
4
8
                        push @edge_case_array, @{$boundaries};
1709                }
1710
5
11
                if(my $invalid = $hints->{invalid}) {
1711
0
0
                        carp('TODO: handle yamltest_hints->invalid');
1712                }
1713        }
1714
1715        # If the schema says the type is numeric, normalize
1716
78
105
        if ($schema->{type} && $schema->{type} =~ /^(integer|number|float)$/) {
1717
0
0
                for (@edge_case_array) {
1718
0
0
                        next unless defined $_;
1719
0
0
                        $_ += 0 if Scalar::Util::looks_like_number($_);
1720                }
1721        }
1722
1723        # Load relationships from the schema if present and well-formed.
1724        # SchemaExtractor may set this to undef or an empty arrayref when
1725        # no relationships were detected, so guard both existence and type.
1726
78
53
        my @relationships;
1727
78
113
        if(exists($schema->{relationships}) && ref($schema->{relationships}) eq 'ARRAY') {
1728
0
0
0
0
                @relationships = @{$schema->{relationships}};
1729        }
1730
1731        # Serialise the relationships array from the schema into Perl source
1732        # code for embedding in the generated test file. Each relationship
1733        # type is rendered as a hashref in the @relationships array.
1734
1735
78
73
        my $relationships_code = '';
1736
1737        # Walk each relationship in the order SchemaExtractor produced them
1738
78
96
        for my $rel (@relationships) {
1739
0
0
                my $type = $rel->{type} // '';
1740
1741                # Mutually exclusive: both params being set should cause the method to die
1742
0
0
                if($type eq 'mutually_exclusive') {
1743                        $relationships_code .= "{ type => 'mutually_exclusive', params => [" .
1744
0
0
0
0
0
0
                                join(', ', map { perl_quote($_) } @{$rel->{params}}) .
1745                                "] },\n";
1746
1747                # Required group: at least one of the params must be present
1748                } elsif($type eq 'required_group') {
1749                        $relationships_code .= "{ type => 'required_group', params => [" .
1750
0
0
0
0
                                join(', ', map { perl_quote($_) } @{$rel->{params}}) .
1751
0
0
                                "], logic => " . perl_quote($rel->{logic} // 'or') . " },\n";
1752
1753                # Conditional requirement: if one param is set, another becomes mandatory
1754                } elsif($type eq 'conditional_requirement') {
1755                        $relationships_code .= "{ type => 'conditional_requirement', if => " .
1756                                perl_quote($rel->{'if'}) . ", then_required => " .
1757
0
0
                                perl_quote($rel->{then_required}) . " },\n";
1758
1759                # Dependency: one param requires another to also be present
1760                } elsif($type eq 'dependency') {
1761                        $relationships_code .= "{ type => 'dependency', param => " .
1762                                perl_quote($rel->{param}) . ", requires => " .
1763
0
0
                                perl_quote($rel->{requires}) . " },\n";
1764
1765                # Value constraint: one param being set forces another to a specific value
1766                } elsif($type eq 'value_constraint') {
1767                        $relationships_code .= "{ type => 'value_constraint', if => " .
1768                                perl_quote($rel->{'if'}) . ", then => " .
1769                                perl_quote($rel->{then}) . ", operator => " .
1770                                perl_quote($rel->{operator}) . ", value => " .
1771
0
0
                                perl_quote($rel->{value}) . " },\n";
1772
1773                # Value conditional: one param equalling a specific value requires another param
1774                } elsif($type eq 'value_conditional') {
1775                        $relationships_code .= "{ type => 'value_conditional', if => " .
1776                                perl_quote($rel->{'if'}) . ", equals => " .
1777                                perl_quote($rel->{equals}) . ", then_required => " .
1778
0
0
                                perl_quote($rel->{then_required}) . " },\n";
1779
1780                # Unknown type — warn and skip rather than emitting broken code
1781                } else {
1782
0
0
                        carp "Unknown relationship type '$type', skipping";
1783                }
1784        }
1785
1786        # Dedup the edge cases
1787
78
53
        my %seen;
1788        @edge_case_array = grep {
1789
78
99
84
136
                my $key = defined($_) ? (Scalar::Util::looks_like_number($_) ? "N:$_" : "S:$_") : 'U';
1790
99
160
                !$seen{$key}++;
1791        } @edge_case_array;
1792
1793        # Sort the edge cases to keep it consistent across runs
1794        @edge_case_array = sort {
1795
78
137
151
111
                return -1 if !defined $a;
1796
137
104
                return 1 if !defined $b;
1797
1798
137
109
                my $na = Scalar::Util::looks_like_number($a);
1799
137
92
                my $nb = Scalar::Util::looks_like_number($b);
1800
1801
137
179
                return $a <=> $b if $na && $nb;
1802
9
10
                return -1 if $na;
1803
9
9
                return 1 if $nb;
1804
9
9
                return $a cmp $b;
1805        } @edge_case_array;
1806
1807        # render edge case maps for inclusion in the .t
1808
78
115
        my $edge_cases_code = render_arrayref_map(\%edge_cases);
1809
78
84
        my $type_edge_cases_code = render_arrayref_map(\%type_edge_cases);
1810
1811
78
66
        my $edge_case_array_code = '';
1812
78
87
        if(scalar(@edge_case_array)) {
1813
19
89
22
76
                $edge_case_array_code = join(', ', map { q_wrap($_) } @edge_case_array);
1814        }
1815
1816        # Render configuration - all the values are integers for now, if that changes, wrap the $config{$key} in single quotes
1817
78
76
        my $config_code = '';
1818
78
245
        foreach my $key (sort keys %config) {
1819                # Skip nested structures like 'properties' - they're used during
1820                # generation but don't need to be in the generated test
1821
624
462
                if(ref($config{$key}) eq 'HASH') {
1822
78
58
                        next;
1823                }
1824
546
624
                if((!defined($config{$key})) || !$config{$key}) {
1825                        # YAML will strip the word 'false'
1826                        # e.g. in 'test_undef: false'
1827
33
27
                        $config_code .= "'$key' => 0,\n";
1828                } else {
1829
513
412
                        $config_code .= "'$key' => $config{$key},\n";
1830                }
1831        }
1832
1833        # Render input/output
1834
78
92
        my $input_code = '';
1835
78
204
        if(((scalar keys %input) == 1) && exists($input{'type'}) && !ref($input{'type'})) {
1836                # %input = ( type => 'string' );
1837
45
78
                foreach my $key (sort keys %input) {
1838
45
48
                        $input_code .= "'$key' => '$input{$key}',\n";
1839                }
1840        } else {
1841                # %input = ( str => { type => 'string' } );
1842
33
65
                $input_code = render_hash(\%input);
1843        }
1844
78
119
        if(defined(my $re = $output{'matches'})) {
1845
0
0
                if(ref($re) ne 'Regexp') {
1846                        # Use eval to compile safely — qr/$re/ would interpolate
1847                        # the string first, corrupting patterns containing [ or \
1848
0
0
0
0
                        my $compiled = eval { qr/$re/ };
1849
0
0
                        if($@) {
1850
0
0
                                carp("Invalid matches pattern '$re': $@");
1851                        } else {
1852
0
0
                                $output{'matches'} = $compiled;
1853                        }
1854                }
1855        }
1856
1857        # Compile nomatch pattern to a Regexp object so it renders
1858        # as qr{} in the generated test rather than a raw string.
1859        # Without this, patterns containing [ or other regex
1860        # metacharacters cause compilation failures in validators
1861
78
82
        if(defined(my $re = $output{'nomatch'})) {
1862
0
0
                if(ref($re) ne 'Regexp') {
1863                        # Use eval to compile safely — qr/$re/ would interpolate
1864                        # the string first, corrupting patterns containing [ or \
1865
0
0
0
0
                        my $compiled = eval { qr/$re/ };
1866
0
0
                        if($@) {
1867
0
0
                                carp("Invalid nomatch pattern '$re': $@");
1868                        } else {
1869
0
0
                                $output{'nomatch'} = $compiled;
1870                        }
1871                }
1872        }
1873
1874
78
99
        my $output_code = render_args_hash(\%output);
1875
78
161
        my $new_code = ($new && (ref $new eq 'HASH')) ? render_args_hash($new) : '';
1876
1877
78
147
        my $transforms_code;
1878
78
79
        if(keys %transforms) {
1879
5
5
                foreach my $transform(keys %transforms) {
1880
8
13
                        my $properties = render_fallback($transforms{$transform}->{'properties'});
1881
1882
8
11
                        if($transforms_code) {
1883
3
2
                                $transforms_code .= "},\n";
1884                        }
1885                        $transforms_code .= "$transform => {\n" .
1886                                "\t'input' => { " .
1887                                render_args_hash($transforms{$transform}->{'input'}) .
1888                                "\t}, 'output' => { " .
1889
8
12
                                render_args_hash($transforms{$transform}->{'output'}) .
1890                                "\t}, 'properties' => $properties\n" .
1891                                "\t,\n";
1892                }
1893
5
5
                $transforms_code .= "}\n";
1894        }
1895
1896
78
61
        my $transform_properties_code = '';
1897
78
62
        my $use_properties = 0;
1898
1899
78
88
        if (keys %transforms && ($config{properties}{enable} // 0)) {
1900
4
3
                $use_properties = 1;
1901
1902                # Generate property-based tests for transforms
1903
4
8
                my $properties = _generate_transform_properties(
1904                        \%transforms,
1905                        $function,
1906                        $module,
1907                        \%input,
1908                        \%config,
1909                        $new
1910                );
1911
1912                # Convert to code for template
1913
3
5
                $transform_properties_code = _render_properties($properties);
1914        }
1915
1916
77
77
        if(keys %accessor) {
1917                # Sanity test
1918
2
5
                my $property = $accessor{property};
1919
2
4
                my $type = $accessor{type};
1920
1921
2
4
                if(!defined($new)) {
1922
0
0
                        croak("BUG: $property: accessor $type can only work on an object, incorrectly tagged as $type");
1923                }
1924
2
9
                if($type eq 'getset') {
1925
0
0
                        if(scalar(keys %input) != 1) {
1926
0
0
                                croak("BUG: $property: getset must take one input argument, incorrectly tagged as getset");
1927                        }
1928
0
0
                        if(scalar(keys %output) == 0) {
1929
0
0
                                croak("BUG: $property: getset must give one output, incorrectly tagged as getset");
1930                        }
1931                }
1932        }
1933
1934        # Setup / call code (always load module)
1935
77
90
        my $setup_code = ($module) ? "BEGIN { use_ok('$module') }" : '';
1936
77
53
        my $call_code;  # Code to call the function being test when used with named arguments
1937        my $position_code;      # Code to call the function being test when used with position arguments
1938
77
111
        my $has_positions = _has_positions(\%input);
1939
77
175
        if(defined($new) && defined($module)) {
1940                # keep use_ok regardless (user found earlier issue)
1941
13
26
                if($new_code eq '') {
1942
12
13
                        $new_code = "new_ok('$module')";
1943                } else {
1944
1
2
                        $new_code = "new_ok('$module' => [ { $new_code } ] )";
1945                }
1946
13
13
                $setup_code .= "\nmy \$obj = $new_code;";
1947
13
17
                if($has_positions) {
1948
5
7
                        $position_code = "\$result = (scalar(\@alist) == 1) ? \$obj->$function(\$alist[0]) : (scalar(\@alist) == 0) ? \$obj->$function() : \$obj->$function(\@alist);";
1949
5
11
                        if(defined($accessor{type})) {
1950
0
0
                                if($accessor{type} eq 'getter') {
1951
0
0
                                        $position_code .= "my \$prev_value = \$obj->{$accessor{property}};";
1952                                } elsif($accessor{type} eq 'getset') {
1953
0
0
                                        $position_code .= 'if(scalar(@alist) == 1) { ';
1954
0
0
                                        $position_code .= "cmp_ok(\$result, 'eq', \$alist[0], 'getset function returns what was put in'); ok(\$obj->$function() eq \$result, 'test getset accessor');";
1955
0
0
                                        $position_code .= '}';
1956                                }
1957
0
0
                                if(($accessor{type} eq 'getset') || ($accessor{type} eq 'getter')) {
1958                                        # Since Perl doesn't support data encapsulation, we can test the getter returns the correct item
1959
0
0
                                        $position_code .= 'if(scalar(@alist) == 1) { ';
1960
0
0
                                        $position_code .= "cmp_ok(\$result, 'eq', \$obj->{$accessor{property}}, 'getset function returns correct item');";
1961
0
0
                                        if($accessor{type} eq 'getter') {
1962
0
0
                                                $position_code .= "if(defined(\$prev_value)) { cmp_ok(\$result, 'eq', \$prev_value, 'getter does not change value'); } ";
1963                                        }
1964
0
0
                                        $position_code .= '}';
1965                                }
1966
0
0
                                if($output{'_returns_self'}) {
1967
0
0
                                        croak("$accessor{type} for $accessor{property} cannot return \$self");
1968                                }
1969                        }
1970                } else {
1971
8
12
                        $call_code = "\$result = \$obj->$function(\$input);";
1972
8
30
                        if($output{'_returns_self'}) {
1973
0
0
                                $call_code .= "ok(defined(\$result)); ok(\$result eq \$obj, '$function returns self')";
1974                        } elsif(defined($accessor{type}) && ($accessor{type} eq 'getset')) {
1975
0
0
                                $call_code .= "ok(\$obj->$function() eq \$result, 'test getset accessor');"
1976                        }
1977
8
17
                        if(scalar(keys %input) == 0) {
1978
5
15
                                if(defined($accessor{type}) && ($accessor{type} eq 'getter')) {
1979
2
5
                                        $call_code .= "cmp_ok(\$result, 'eq', \$obj->{$accessor{property}}, 'getter function returns correct item') if(defined(\$result));";
1980                                }
1981                        }
1982                }
1983        } elsif(defined($module) && length($module)) {
1984
12
13
                if($function eq 'new') {
1985
2
8
                        if($has_positions) {
1986
0
0
                                $position_code = "\$result = (scalar(\@alist) == 1) ? ${module}\->$function(\$alist[0]) : (scalar(\@alist) == 0) ? ${module}\->$function() : ${module}\->$function(\@alist);";
1987                        } else {
1988
2
7
                                $call_code = "\$result = ${module}\->$function(\$input);";
1989                        }
1990                } else {
1991
10
8
                        if($has_positions) {
1992
0
0
                                $position_code = "\$result = (scalar(\@alist) == 1) ? ${module}::$function(\$alist[0]) : (scalar(\@alist) == 0) ? ${module}::$function() : ${module}::$function(\@alist);";
1993                        } else {
1994
10
13
                                $call_code = "\$result = ${module}::$function(\$input);";
1995                        }
1996                }
1997        } else {
1998
52
46
                if($has_positions) {
1999
7
8
                        $position_code = "\$result = $function(\@alist);";
2000                } else {
2001
45
46
                        $call_code = "\$result = $function(\$input);";
2002                }
2003        }
2004
2005        # List-context capture: $result = func() in scalar context returns a count, not the list.
2006        # When the schema says output type is 'array', capture into @_r then take a ref.
2007
77
140
        if(($output{type} // '') eq 'array') {
2008
2
2
                if(defined($call_code)) {
2009
2
9
                        $call_code =~ s/^\$result = (.*?);/my \@_r = ($1); \$result = \\\@_r;/s;
2010                }
2011
2
2
                if(defined($position_code)) {
2012
0
0
                        $position_code =~ s/^\$result = (.*?);/my \@_r = ($1); \$result = \\\@_r;/s;
2013                }
2014        }
2015
2016        # Build static corpus code
2017
77
95
        my $corpus_code = '';
2018
77
74
        if (%all_cases) {
2019
3
5
                $corpus_code = "\n# --- Static Corpus Tests ---\n" .
2020                        "diag('Running " . scalar(keys %all_cases) . " corpus tests');\n";
2021
2022
3
4
                for my $expected (sort keys %all_cases) {
2023
6
7
                        my $inputs = $all_cases{$expected};
2024
6
9
                        next unless($inputs);
2025
2026
6
5
                        my $expected_str = perl_quote($expected);
2027
6
9
                        my $status = ((ref($inputs) eq 'HASH') && $inputs->{'_STATUS'}) // 'OK';
2028
6
12
                        if($expected_str eq "'_STATUS:DIES'") {
2029
0
0
                                $status = 'DIES';
2030                        } elsif($expected_str eq "'_STATUS:WARNS'") {
2031
0
0
                                $status = 'WARNS';
2032                        }
2033
2034
6
11
                        if(ref($inputs) eq 'HASH') {
2035
0
0
                                $inputs = $inputs->{'input'};
2036                        }
2037
6
4
                        my $input_str;
2038
6
7
                        if(ref($inputs) eq 'ARRAY') {
2039
6
9
6
5
6
6
                                $input_str = join(', ', map { perl_quote($_) } @{$inputs});
2040                        } elsif(ref($inputs) eq 'HASH') {
2041
0
0
                                $input_str = render_fallback($inputs);
2042
2043                                # YAML can't express Perl's undef, so a corpus value of
2044                                # the sentinel string 'undef' means "this param is
2045                                # undef" -- convert the quoted sentinel back to the
2046                                # bareword so the generated test passes real undef
2047
0
0
                                $input_str =~ s/=> 'undef'/=> undef/gms;
2048                        } else {
2049
0
0
                                $input_str = $inputs;
2050                        }
2051
6
12
                        if(($input_str eq 'undef') && (!$config{'test_undef'})) {
2052
0
0
                                carp('corpus case set to undef, yet test_undef is not set in config');
2053                        }
2054
6
9
                        if($new) {
2055
0
0
                                if($status eq 'DIES') {
2056                                        $corpus_code .= "dies_ok { \$obj->$function($input_str) } " .
2057
0
0
0
0
                                                        "'$function(" . join(', ', map { $_ // '' } @$inputs ) . ") dies';\n";
2058                                } elsif($status eq 'WARNS') {
2059                                        $corpus_code .= "warnings_exist { \$obj->$function($input_str) } qr/./, " .
2060
0
0
0
0
                                                        "'$function(" . join(', ', map { $_ // '' } @$inputs ) . ") warns';\n";
2061                                } else {
2062                                        my $desc = sprintf("$function(%s) returns %s",
2063
0
0
0
0
                                                perl_quote(join(', ', map { $_ // '' } @$inputs )),
2064                                                $expected_str
2065                                        );
2066
0
0
                                        if(($output{'type'} // '') eq 'boolean') {
2067
0
0
                                                if($expected_str eq '1') {
2068
0
0
                                                        $corpus_code .= "ok(\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2069                                                } elsif($expected_str eq '0') {
2070
0
0
                                                        $corpus_code .= "ok(!\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2071                                                } else {
2072
0
0
                                                        croak("Boolean is expected to return $expected_str");
2073                                                }
2074                                        } else {
2075
0
0
                                                $corpus_code .= "is(\$obj->$function($input_str), $expected_str, " . q_wrap($desc) . ");\n";
2076                                        }
2077                                }
2078                        } else {
2079
6
8
                                if($status eq 'DIES') {
2080
0
0
                                        if($module) {
2081
0
0
                                                $corpus_code .= "dies_ok { $module\::$function($input_str) } " .
2082                                                        "'Corpus $expected dies';\n";
2083                                        } else {
2084
0
0
                                                $corpus_code .= "dies_ok { $function($input_str) } " .
2085                                                        "'Corpus $expected dies';\n";
2086                                        }
2087                                } elsif($status eq 'WARNS') {
2088
0
0
                                        if($module) {
2089
0
0
                                                $corpus_code .= "warnings_exist { $module\::$function($input_str) } qr/./, " .
2090                                                        "'Corpus $expected warns';\n";
2091                                        } else {
2092
0
0
                                                $corpus_code .= "warnings_exist { $function($input_str) } qr/./, " .
2093                                                        "'Corpus $expected warns';\n";
2094                                        }
2095                                } else {
2096                                        my $desc = sprintf("$function(%s) returns %s",
2097
6
9
6
11
12
5
                                                perl_quote((ref $inputs eq 'ARRAY') ? (join(', ', map { $_ // '' } @{$inputs})) : $inputs),
2098                                                $expected_str
2099                                        );
2100
6
10
                                        if(($output{'type'} // '') eq 'boolean') {
2101
0
0
                                                if($expected_str eq '1') {
2102
0
0
                                                        $corpus_code .= "ok(\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2103                                                } elsif($expected_str eq '0') {
2104
0
0
                                                        $corpus_code .= "ok(!\$obj->$function($input_str), " . q_wrap($desc) . ");\n";
2105                                                } else {
2106
0
0
                                                        croak("Boolean is expected to return $expected_str");
2107                                                }
2108                                        } else {
2109
6
11
                                                $corpus_code .= "is(\$obj->$function($input_str), $expected_str, " . q_wrap($desc) . ");\n";
2110                                        }
2111                                }
2112                        }
2113                }
2114        }
2115
2116        # Prepare seed/iterations code fragment for the generated test
2117
77
72
        my $seed_code = '';
2118
77
76
        if (defined $seed) {
2119                # ensure integer-ish
2120
7
8
                $seed = int($seed);
2121
7
7
                $seed_code = "srand($seed);\n";
2122        }
2123
2124
77
97
        my $determinism_code = 'my $result2;' .
2125                'eval { $result2 = do { ' . (defined($position_code) ? $position_code : $call_code) . " }; };\n" .
2126                'is_deeply($result2, $result, "deterministic result for same input");' .
2127                "\n";
2128
2129        # Generate the test content
2130
77
375
        my $tt = Template->new({ ENCODING => 'utf8', TRIM => 1 });
2131
2132        # Read template from DATA handle
2133
77
133473
        my $template_package = __PACKAGE__ . '::Template';
2134
77
297
        my $template = $template_package->get_data_section('test.tt');
2135
2136        my $vars = {
2137                setup_code => $setup_code,
2138                edge_cases_code => $edge_cases_code,
2139                edge_case_array_code => $edge_case_array_code,
2140                type_edge_cases_code => $type_edge_cases_code,
2141                config_code => $config_code,
2142                seed_code => $seed_code,
2143                input_code => $input_code,
2144                output_code => $output_code,
2145                transforms_code => $transforms_code,
2146                corpus_code => $corpus_code,
2147                call_code => $call_code,
2148                position_code => $position_code,
2149                determinism_code => $determinism_code,
2150                function => $function,
2151                iterations_code => int($iterations),
2152                use_properties => $use_properties,
2153                transform_properties_code => $transform_properties_code,
2154
77
84774
                property_trials => $config{properties}{trials} // DEFAULT_PROPERTY_TRIALS,
2155                relationships_code => $relationships_code,
2156                module => $module
2157        };
2158
2159
77
66
        my $test;
2160
77
153
        $tt->process($template, $vars, \$test) or croak($tt->error());
2161
2162
77
1503800
        if ($test_file) {
2163                # autodie is disabled for this open -- under "use autodie qw(:all)"
2164                # open() never returns false on failure, it throws its own exception
2165                # instead, which would silently make the "or croak" dead code.
2166
20
20
20
68
14
53
                no autodie qw(open);
2167
31
1191
                open my $fh, '>:encoding(UTF-8)', $test_file or croak "Cannot open $test_file: $!";
2168
31
14640
                print $fh "$test\n";
2169
31
108
                close $fh;
2170
31
4842
                if($module) {
2171
18
340
                        print "Generated $test_file for $module\::$function with fuzzing + corpus support\n";
2172                } else {
2173
13
200
                        print "Generated $test_file for $function with fuzzing + corpus support\n";
2174                }
2175        } else {
2176
46
48910
                print "$test\n";
2177        }
2178}
2179
2180# --- Helpers for rendering data structures into Perl code for the generated test ---
2181
2182# --------------------------------------------------
2183# _is_perl_builtin
2184#
2185# Purpose:    Return true if a string is the name of
2186#             a Perl core builtin function, to prevent
2187#             it being used as a module name in
2188#             use_ok() calls in generated tests.
2189#
2190# Entry:      $name - the string to check.
2191# Exit:       Returns 1 if builtin, 0 otherwise.
2192# --------------------------------------------------
2193sub _is_perl_builtin {
2194
23
12019
        my $name = $_[0];
2195
23
26
        return 0 unless defined $name;
2196
2197
22
202
18
181
        state %BUILTINS = map { $_ => 1 } qw(
2198                abs accept alarm atan2 bind binmode bless
2199                caller chdir chmod chomp chop chown chr chroot
2200                close closedir connect cos crypt
2201                dbmclose dbmopen defined delete die do dump
2202                each endgrent endhostent endnetent endprotoent endpwent endservent
2203                eof eval exec exists exit exp
2204                fcntl fileno flock fork format formline
2205                getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname
2206                gethostent getlogin getnetbyaddr getnetbyname getnetent
2207                getpeername getpgrp getppid getpriority getprotobyname
2208                getprotobynumber getprotoent getpwent getpwnam getpwuid
2209                getservbyname getservbyport getservent getsockname getsockopt
2210                glob gmtime goto grep
2211                hex
2212                index int ioctl
2213                join
2214                keys kill
2215                last lc lcfirst length link listen local localtime log lstat
2216                map mkdir msgctl msgget msgrcv msgsnd my
2217                next no
2218                oct open opendir ord our
2219                pack pipe pop pos print printf prototype push
2220                quotemeta
2221                rand read readdir readline readlink readpipe recv redo
2222                ref rename require reset return reverse rewinddir rindex rmdir
2223                say scalar seek seekdir select semctl semget semop send
2224                setgrent sethostent setnetent setpgrp setpriority setprotoent
2225                setpwent setservent setsockopt shift shmctl shmget shmread
2226                shmwrite shutdown sin sleep socket socketpair sort splice split
2227                sprintf sqrt srand stat study sub substr symlink syscall
2228                sysopen sysread sysseek system syswrite
2229                tell telldir tie tied time times truncate
2230                uc ucfirst umask undef unlink unpack unshift untie use
2231                utime values vec wait waitpid wantarray warn write
2232        );
2233
22
49
        return $BUILTINS{lc $name} // 0;
2234}
2235
2236# --------------------------------------------------
2237# _load_schema
2238#
2239# Load and parse a schema file using
2240#     Config::Abstraction, returning the
2241#     schema as a hashref.
2242#
2243# Entry:      $schema_file - path to the schema file.
2244#             Must be defined, non-empty, and readable.
2245#
2246# Exit:       Returns a hashref of the parsed schema
2247#             with a '_source' key added containing
2248#             the originating file path.
2249#             Croaks on any error.
2250#
2251# Side effects: Reads from the filesystem.
2252#
2253# Notes:      Legacy Perl-file configs (containing
2254#             '$module' or 'our $module' keys) are
2255#             rejected with a clear error. Config::
2256#             Abstraction is used rather than require()
2257#             to avoid executing arbitrary code from
2258#             user-supplied config files.
2259# --------------------------------------------------
2260sub _load_schema {
2261
91
14392
        my $schema_file = $_[0];
2262
2263        # Validate the argument before touching the filesystem
2264
91
121
        croak(__PACKAGE__, ': Usage: _load_schema($schema_file)') unless defined $schema_file;
2265
2266
89
120
        croak(__PACKAGE__, ': _load_schema given empty filename') unless length($schema_file);
2267
2268        # Confirm the file exists and is readable before attempting
2269        # to load it — gives a clearer error than Config::Abstraction would
2270
87
526
        croak(__PACKAGE__, ": _load_schema($schema_file): $!") unless -r $schema_file;
2271
2272        # Load configuration via Config::Abstraction which supports
2273        # YAML, JSON, and other formats without executing arbitrary code.
2274        # no_fixate prevents automatic type coercion that could alter values
2275
82
366
        if(my $schema = Config::Abstraction->new(
2276                config_dirs  => ['.', ''],
2277                config_file  => $schema_file,
2278                no_fixate    => 1,
2279        )) {
2280
82
77778
                if($schema = $schema->all()) {
2281                        # Detect legacy Perl config files by the presence of
2282                        # variable declaration keys — these are no longer supported
2283
82
579
                        if(exists($schema->{$LEGACY_PERL_KEY_1}) ||
2284                           exists($schema->{$LEGACY_PERL_KEY_2})) {
2285
1
8
                                croak("$schema_file: Loading perl files as configs is no longer supported");
2286                        }
2287
2288                        # Tag the schema with its source path for error messages
2289
81
565
                        $schema->{$SOURCE_KEY} = $schema_file;
2290
81
422
                        return $schema;
2291                }
2292        }
2293
2294
0
0
        croak "Failed to load schema from $schema_file";
2295}
2296
2297# --------------------------------------------------
2298# _load_schema_section
2299#
2300# Purpose:    Extract a named section from a parsed
2301#             schema hashref, validating that it is
2302#             a hashref if present.
2303#
2304# Entry:      $schema      - the full parsed schema hashref.
2305#             $section     - name of the section to extract
2306#                            (e.g. 'input', 'output').
2307#             $schema_file - path of the schema file,
2308#                            used in error messages only.
2309#
2310# Exit:       Returns the section hashref if present,
2311#             or an empty hashref {} if absent.
2312#             Croaks if the section exists but is not
2313#             a hashref (and not the string 'undef').
2314#
2315# Notes:      The string 'undef' is treated as an
2316#             absent section — callers that set a
2317#             section to 'undef' in YAML get the same
2318#             result as omitting it entirely.
2319# --------------------------------------------------
2320sub _load_schema_section {
2321
354
6364
        my ($schema, $section, $schema_file) = @_;
2322
2323        # Section absent — return empty hash as the safe default
2324
354
454
        return {} unless exists $schema->{$section};
2325
2326        # Section present and is a hashref — return it directly
2327        return $schema->{$section}
2328
180
441
                if ref($schema->{$section}) eq 'HASH';
2329
2330        # Treat the YAML scalar 'undef' as equivalent to absent
2331        return {}
2332                if defined($schema->{$section}) &&
2333
8
21
                   $schema->{$section} eq 'undef';
2334
2335        # Section present but wrong type — croak with a clear message
2336        # showing what type was found so the user can fix their schema
2337        croak(
2338                "$schema_file: $section should be a hash, not ",
2339
5
40
                ref($schema->{$section}) || $schema->{$section}
2340        );
2341}
2342
2343# --------------------------------------------------
2344# _validate_config
2345#
2346# Purpose:    Validate the top-level schema hashref
2347#             loaded from a schema file, checking that
2348#             required fields are present and that all
2349#             input parameters, types, positions, and
2350#             transform properties are well-formed.
2351#
2352# Entry:      $schema - the full parsed schema hashref
2353#             as returned by _load_schema().
2354#
2355# Exit:       Returns nothing on success.
2356#             Croaks on any structural error.
2357#             Carps on non-fatal warnings (unknown
2358#             semantic types, position gaps, missing
2359#             input/output definitions).
2360#
2361# Side effects: May delete $schema->{input} if its
2362#               value is the string 'undef'.
2363#
2364# Notes:      The parameter is named $schema throughout
2365#             to distinguish the top-level schema from
2366#             the nested config sub-hash. _validate_config
2367#             is called before _normalize_config so config
2368#             boolean normalisation has not yet occurred.
2369# --------------------------------------------------
2370sub _validate_config {
2371
100
58176
        my $schema = $_[0];
2372
2373        # At least one of module or function must be present —
2374        # without these we cannot generate any meaningful test
2375
100
149
        if(!defined($schema->{'module'}) && !defined($schema->{'function'})) {
2376
4
19
                croak('At least one of function and module must be defined');
2377        }
2378
2379        # Warn if neither input nor output is defined — a few
2380        # generic tests can still be generated but it is unusual
2381
96
145
        if(!defined($schema->{'input'}) && !defined($schema->{'output'})) {
2382
9
53
                carp('Neither input nor output is defined, only a few tests will be generated');
2383        }
2384
2385        # Normalise input: the string 'undef' means no input defined
2386
96
4720
        if($schema->{'input'} && ref($schema->{input}) ne 'HASH') {
2387
3
5
                if($schema->{'input'} eq 'undef') {
2388
1
1
                        delete $schema->{'input'};
2389                } else {
2390
2
8
                        croak("Invalid input specification: expected hash, got '$schema->{'input'}'");
2391                }
2392        }
2393
2394        # Validate each input parameter if input is defined
2395
94
112
        if($schema->{input}) {
2396
80
119
                _validate_input_params($schema);
2397
79
122
                _validate_input_positions($schema);
2398
79
92
                _validate_input_semantics($schema);
2399        }
2400
2401        # Validate transform property definitions if present
2402
93
190
        if(exists($schema->{transforms}) && ref($schema->{transforms}) eq 'HASH') {
2403
11
20
                _validate_transform_properties($schema);
2404        }
2405
2406        # Validate any nested config sub-hash keys against known types
2407
93
147
        if(ref($schema->{config}) eq 'HASH') {
2408
12
12
13
40
                for my $k (keys %{$schema->{'config'}}) {
2409                        # CONFIG_TYPES is the authoritative list of valid keys
2410                        croak "unknown config setting '$k'"
2411
49
441
44
328
                                unless grep { $_ eq $k } CONFIG_TYPES;
2412                }
2413        }
2414}
2415
2416# --------------------------------------------------
2417# _validate_input_params
2418#
2419# Purpose:    Validate type specifications for each
2420#             named input parameter.
2421#
2422# Entry:      $schema - the full parsed schema hashref.
2423#             $schema->{input} must be a hashref.
2424#
2425# Exit:       Returns nothing. Croaks on invalid type.
2426# --------------------------------------------------
2427sub _validate_input_params {
2428
83
8012
        my $schema = $_[0];
2429
2430
83
83
63
129
        for my $param (keys %{$schema->{input}}) {
2431                # Catch empty parameter names — these would produce
2432                # broken Perl variable names in the generated test
2433
84
93
                croak 'Empty input parameter name'
2434                        unless length($param);
2435
2436
83
81
                my $spec = $schema->{input}{$param};
2437
2438                # Validate the type field — required for all parameters
2439
83
87
                if(ref($spec)) {
2440                        croak("Missing type for parameter '$param'")
2441
35
75
                                unless defined $spec->{type};
2442                        # 'coderef' is a SchemaExtractor-specific type; treat as 'any'
2443
34
45
                        $spec->{type} = 'any' if $spec->{type} eq 'coderef';
2444                        croak("Invalid type '$spec->{type}' for parameter '$param'")
2445
34
61
                                unless _valid_type($spec->{type});
2446                } else {
2447
48
75
                        croak("Invalid type '$spec' for parameter '$param'")
2448                                unless _valid_type($spec);
2449                }
2450        }
2451}
2452
2453# --------------------------------------------------
2454# _validate_input_positions
2455#
2456# Purpose:    Validate positional argument declarations
2457#             in the input schema — positions must be
2458#             non-negative integers with no duplicates,
2459#             and either all or no parameters must have
2460#             positions.
2461#
2462# Entry:      $schema - the full parsed schema hashref.
2463#             $schema->{input} must be a hashref.
2464#
2465# Exit:       Returns nothing. Croaks on invalid or
2466#             duplicate positions. Carps on gaps.
2467# --------------------------------------------------
2468sub _validate_input_positions {
2469
87
12490
        my $schema = $_[0];
2470
2471
87
63
        my $has_positions = 0;
2472
87
63
        my %positions;
2473
2474
87
87
56
103
        for my $param (keys %{$schema->{input}}) {
2475
94
86
                my $spec = $schema->{input}{$param};
2476
2477                # Only process params that explicitly declare a position
2478
94
201
                next unless ref($spec) eq 'HASH' && defined($spec->{position});
2479
2480
30
21
                $has_positions = 1;
2481
30
28
                my $pos = $spec->{position};
2482
2483                # Position must be a non-negative integer
2484
30
77
                croak "Position for '$param' must be a non-negative integer"
2485                        unless $pos =~ /^\d+$/;
2486
2487                # Duplicate positions would produce ambiguous generated tests
2488                croak "Duplicate position $pos for parameters '$positions{$pos}' and '$param'"
2489
29
69
                        if exists $positions{$pos};
2490
2491
27
36
                $positions{$pos} = $param;
2492        }
2493
2494        # If any param has a position, all params must have one
2495
84
108
        if($has_positions) {
2496
18
18
16
28
                for my $param (keys %{$schema->{input}}) {
2497
26
27
                        my $spec = $schema->{input}{$param};
2498
26
65
                        unless(ref($spec) eq 'HASH' && defined($spec->{position})) {
2499
2
8
                                croak "Parameter '$param' missing position " .
2500                                        '(all params must have positions if any do)';
2501                        }
2502                }
2503
2504                # Check for gaps — positions must be a contiguous sequence
2505                # starting at 0, otherwise the generated test will be wrong
2506
16
7
42
13
                my @sorted = sort { $a <=> $b } keys %positions;
2507
16
24
                for my $i (0 .. $#sorted) {
2508
23
44
                        if($sorted[$i] != $i) {
2509
1
4
                                carp "Position sequence has gaps (positions: @sorted)";
2510
1
137
                                last;
2511                        }
2512                }
2513        }
2514}
2515
2516# --------------------------------------------------
2517# _validate_input_semantics
2518#
2519# Purpose:    Validate semantic type annotations and
2520#             enum/memberof constraints on input params.
2521#
2522# Entry:      $schema - the full parsed schema hashref.
2523#             $schema->{input} must be a hashref.
2524#
2525# Exit:       Returns nothing. Croaks on conflicting
2526#             or malformed enum/memberof. Carps on
2527#             unknown semantic types.
2528# --------------------------------------------------
2529sub _validate_input_semantics {
2530
89
16196
        my $schema = $_[0];
2531
2532
89
137
        my $semantic_generators = _get_semantic_generators();
2533
2534
89
89
74
154
        for my $param (keys %{$schema->{input}}) {
2535
89
80
                my $spec = $schema->{input}{$param};
2536
89
247
                next unless ref($spec) eq 'HASH';
2537
2538                # Warn on unknown semantic types rather than croaking —
2539                # new semantic types may be added without updating this list
2540
40
51
                if(defined($spec->{semantic})) {
2541
4
3
                        my $semantic = $spec->{semantic};
2542
4
6
                        unless(exists $semantic_generators->{$semantic}) {
2543                                carp "Unknown semantic type '$semantic' for parameter '$param'. " .
2544                                        'Available types: ' .
2545
2
2
4
18
                                        join(', ', sort keys %{$semantic_generators});
2546                        }
2547                }
2548
2549                # enum and memberof are mutually exclusive representations
2550                # of the same concept — having both is always a schema error
2551
40
275
                if($spec->{'enum'} && $spec->{'memberof'}) {
2552
2
8
                        croak "$param: has both enum and memberof";
2553                }
2554
2555                # Both enum and memberof must be arrayrefs when present
2556
38
40
                for my $type ('enum', 'memberof') {
2557
75
200
                        if(exists $spec->{$type}) {
2558                                croak "$type must be an arrayref"
2559
4
15
                                        unless ref($spec->{$type}) eq 'ARRAY';
2560                        }
2561                }
2562        }
2563}
2564
2565# --------------------------------------------------
2566# _validate_transform_properties
2567#
2568# Purpose:    Validate the properties array in each
2569#             transform definition, checking that each
2570#             property is either a known builtin name
2571#             or a custom hashref with name and code.
2572#
2573# Entry:      $schema - the full parsed schema hashref.
2574#             $schema->{transforms} must be a hashref.
2575#
2576# Exit:       Returns nothing. Croaks on invalid property
2577#             definitions. Carps on unknown builtins.
2578# --------------------------------------------------
2579sub _validate_transform_properties {
2580
17
10358
        my $schema = $_[0];
2581
2582
17
33
        my $builtin_props = _get_builtin_properties();
2583
2584
17
17
17
108
        for my $transform_name (keys %{$schema->{transforms}}) {
2585
15
13
                my $transform = $schema->{transforms}{$transform_name};
2586
2587                # properties is optional — skip transforms that don't define it
2588
15
85
                next unless exists $transform->{properties};
2589
2590                croak "Transform '$transform_name': properties must be an array"
2591
6
12
                        unless ref($transform->{properties}) eq 'ARRAY';
2592
2593
5
5
3
5
                for my $prop (@{$transform->{properties}}) {
2594
5
6
                        if(!ref($prop)) {
2595                                # Plain string — must be a known builtin property name
2596
2
13
                                unless(exists $builtin_props->{$prop}) {
2597                                        carp "Transform '$transform_name': unknown built-in property '$prop'. " .
2598                                                'Available: ' .
2599
1
1
2
7
                                                join(', ', sort keys %{$builtin_props});
2600                                }
2601                        } elsif(ref($prop) eq 'HASH') {
2602                                # Custom property — must have both name and code fields
2603
2
16
                                unless($prop->{name} && $prop->{code}) {
2604
1
22
                                        croak "Transform '$transform_name': " .
2605                                                "custom properties must have 'name' and 'code' fields";
2606                                }
2607                        } else {
2608
1
4
                                croak "Transform '$transform_name': invalid property definition";
2609                        }
2610                }
2611        }
2612}
2613
2614# --------------------------------------------------
2615# _normalize_config
2616#
2617# Purpose:    Normalise boolean string values in the
2618#             config sub-hash to Perl integers (1/0),
2619#             and default absent boolean fields to 1
2620#             (enabled). The 'properties' field is a
2621#             hashref not a boolean and is handled
2622#             separately.
2623#
2624# Entry:      $config - the config sub-hash extracted
2625#             from the schema (i.e. $schema->{config}).
2626#             May be empty.
2627#
2628# Exit:       Returns nothing. Modifies $config in place.
2629#
2630# Side effects: Modifies the caller's config hashref.
2631#
2632# Notes:      String-to-boolean conversion is delegated
2633#             to %Readonly::Values::Boolean::booleans
2634#             which handles 'yes'/'no', 'on'/'off',
2635#             'true'/'false' etc. Fields not present in
2636#             the config hash are defaulted to 1 so
2637#             that test generation is maximally thorough
2638#             unless the schema explicitly disables a
2639#             feature.
2640# --------------------------------------------------
2641sub _normalize_config {
2642
91
12810
        my $config = $_[0];
2643
2644
91
129
        for my $field (CONFIG_TYPES) {
2645                # Non-boolean fields are handled separately
2646
819
933
                next if $field eq $CONFIG_PROPERTIES_KEY;
2647
728
1529
                next if $field eq 'timeout';    # numeric, not boolean; absence means use generated-test default
2648
2649
637
793
                if(exists($config->{$field}) && defined($config->{$field})) {
2650                        # Convert string boolean representations to integers
2651                        # using the lookup table from Readonly::Values::Boolean
2652
460
653
                        if(defined(my $b = $Readonly::Values::Boolean::booleans{$config->{$field}})) {
2653
460
1459
                                $config->{$field} = $b;
2654                        }
2655                } else {
2656                        # Default absent boolean fields to enabled (1) so that
2657                        # test generation is comprehensive unless explicitly disabled
2658
177
156
                        $config->{$field} = 1;
2659                }
2660        }
2661
2662        # Ensure properties is always a hashref — if absent or set to
2663        # a non-hash value, replace with a disabled default so that
2664        # downstream code can safely dereference it without checking ref()
2665
91
89
        $config->{$CONFIG_PROPERTIES_KEY} = { enable => 0 } unless ref($config->{$CONFIG_PROPERTIES_KEY}) eq 'HASH';
2666}
2667
2668# --------------------------------------------------
2669# _valid_type
2670#
2671# Determine whether a string is a
2672#     recognised schema field type accepted
2673#     by the generator.
2674#
2675# Entry:      $type - the type string to validate.
2676#             May be undef.
2677#
2678# Exit:       Returns 1 if the type is known,
2679#             0 if the type is unknown or undef.
2680#
2681# Notes:      The lookup hash is declared with
2682#             'state' so it is built only once per
2683#             process rather than on every call —
2684#             important since _valid_type is called
2685#             in a loop over all input parameters.
2686#
2687#             'int' and 'bool' are accepted as
2688#             aliases for 'integer' and 'boolean'
2689#             respectively, for compatibility with
2690#             schemas generated by external tools
2691#             that use the shorter forms.
2692# --------------------------------------------------
2693sub _valid_type {
2694
143
17400
        my $type = $_[0];
2695
2696        # Undef is never a valid type
2697
143
153
        return 0 unless defined($type);
2698
2699        # Build the lookup table once and cache it for
2700        # the lifetime of the process via 'state'
2701
140
154
107
172
        state %VALID = map { $_ => 1 } qw(
2702                string boolean integer number float
2703                hashref arrayref object int bool any
2704        );
2705
2706
140
322
        return($VALID{$type} // 0);
2707}
2708
2709# --------------------------------------------------
2710# _assert_identifier
2711#
2712# Purpose:    Validate that a string is shaped like a
2713#             plain Perl identifier (or, with
2714#             package => 1, a "::"-separated package
2715#             name) before it is spliced into generated
2716#             test source as a bareword, package name,
2717#             method name, or variable name rather than
2718#             a quoted string literal. Schema-derived
2719#             names (module, function, transform names)
2720#             are spliced unescaped at the call sites
2721#             that use this guard, so an unvalidated
2722#             name could otherwise break out of the
2723#             generated source and inject arbitrary
2724#             Perl into a file that L<prove> will run.
2725#
2726# Entry:      $name - the string to validate.
2727#             $what - short label for the value, used
2728#                     only in the croak message.
2729#             %opts - package => 1 allows "::"
2730#                     separators in $name.
2731#
2732# Exit:       Returns $name unchanged on success.
2733#             Croaks if $name is not identifier-shaped.
2734# --------------------------------------------------
2735sub _assert_identifier {
2736
144
17930
        my ($name, $what, %opts) = @_;
2737
2738
144
277
        croak(__PACKAGE__, ": $what is missing or empty")
2739                unless defined($name) && length($name);
2740
2741        my $re = $opts{package}
2742
142
312
                ? qr/^[A-Za-z_]\w*(?:::[A-Za-z_]\w*)*\z/
2743                : qr/^[A-Za-z_]\w*\z/;
2744
2745
142
545
        croak(__PACKAGE__, ": $what '$name' is not a valid Perl identifier")
2746                unless $name =~ $re;
2747
2748
132
174
        return $name;
2749}
2750
2751# --------------------------------------------------
2752# _validate_module
2753#
2754# Purpose:    Check whether the module named in a
2755#             schema can be found in @INC during
2756#             test generation. Optionally also
2757#             attempts to load it if the
2758#             GENERATOR_VALIDATE_LOAD environment
2759#             variable is set.
2760#
2761# Entry:      $module      - the module name to
2762#                            check. If undef or
2763#                            empty, returns 1
2764#                            immediately (builtin
2765#                            functions need no
2766#                            module).
2767#             $schema_file - path to the schema
2768#                            file, used in warning
2769#                            messages only.
2770#
2771# Exit:       Returns 1 if the module was found
2772#             (and loaded, if validation was
2773#             requested).
2774#             Returns 0 if the module was not
2775#             found or failed to load — this is
2776#             non-fatal; generation continues.
2777#             Returns 1 immediately for undef or
2778#             empty $module.
2779#
2780# Side effects: Prints to STDERR when TEST_VERBOSE
2781#               or GENERATOR_VERBOSE is set.
2782#               Carps (non-fatally) when the module
2783#               cannot be found or loaded.
2784#               May attempt to load the module into
2785#               the current process when
2786#               GENERATOR_VALIDATE_LOAD is set —
2787#               this can have side effects depending
2788#               on the module.
2789#
2790# Notes:      Not finding a module during generation
2791#             is intentionally non-fatal — the module
2792#             may be available on the target machine
2793#             even if not on the generation machine.
2794#             Verbose output goes to STDERR via
2795#             print rather than carp since it is
2796#             informational, not a warning.
2797# --------------------------------------------------
2798sub _validate_module {
2799
31
6098
        my ($module, $schema_file) = @_;
2800
2801        # Builtin functions have no module to validate
2802
31
42
        return 1 unless $module;
2803
2804        # Check whether the module is findable in @INC
2805
29
91
        my $mod_info = check_install(module => $module);
2806
2807
29
43699
        if($schema_file && !$mod_info) {
2808                # Non-fatal — emit a single consolidated warning so
2809                # the caller sees one message rather than four
2810
10
217
                carp(
2811                        "Module '$module' not found in \@INC during generation.\n" .
2812                        "  Config file: $schema_file\n" .
2813                        "  This is OK if the module will be available when tests run.\n" .
2814                        '  If unexpected, check your module name and installation.'
2815                );
2816
10
8126
                return 0;
2817        }
2818
2819        # Check once and reuse — avoids evaluating two env vars twice
2820
19
40
        my $verbose = $ENV{$ENV_TEST_VERBOSE} || $ENV{$ENV_GENERATOR_VERBOSE};
2821
2822
19
136
        if($verbose) {
2823                print STDERR "Found module '$module' at: $mod_info->{'file'}\n",
2824
0
0
                        '  Version: ', ($mod_info->{'version'} || 'unknown'), "\n";
2825        }
2826
2827        # Optional load validation — disabled by default because
2828        # loading a module can have side effects (e.g. BEGIN blocks,
2829        # database connections, file I/O) that are undesirable
2830        # during generation
2831
19
23
        if($ENV{$ENV_VALIDATE_LOAD}) {
2832
0
0
                my $loaded = can_load(modules => { $module => undef }, verbose => 0);
2833
2834
0
0
                if(!$loaded) {
2835
0
0
                        my $err = $Module::Load::Conditional::ERROR || 'unknown error';
2836
0
0
                        carp(
2837                                "Module '$module' found but failed to load: $err\n" .
2838                                '  This might indicate a broken installation or missing dependencies.'
2839                        );
2840
0
0
                        return 0;
2841                }
2842
2843
0
0
                if($verbose) {
2844
0
0
                        print STDERR "Successfully loaded module '$module'\n";
2845                }
2846        }
2847
2848
19
82
        return 1;
2849}
2850
2851 - 2897
=head2 render_fallback

Render any Perl value into a compact Perl source-code string using
L<Data::Dumper>. Used as a catch-all when no more specific renderer
applies.

    my $code = render_fallback({ key => 'value' });
    # returns: "{'key' => 'value'}"

=head3 Arguments

=over 4

=item * C<$v>

Any Perl value, including undef, scalars, refs, and blessed objects.

=back

=head3 Returns

A string of Perl source code that reproduces the value when evaluated.
Returns the string C<'undef'> when C<$v> is undef.

=head3 Side effects

Temporarily sets C<$Data::Dumper::Terse> and C<$Data::Dumper::Indent>
to produce compact single-line output. Both are restored on return via
C<local>.

=head3 Notes

The output is always a single line with no trailing newline. Suitable
for embedding in generated test code where readability is secondary to
correctness.

=head3 API specification

=head4 input

    { v => { type => 'any', optional => 1 } }

=head4 output

    { type => 'string' }

=cut
2898
2899sub render_fallback {
2900
38
7387
        my $v = $_[0];
2901
2902        # Handle undef explicitly rather than letting Dumper produce
2903        # 'undef' without the localised settings applied
2904
38
51
        return 'undef' unless defined $v;
2905
2906        # Use Terse+Indent=0 to produce compact single-line output
2907        # suitable for embedding in generated test code
2908
28
37
        local $Data::Dumper::Terse  = 1;
2909
28
45
        local $Data::Dumper::Indent = 0;
2910
2911
28
109
        my $s = Dumper($v);
2912
2913        # Remove trailing newline that Dumper always appends
2914
28
1023
        chomp $s;
2915
28
55
        return $s;
2916}
2917
2918 - 2972
=head2 render_hash

Render a two-level hashref (parameter name => spec hashref) into Perl
source code suitable for embedding in a generated test file as the
input specification passed to L<Params::Validate::Strict>.

    my $code = render_hash(\%input);

=head3 Arguments

=over 4

=item * C<$href>

A hashref whose values are themselves hashrefs containing field
specifications. A scalar value that is a recognised type string (see
C<_valid_type>) is expanded to C<{ type =E<gt> $value }>. Any other
non-hashref value is skipped with a warning.

=back

=head3 Returns

A string of comma-separated Perl source-code lines, one per key, of
the form:

    'key' => { subkey => value, ... }

Returns an empty string if C<$href> is undef, empty, or not a hashref.

=head3 Side effects

None. Does not modify C<$href>.

=head3 Notes

The C<matches> and C<nomatch> sub-keys are treated specially — their
values are compiled to C<Regexp> objects via C<eval { qr/.../ }> and
then rendered using C<perl_quote> so they appear as C<qr{...}> in the
generated test. This prevents unmatched bracket characters in the
pattern from causing compilation failures.

Other sub-keys are rendered via C<perl_quote>.

=head3 API specification

=head4 input

    { href => { type => 'hashref', optional => 1 } }

=head4 output

    { type => 'string' }

=cut
2973
2974sub render_hash {
2975
52
19348
        my $href = $_[0];
2976
2977        # Return empty string for absent or non-hash input — callers
2978        # treat '' as "no input specification" in the generated test
2979
52
127
        return '' unless $href && ref($href) eq 'HASH';
2980
2981
47
36
        my @lines;
2982
2983
47
47
43
69
        for my $k (sort keys %{$href}) {
2984
38
52
                my $def = $href->{$k};
2985
2986                # Handle scalar shorthand — 'arg1: string' is equivalent to
2987                # 'arg1: { type: string }' and is explicitly supported by the
2988                # validation layer in _validate_input_params
2989
38
92
                unless(defined($def) && ref($def) eq 'HASH') {
2990
6
20
                        if(defined($def) && !ref($def) && _valid_type($def)) {
2991                                # Expand scalar type shorthand to a full spec hashref
2992
4
7
                                $def = { type => $def };
2993                        } else {
2994
2
18
                                carp "render_hash: skipping key '$k' — value is not a hashref or recognised type string";
2995
2
215
                                next;
2996                        }
2997                }
2998
2999
36
28
                my @pairs;
3000
3001
36
36
39
62
                for my $subk (sort keys %{$def}) {
3002                        # Skip undef sub-values — they contribute nothing to the spec
3003
78
77
                        next unless defined $def->{$subk};
3004
3005                        # Validate that reference types are ones we can render —
3006                        # nested hashrefs are not yet supported
3007
77
67
                        if(ref($def->{$subk})) {
3008
0
0
                                unless((ref($def->{$subk}) eq 'ARRAY') ||
3009                                       (ref($def->{$subk}) eq 'Regexp')) {
3010                                        croak(
3011                                                __PACKAGE__,
3012                                                ": $subk is a nested element, not yet supported (",
3013
0
0
                                                ref($def->{$subk}), ')'
3014                                        );
3015                                }
3016                        }
3017
3018                        # matches and nomatch values must be Regexp objects in the
3019                        # generated test — compile raw strings safely via eval so
3020                        # patterns containing [ or \ don't cause compile failures
3021
77
103
                        if(($subk eq $KEY_MATCHES) || ($subk eq $KEY_NOMATCH)) {
3022                                my $re = ref($def->{$subk}) eq 'Regexp'
3023                                        ? $def->{$subk}
3024
3
3
14
27
                                        : eval { qr/$def->{$subk}/ };
3025
3
9
                                if($@ || !defined($re)) {
3026
0
0
                                        carp "render_hash: invalid $subk pattern '$def->{$subk}': $@";
3027
0
0
                                        next;
3028                                }
3029
3
6
                                push @pairs, "$subk => " . perl_quote($re);
3030                        } else {
3031                                # All other sub-keys are rendered via perl_quote which
3032                                # handles scalars, arrayrefs, and Regexp objects correctly
3033
74
392
                                push @pairs, "$subk => " . perl_quote($def->{$subk});
3034                        }
3035                }
3036
3037                # Use "\t" rather than a literal tab for clarity and grep-ability
3038
36
43
                push @lines, "\t" . perl_quote($k) . ' => { ' . join(', ', @pairs) . ' }';
3039        }
3040
3041
47
76
        return join(",\n", @lines);
3042}
3043
3044 - 3086
=head2 render_args_hash

Render a flat hashref into a Perl source-code argument list of the
form C<'key' => value, ...>, suitable for embedding in a function call
in a generated test file.

    my $code = render_args_hash({ type => 'string', min => 1 });
    # returns: "'min' => 1, 'type' => 'string'"

=head3 Arguments

=over 4

=item * C<$href>

A flat hashref of key-value pairs. Values may be scalars, arrayrefs,
or Regexp objects — all are handled by C<perl_quote>.

=back

=head3 Returns

A comma-separated string of C<key => value> pairs sorted by key.
Returns an empty string if C<$href> is undef, empty, or not a hashref.

=head3 Notes

Keys and values are both rendered via C<perl_quote>. In particular,
C<Regexp> values are rendered as C<qr{...}> which is correct for
L<Params::Validate::Strict> and L<Return::Set> schema arguments in
the generated test.

=head3 API specification

=head4 input

    { href => { type => 'hashref', optional => 1 } }

=head4 output

    { type => 'string' }

=cut
3087
3088sub render_args_hash {
3089
110
11393
        my $href = $_[0];
3090
3091        # Return empty string for absent or non-hash input
3092
110
220
        return '' unless $href && ref($href) eq 'HASH';
3093
3094        # Sort keys for deterministic output across runs — important for
3095        # generated test files that are committed to version control
3096        my @pairs = map {
3097
135
131
                perl_quote($_) . ' => ' . perl_quote($href->{$_})
3098
106
106
77
146
        } sort keys %{$href};
3099
3100
106
176
        return join(', ', @pairs);
3101}
3102
3103 - 3145
=head2 render_arrayref_map

Render a hashref whose values are arrayrefs into a Perl source-code
fragment suitable for use as a hash literal in a generated test file.

    my $code = render_arrayref_map({ name => ['', 'a' x 100] });

=head3 Arguments

=over 4

=item * C<$href>

A hashref whose values are arrayrefs. Keys whose values are not
arrayrefs are silently skipped.

=back

=head3 Returns

A comma-separated string of C<'key' => [ val, ... ]> entries, one per
qualifying key, sorted alphabetically. Returns the string C<'()'> if
C<$href> is undef, empty, or not a hashref — this produces an empty
hash assignment in the generated test rather than a syntax error.

=head3 Notes

Array element values are rendered via C<perl_quote> which handles
scalars, arrayrefs, and Regexp objects. Non-arrayref values are
skipped without warning — this is intentional since callers may pass
mixed-value hashes and only want the arrayref entries rendered.

=head3 API specification

=head4 input

    { href => { type => 'hashref', optional => 1 } }

=head4 output

    { type => 'string' }

=cut
3146
3147sub render_arrayref_map {
3148
169
13278
        my $href = $_[0];
3149
3150        # Return '()' rather than '' so callers get a valid empty hash
3151        # literal rather than a syntax error in the generated test
3152
169
310
        return '()' unless $href && ref($href) eq 'HASH';
3153
3154
166
99
        my @entries;
3155
3156
166
166
115
182
        for my $k (sort keys %{$href}) {
3157
10
10
                my $aref = $href->{$k};
3158
3159                # Skip non-arrayref values — mixed hashes are allowed by callers
3160
10
14
                next unless ref($aref) eq 'ARRAY';
3161
3162                # Render each array element via perl_quote so strings are
3163                # properly quoted and numbers are left unquoted
3164
7
12
7
4
12
8
                my $vals = join(', ', map { perl_quote($_) } @{$aref});
3165
3166                # Use "\t" rather than a literal tab for clarity
3167
7
10
                push @entries, "\t" . perl_quote($k) . " => [ $vals ]";
3168        }
3169
3170
166
213
        return join(",\n", @entries);
3171}
3172
3173# --------------------------------------------------
3174# _has_positions
3175#
3176# Purpose:    Determine whether any field in an input
3177#             spec hashref declares a positional argument
3178#             via the 'position' key.
3179#
3180# Entry:      $input_spec - the input section of a parsed
3181#             schema, expected to be a hashref whose values
3182#             are themselves hashrefs containing field specs.
3183#             May be undef or a non-hash ref.
3184#
3185# Exit:       Returns 1 if any field has a defined
3186#             'position' key, 0 otherwise.
3187#
3188# Notes:      Returns 0 immediately for undef or non-hash
3189#             input rather than throwing — callers use the
3190#             return value as a boolean and do not expect
3191#             exceptions from this function.
3192# --------------------------------------------------
3193sub _has_positions {
3194
104
13909
        my $input_spec = $_[0];
3195
3196        # Guard against undef or non-hash input — keys %$undef would throw
3197
104
203
        return 0 unless defined($input_spec) && ref($input_spec) eq 'HASH';
3198
3199
100
100
66
105
        for my $field (keys %{$input_spec}) {
3200                # Only examine fields whose spec is a hashref — scalar specs
3201                # (e.g. input: { type: string }) cannot have positions
3202
89
116
                next unless ref($input_spec->{$field}) eq 'HASH';
3203
3204                # Return immediately on first match — no need to scan further
3205
41
64
                return 1 if defined $input_spec->{$field}{position};
3206        }
3207
3208        # No positional arguments found in any field
3209
74
76
        return 0;
3210}
3211
3212# --------------------------------------------------
3213# q_wrap
3214#
3215# Purpose:    Wrap a string in the most readable
3216#             q{} form that does not require escaping,
3217#             falling back to single-quoted form with
3218#             escaped apostrophes if no delimiter is
3219#             available.
3220#
3221# Entry:      $s - the string to wrap. May be undef.
3222# Exit:       Returns a Perl source-code fragment that
3223#             evaluates to the original string value,
3224#             or the string 'undef' if $s is undef.
3225#
3226# Notes:      index() returns -1 when not found and
3227#             any value >= 0 when found, including 0
3228#             for a delimiter at the start of the
3229#             string. We compare against $INDEX_NOT_FOUND
3230#             to make this boundary explicit and to
3231#             prevent off-by-one mutation survivors.
3232#             See GitHub issue #1.
3233# --------------------------------------------------
3234sub q_wrap {
3235
113
17129
        my $s = $_[0];
3236
3237
113
153
        croak('q_wrap: argument must be a plain string, not a reference') if ref($s);
3238
3239        # Return empty string for undef — this function is a low-level
3240        # string quoter only. Callers that need the Perl literal 'undef'
3241        # for undefined values should use perl_quote() instead, which
3242        # handles the undef -> 'undef' semantic conversion correctly.
3243        # Returning '' here preserves the original behaviour and avoids
3244        # injecting the bare word 'undef' into contexts that expect a
3245        # quoted string value.
3246
113
87
        return "''" unless defined $s;
3247
3248        # Try bracket-form q{} delimiters first — most readable
3249
110
168
        for my $p (@Q_BRACKET_PAIRS) {
3250
124
124
272
121
                my ($l, $r) = @{$p};
3251
3252                # Only use this bracket pair if neither bracket
3253                # appears in the string — both must be checked
3254
124
1487
                return "q$l$s$r" unless $s =~ /\Q$l\E|\Q$r\E/;
3255        }
3256
3257        # Try single-character delimiters in preference order
3258
3
9
        for my $d (@Q_SINGLE_DELIMITERS) {
3259                # index() returns $INDEX_NOT_FOUND (-1) when not found.
3260                # Must use != $INDEX_NOT_FOUND rather than > 0 since
3261                # the delimiter may legitimately appear at position 0
3262
14
82
                return "q$d$s$d" if index($s, $d) == $INDEX_NOT_FOUND;
3263        }
3264
3265        # Last resort — single-quoted string with escaped apostrophes
3266
1
5
        (my $esc = $s) =~ s/'/\\'/g;
3267
1
2
        return "'$esc'";
3268}
3269
3270# --------------------------------------------------
3271# perl_sq
3272#
3273# Purpose:    Escape a string for safe inclusion
3274#             inside a single-quoted Perl string
3275#             literal in generated test code.
3276#
3277# Entry:      $s - the string to escape.
3278# Exit:       Returns the escaped string, or an
3279#             empty string if $s is undef.
3280#
3281# Notes:      NUL byte replacement produces the
3282#             two-character sequence \0 which is
3283#             only correct when the result is used
3284#             inside a double-quoted string context
3285#             in the generated test.
3286#
3287#             The \b substitution (backspace) is
3288#             intentionally omitted — in Perl regex
3289#             context \b means word boundary, not
3290#             backspace, so substituting it here
3291#             would corrupt strings containing word
3292#             boundaries.
3293# --------------------------------------------------
3294sub perl_sq {
3295
374
209516
        my $s = $_[0];
3296
3297
374
272
        croak('perl_sq: argument must be a plain string, not a reference') if ref($s);
3298
3299        # Return empty string for undef — callers that need
3300        # 'undef' literal should use perl_quote instead
3301
374
273
        return '' unless defined $s;
3302
3303        # Escape backslashes first so later substitutions
3304        # don't double-escape already-escaped sequences
3305
372
291
        $s =~ s/\\/\\\\/g;
3306
3307        # Escape apostrophes so they don't terminate the
3308        # surrounding single-quoted string literal
3309
372
234
        $s =~ s/'/\\'/g;
3310
3311        # Escape common control characters to their
3312        # printable two-character escape sequences
3313
372
225
        $s =~ s/\n/\\n/g;
3314
372
225
        $s =~ s/\r/\\r/g;
3315
372
224
        $s =~ s/\t/\\t/g;
3316
372
247
        $s =~ s/\f/\\f/g;
3317
3318        # Replace NUL bytes with \0 — valid only in
3319        # double-quoted string context in generated code
3320
372
209
        $s =~ s/\0/\\0/g;
3321
3322
372
663
        return $s;
3323}
3324
3325 - 3355
=head2 perl_quote

Convert any Perl value into a source-code fragment that reproduces that value
when evaluated in a generated test file.

=head3 Arguments

=over 4

=item * C<$v>

Any Perl value. May be undef, a scalar, an arrayref, a Regexp, or a blessed
object. All types are handled — undef becomes C<'undef'>, the strings
C<'true'>/C<'false'> become the Perl boolean constants C<!!1>/C<!!0>,
numbers are unquoted, other strings are single-quoted, arrayrefs recurse,
Regexps become C<qr{...}>, and anything else (including hashrefs and
blessed objects) falls through to C<render_fallback>.

=back

=head3 API specification

=head4 input

    { v => { type => 'any', optional => 1 } }

=head4 output

    { type => 'string' }

=cut
3356
3357sub perl_quote {
3358
475
257772
        my ($v) = @_;
3359
475
385
        return _perl_quote($v, 0);
3360}
3361
3362sub _perl_quote {
3363
600
11043
        my ($v, $depth) = @_;
3364
600
496
        croak('perl_quote: structure too deeply nested (circular reference?)') if $depth > 100;
3365
3366        # Undef produces the Perl literal 'undef'
3367
599
406
        return 'undef' unless defined $v;
3368
3369        # Convert YAML boolean string literals to Perl
3370        # boolean constants so they survive round-tripping
3371
594
476
        return '!!1' if $v eq 'true';
3372
591
441
        return '!!0' if $v eq 'false';
3373
3374
588
431
        if(ref($v)) {
3375                # Recursively quote each element of an arrayref
3376
141
158
                if(ref($v) eq 'ARRAY') {
3377
111
125
111
71
147
73
                        my @quoted_v = map { _perl_quote($_, $depth + 1) } @{$v};
3378
10
24
                        return '[ ' . join(', ', @quoted_v) . ' ]';
3379                }
3380
3381                # Render Regexp objects as qr{} with modifiers
3382
30
42
                if(ref($v) eq 'Regexp') {
3383
12
117
                        my ($pat, $mods) = regexp_pattern($v);
3384
12
16
                        my $re = "qr{$pat}";
3385
3386                        # Append modifiers (e.g. 'i', 'x') if present
3387
12
15
                        $re .= $mods if $mods;
3388
12
27
                        return $re;
3389                }
3390
3391                # Hashrefs and other reference types fall through
3392                # to render_fallback which uses Data::Dumper
3393
18
25
                return render_fallback($v);
3394        }
3395
3396        # Numeric values are emitted unquoted so the generated
3397        # test performs numeric rather than string comparison
3398
447
644
        return looks_like_number($v) ? $v : "'" . perl_sq($v) . "'";
3399}
3400
3401# --------------------------------------------------
3402# _generate_transform_properties
3403#
3404# Convert a hashref of transform
3405#     specifications into an arrayref of
3406#     LectroTest property definition hashrefs,
3407#     one per transform. Each hashref contains
3408#     all the information needed by
3409#     _render_properties to emit a runnable
3410#     Test::LectroTest property block.
3411#
3412# Entry:      $transforms  - hashref of transform name
3413#                            => transform spec, as
3414#                            loaded from the schema.
3415#             $function    - name of the function under
3416#                            test.
3417#             $module      - module name, or undef for
3418#                            builtin functions.
3419#             $input       - the top-level input spec
3420#                            hashref from the schema
3421#                            (used for position sorting).
3422#             $config      - the normalised config
3423#                            hashref, used to read
3424#                            properties.trials.
3425#             $new         - defined if the function is
3426#                            an object method; the value
3427#                            is not used here since
3428#                            property tests always
3429#                            construct a fresh object
3430#                            via new_ok() with no args.
3431#                            Presence vs absence is the
3432#                            only signal used.
3433#
3434# Exit:       Returns an arrayref of property hashrefs.
3435#             Returns an empty arrayref if no transforms
3436#             produce any testable properties.
3437#             Never returns undef.
3438#
3439# Notes:      Transforms whose input is the string
3440#             'undef' or whose input spec is not a
3441#             hashref are silently skipped — they
3442#             represent error-case transforms that have
3443#             no meaningful generator.
3444#
3445#             The 'WARN' vs 'WARNS' distinction in
3446#             _STATUS: the schema convention uses
3447#             'WARNS' throughout. This function checks
3448#             for 'WARNS' to match that convention.
3449# --------------------------------------------------
3450sub _generate_transform_properties {
3451
11
10172
        my ($transforms, $function, $module, $input, $config, $new) = @_;
3452
3453
11
8
        my @properties;
3454
3455
11
11
8
15
        for my $transform_name (sort keys %{$transforms}) {
3456                # $transform_name is spliced by _render_properties as a Perl
3457                # *variable name* (my $$transform_name = Property {...}), not
3458                # just inside a string literal — reject anything that isn't
3459                # identifier-shaped before it reaches that point.
3460
13
16
                _assert_identifier($transform_name, 'transform name');
3461
3462
12
11
                my $transform   = $transforms->{$transform_name};
3463
3464
12
11
                my $input_spec  = $transform->{input};
3465
3466                # Guard: skip transforms with no input or with the
3467                # YAML scalar 'undef' as their input — these have no
3468                # generator and cannot produce meaningful properties
3469
12
21
                if(!defined($input_spec) ||
3470                   (!ref($input_spec) && $input_spec eq 'undef')) {
3471
2
2
                        next;
3472                }
3473
3474                # Guard: skip transforms whose input is not a hashref —
3475                # must come before the helper calls below so we never
3476                # pass a non-hash to _detect_transform_properties or
3477                # _process_custom_properties
3478
10
18
                next unless ref($input_spec) eq 'HASH';
3479
3480                # Default output spec to empty hash so _STATUS lookups
3481                # below are always safe regardless of schema content
3482
9
11
                my $output_spec = $transform->{output} // {};
3483
3484                # Detect automatic properties from the transform spec
3485                # (range constraints, type preservation, definedness)
3486
9
11
                my @detected_props = _detect_transform_properties(
3487                        $transform_name,
3488                        $input_spec,
3489                        $output_spec
3490                );
3491
3492                # Process any custom properties defined in the schema
3493
9
9
                my @custom_props = ();
3494
9
20
                if(exists($transform->{properties}) &&
3495                   ref($transform->{properties}) eq 'ARRAY') {
3496                        @custom_props = _process_custom_properties(
3497                                $transform->{properties},
3498
0
0
                                $function,
3499                                $module,
3500                                $input_spec,
3501                                $output_spec,
3502                                $new
3503                        );
3504                }
3505
3506                # Combine auto-detected and custom properties into one list
3507
9
10
                my @all_props = (@detected_props, @custom_props);
3508
3509                # Skip this transform if no properties were produced —
3510                # nothing useful to render into the generated test
3511
9
7
                next unless @all_props;
3512
3513                # Build the LectroTest generator specification string,
3514                # one entry per input field that has a generator
3515
9
7
                my @generators;
3516                my @var_names;
3517
3518
9
9
7
9
                for my $field (sort keys %{$input_spec}) {
3519
9
7
                        my $spec = $input_spec->{$field};
3520
3521                        # Skip non-hashref field specs — scalar types
3522                        # like 'string' have no generator sub-structure
3523
9
8
                        next unless ref($spec) eq 'HASH';
3524
3525                        # $field is spliced unescaped into the generated
3526                        # LectroTest generator spec by
3527                        # _schema_to_lectrotest_generator() — reject anything
3528                        # that isn't identifier-shaped first.
3529
9
10
                        _assert_identifier($field, 'input field name');
3530
3531
9
8
                        my $gen = _schema_to_lectrotest_generator($field, $spec);
3532
9
35
                        if(defined($gen) && length($gen)) {
3533
9
6
                                push @generators, $gen;
3534
9
10
                                push @var_names, $field;
3535                        }
3536                }
3537
3538
9
9
                my $gen_spec = join(', ', @generators);
3539
3540                # Build the call expression for the function under test.
3541                # Note: property tests always construct a fresh object
3542                # via new_ok() with no constructor arguments, regardless
3543                # of what $new holds in the caller — the intent here is
3544                # to test the method in isolation, not with specific
3545                # construction state.
3546
9
7
                my $call_code;
3547
9
22
                if($module && defined($new)) {
3548                        # OO mode — construct a fresh object for each trial
3549
1
1
                        $call_code  = "my \$obj = new_ok('$module');";
3550
1
1
                        $call_code .= "\$obj->$function";
3551                } elsif($module && $module ne $MODULE_BUILTIN) {
3552                        # Functional mode with a named module
3553
0
0
                        $call_code = "$module\::$function";
3554                } else {
3555                        # Builtin or unqualified function call
3556
8
7
                        $call_code = $function;
3557                }
3558
3559                # Build the argument list, respecting positional order
3560                # if the input spec declares positions
3561
9
5
                my @args;
3562
9
11
                if(_has_positions($input_spec)) {
3563                        # Sort fields by declared position so the generated
3564                        # call passes arguments in the correct order
3565                        my @sorted = sort {
3566                                $input_spec->{$a}{position} <=>
3567                                $input_spec->{$b}{position}
3568
9
0
9
5
0
446
                        } keys %{$input_spec};
3569
9
9
9
15
                        @args = map { "\$$_" } @sorted;
3570                } else {
3571                        # No positions — use alphabetical order from @var_names
3572
0
0
0
0
                        @args = map { "\$$_" } @var_names;
3573                }
3574
3575
9
10
                my $args_str = join(', ', @args);
3576
3577                # Concatenate all property check expressions with &&
3578                # so the generated property block passes only when
3579                # every check holds
3580
9
29
7
26
                my @checks = map { $_->{code} } @all_props;
3581
9
11
                my $property_checks = join(" &&\n\t", @checks);
3582
3583                # Determine expected behaviour from output _STATUS.
3584                # Note: the schema convention uses 'WARNS' not 'WARN'
3585
9
18
                my $should_die  = ($output_spec->{'_STATUS'} // '') eq 'DIES';
3586
9
11
                my $should_warn = ($output_spec->{'_STATUS'} // '') eq 'WARNS';
3587
3588                push @properties, {
3589                        name             => $transform_name,
3590                        generator_spec   => $gen_spec,
3591                        call_code        => "$call_code($args_str)",
3592                        property_checks  => $property_checks,
3593                        should_die       => $should_die,
3594                        should_warn      => $should_warn,
3595
9
39
                        trials           => $config->{'properties'}{'trials'} // DEFAULT_PROPERTY_TRIALS,
3596                };
3597        }
3598
3599
10
14
        return \@properties;
3600}
3601
3602# --------------------------------------------------
3603# _get_semantic_generators
3604#
3605# Return a hashref of named semantic
3606#     generator definitions for use in
3607#     LectroTest property-based tests.
3608#     Each entry contains a 'code' key
3609#     holding a Gen {} block string and a
3610#     'description' key for documentation
3611#     and validation messages.
3612#
3613# Entry:      None.
3614#
3615# Exit:       Returns a hashref keyed by semantic
3616#             type name. Each value is a hashref
3617#             with 'code' and 'description' keys.
3618#
3619# Notes:      The returned hashref is built fresh
3620#             on every call — callers that need it
3621#             repeatedly should cache the result.
3622#             The 'code' strings are multi-line
3623#             Gen {} blocks; callers are responsible
3624#             for compressing whitespace before
3625#             embedding them in generated test files.
3626# --------------------------------------------------
3627sub _get_semantic_generators {
3628        return {
3629
94
6372
                email => {
3630                        code => q{
3631                                Gen {
3632                                        my $len = 5 + int(rand(10));
3633                                        my @addr;
3634                                        my @tlds = qw(com org net edu gov io co uk de fr);
3635
3636                                        for(my $i = 0; $i < $len; $i++) {
3637                                                push @addr, pack('c', (int(rand 26))+97);
3638                                        }
3639                                        push @addr, '@';
3640                                        $len = 5 + int(rand(10));
3641                                        for(my $i = 0; $i < $len; $i++) {
3642                                                push @addr, pack('c', (int(rand 26))+97);
3643                                        }
3644                                        push @addr, '.';
3645                                        $len = rand($#tlds+1);
3646                                        push @addr, $tlds[$len];
3647                                        return join('', @addr);
3648                                }
3649                        },
3650                        description => 'Valid email addresses',
3651                },
3652
3653                url => {
3654                        code => q{
3655                                Gen {
3656                                        my @schemes = qw(http https);
3657                                        my @tlds = qw(com org net io);
3658                                        my $scheme = $schemes[int(rand(@schemes))];
3659                                        my $domain = join('', map { ('a'..'z')[int(rand(26))] } 1..(5 + int(rand(10))));
3660                                        my $tld = $tlds[int(rand(@tlds))];
3661                                        my $path = join('', map { ('a'..'z', '0'..'9', '-', '_')[int(rand(38))] } 1..int(rand(20)));
3662
3663                                        return "$scheme://$domain.$tld" . ($path ? "/$path" : '');
3664                                }
3665                        },
3666                        description => 'Valid HTTP/HTTPS URLs',
3667                },
3668
3669                uuid => {
3670                        code => q{
3671                                Gen {
3672                                        require UUID::Tiny;
3673                                        UUID::Tiny::create_uuid_as_string(UUID::Tiny::UUID_V4());
3674                                }
3675                        },
3676                        description => 'Valid UUIDv4 identifiers',
3677                },
3678
3679                phone_us => {
3680                        code => q{
3681                                Gen {
3682                                        my $area = 200 + int(rand(800));
3683                                        my $exchange = 200 + int(rand(800));
3684                                        my $subscriber = int(rand(10000));
3685                                        sprintf('%03d-%03d-%04d', $area, $exchange, $subscriber);
3686                                }
3687                        },
3688                        description => 'US phone numbers (XXX-XXX-XXXX format)',
3689                },
3690
3691                phone_e164 => {
3692                        code => q{
3693                                Gen {
3694                                        my $country = 1 + int(rand(999));
3695                                        my $area = 100 + int(rand(900));
3696                                        my $number = int(rand(10000000));
3697                                        sprintf('+%d%03d%07d', $country, $area, $number);
3698                                }
3699                        },
3700                        description => 'E.164 international phone numbers',
3701                },
3702
3703                ipv4 => {
3704                        code => q{
3705                                Gen {
3706                                        join('.', map { int(rand(256)) } 1..4);
3707                                }
3708                        },
3709                        description => 'IPv4 addresses',
3710                },
3711
3712                ipv6 => {
3713                        code => q{
3714                                Gen {
3715                                        join(':', map { sprintf('%04x', int(rand(0x10000))) } 1..8);
3716                                }
3717                        },
3718                        description => 'IPv6 addresses',
3719                },
3720
3721                username => {
3722                        code => q{
3723                                Gen {
3724                                        my $len = 3 + int(rand(13));
3725                                        my @chars = ('a'..'z', '0'..'9', '_', '-');
3726                                        my $first = ('a'..'z')[int(rand(26))];
3727                                        $first . join('', map { $chars[int(rand(@chars))] } 1..($len-1));
3728                                }
3729                        },
3730                        description => 'Valid usernames (alphanumeric with _ and -)',
3731                },
3732
3733                slug => {
3734                        code => q{
3735                                Gen {
3736                                        my @words = qw(quick brown fox jumps over lazy dog hello world test data);
3737                                        my $count = 1 + int(rand(4));
3738                                        join('-', map { $words[int(rand(@words))] } 1..$count);
3739                                }
3740                        },
3741                        description => 'URL slugs (lowercase words separated by hyphens)',
3742                },
3743
3744                hex_color => {
3745                        code => q{
3746                                Gen {
3747                                        sprintf('#%06x', int(rand(0x1000000)));
3748                                }
3749                        },
3750                        description => 'Hex color codes (#RRGGBB)',
3751                },
3752
3753                iso_date => {
3754                        code => q{
3755                                Gen {
3756                                        my $year = 2000 + int(rand(25));
3757                                        my $month = 1 + int(rand(12));
3758                                        my $day = 1 + int(rand(28));
3759                                        sprintf('%04d-%02d-%02d', $year, $month, $day);
3760                                }
3761                        },
3762                        description => 'ISO 8601 date format (YYYY-MM-DD)',
3763                },
3764
3765                iso_datetime => {
3766                        code => q{
3767                                Gen {
3768                                        my $year = 2000 + int(rand(25));
3769                                        my $month = 1 + int(rand(12));
3770                                        my $day = 1 + int(rand(28));
3771                                        my $hour = int(rand(24));
3772                                        my $minute = int(rand(60));
3773                                        my $second = int(rand(60));
3774                                        sprintf('%04d-%02d-%02dT%02d:%02d:%02dZ',
3775                                                $year, $month, $day, $hour, $minute, $second);
3776                                }
3777                        },
3778                        description => 'ISO 8601 datetime format (YYYY-MM-DDTHH:MM:SSZ)',
3779                },
3780
3781                semver => {
3782                        code => q{
3783                                Gen {
3784                                        my $major = int(rand(10));
3785                                        my $minor = int(rand(20));
3786                                        my $patch = int(rand(50));
3787                                        "$major.$minor.$patch";
3788                                }
3789                        },
3790                        description => 'Semantic version strings (major.minor.patch)',
3791                },
3792
3793                jwt => {
3794                        code => q{
3795                                Gen {
3796                                        my @chars = ('A'..'Z', 'a'..'z', '0'..'9', '-', '_');
3797                                        my $header    = join('', map { $chars[int(rand(@chars))] } 1..20);
3798                                        my $payload   = join('', map { $chars[int(rand(@chars))] } 1..40);
3799                                        my $signature = join('', map { $chars[int(rand(@chars))] } 1..30);
3800                                        "$header.$payload.$signature";
3801                                }
3802                        },
3803                        description => 'JWT-like tokens (base64url format)',
3804                },
3805
3806                json => {
3807                        code => q{
3808                                Gen {
3809                                        my @keys = qw(id name value status count);
3810                                        my $key = $keys[int(rand(@keys))];
3811                                        my $value = 1 + int(rand(1000));
3812                                        qq({"$key":$value});
3813                                }
3814                        },
3815                        description => 'Simple JSON objects',
3816                },
3817
3818                base64 => {
3819                        code => q{
3820                                Gen {
3821                                        my @chars = ('A'..'Z', 'a'..'z', '0'..'9', '+', '/');
3822                                        my $len = 12 + int(rand(20));
3823                                        my $str = join('', map { $chars[int(rand(@chars))] } 1..$len);
3824                                        $str .= '=' x (4 - ($len % 4)) if $len % 4;
3825                                        $str;
3826                                }
3827                        },
3828                        description => 'Base64-encoded strings',
3829                },
3830
3831                md5 => {
3832                        code => q{
3833                                Gen {
3834                                        join('', map { sprintf('%x', int(rand(16))) } 1..32);
3835                                }
3836                        },
3837                        description => 'MD5 hashes (32 hex characters)',
3838                },
3839
3840                sha256 => {
3841                        code => q{
3842                                Gen {
3843                                        join('', map { sprintf('%x', int(rand(16))) } 1..64);
3844                                }
3845                        },
3846                        description => 'SHA-256 hashes (64 hex characters)',
3847                },
3848
3849                unix_timestamp => {
3850                        code => q{
3851                                Gen {
3852                                        time;
3853                                }
3854                        },
3855                        description => 'Unix timestamps (seconds since epoch)',
3856                },
3857        };
3858}
3859
3860# --------------------------------------------------
3861# _get_builtin_properties
3862#
3863# Purpose:    Return a hashref of named built-in
3864#             property templates that can be
3865#             referenced by name in a transform's
3866#             'properties' list in the schema.
3867#             Each entry contains a 'description'
3868#             string, a 'code_template' coderef, and
3869#             an 'applicable_to' arrayref.
3870#
3871# Entry:      None.
3872#
3873# Exit:       Returns a hashref keyed by property
3874#             name. Each value is a hashref with
3875#             'description', 'code_template', and
3876#             'applicable_to' keys.
3877#
3878# Notes:      'applicable_to' lists the types for
3879#             which each property is meaningful. It
3880#             is stored for documentation purposes
3881#             and potential future filtering — it is
3882#             not currently enforced by any caller.
3883#
3884#             Each 'code_template' coderef receives
3885#             three arguments: ($function, $call_code,
3886#             $input_vars). Most templates use only
3887#             $call_code; $function and $input_vars
3888#             are provided for templates that need
3889#             them (e.g. idempotent, length_preserved,
3890#             preserves_keys).
3891#
3892#             'monotonic_increasing' has been
3893#             intentionally omitted. A correct
3894#             implementation requires calling the
3895#             function twice with ordered inputs,
3896#             which the current single-call property
3897#             framework does not support. A
3898#             placeholder that unconditionally returns
3899#             true would give false confidence and has
3900#             therefore been removed.
3901# --------------------------------------------------
3902sub _get_builtin_properties {
3903        return {
3904                idempotent => {
3905                        description   => 'Function is idempotent: f(f(x)) == f(x)',
3906                        code_template => sub {
3907
2
14711
                                my ($function, $call_code, $input_vars) = @_;
3908
3909                                # String comparison works for all scalar types in Perl —
3910                                # numeric values stringify consistently for eq
3911
2
4
                                return "do { my \$tmp = $call_code; \$result eq \$tmp }";
3912                        },
3913                        applicable_to => ['all'],
3914                },
3915
3916                non_negative => {
3917                        description   => 'Result is always non-negative',
3918                        code_template => sub {
3919
3
267
                                my ($function, $call_code, $input_vars) = @_;
3920
3
3
                                return '$result >= 0';
3921                        },
3922                        applicable_to => ['number', 'integer', 'float'],
3923                },
3924
3925                positive => {
3926                        description   => 'Result is always positive (> 0)',
3927                        code_template => sub {
3928
2
276
                                my ($function, $call_code, $input_vars) = @_;
3929
2
3
                                return '$result > 0';
3930                        },
3931                        applicable_to => ['number', 'integer', 'float'],
3932                },
3933
3934                non_empty => {
3935                        description   => 'Result is never empty',
3936                        code_template => sub {
3937
2
303
                                my ($function, $call_code, $input_vars) = @_;
3938
2
3
                                return 'length($result) > 0';
3939                        },
3940                        applicable_to => ['string'],
3941                },
3942
3943                length_preserved => {
3944                        description   => 'Output length equals input length',
3945                        code_template => sub {
3946
2
272
                                my ($function, $call_code, $input_vars) = @_;
3947
2
2
                                my $first_var = $input_vars->[0];
3948
2
3
                                return "length(\$result) == length(\$$first_var)";
3949                        },
3950                        applicable_to => ['string'],
3951                },
3952
3953                uppercase => {
3954                        description   => 'Result is all uppercase',
3955                        code_template => sub {
3956
2
264
                                my ($function, $call_code, $input_vars) = @_;
3957
2
2
                                return '$result eq uc($result)';
3958                        },
3959                        applicable_to => ['string'],
3960                },
3961
3962                lowercase => {
3963                        description   => 'Result is all lowercase',
3964                        code_template => sub {
3965
2
331
                                my ($function, $call_code, $input_vars) = @_;
3966
2
4
                                return '$result eq lc($result)';
3967                        },
3968                        applicable_to => ['string'],
3969                },
3970
3971                trimmed => {
3972                        description   => 'Result has no leading or trailing whitespace',
3973                        code_template => sub {
3974
2
295
                                my ($function, $call_code, $input_vars) = @_;
3975
2
2
                                return '$result !~ /^\s/ && $result !~ /\s$/';
3976                        },
3977                        applicable_to => ['string'],
3978                },
3979
3980                sorted_ascending => {
3981                        description   => 'Array is sorted in ascending order',
3982                        code_template => sub {
3983
2
264
                                my ($function, $call_code, $input_vars) = @_;
3984
2
2
                                return 'do { my @arr = @$result; my $sorted = 1; ' .
3985                                        'for my $i (1..$#arr) { $sorted = 0 if $arr[$i] < $arr[$i-1]; } ' .
3986                                        '$sorted }';
3987                        },
3988                        applicable_to => ['arrayref'],
3989                },
3990
3991                sorted_descending => {
3992                        description   => 'Array is sorted in descending order',
3993                        code_template => sub {
3994
2
266
                                my ($function, $call_code, $input_vars) = @_;
3995
2
3
                                return 'do { my @arr = @$result; my $sorted = 1; ' .
3996                                        'for my $i (1..$#arr) { $sorted = 0 if $arr[$i] > $arr[$i-1]; } ' .
3997                                        '$sorted }';
3998                        },
3999                        applicable_to => ['arrayref'],
4000                },
4001
4002                unique_elements => {
4003                        description   => 'Array has no duplicate elements',
4004                        code_template => sub {
4005
2
271
                                my ($function, $call_code, $input_vars) = @_;
4006
2
2
                                return 'do { my @arr = @$result; my %seen; !grep { $seen{$_}++ } @arr }';
4007                        },
4008                        applicable_to => ['arrayref'],
4009                },
4010
4011                preserves_keys => {
4012                        description   => 'Hash has same keys as input',
4013                        code_template => sub {
4014
2
139
                                my ($function, $call_code, $input_vars) = @_;
4015
2
2
                                my $first_var = $input_vars->[0];
4016
2
4
                                return 'do { my @in  = sort keys %{$' . $first_var . '}; ' .
4017                                        'my @out = sort keys %$result; ' .
4018                                        'join(",", @in) eq join(",", @out) }';
4019                        },
4020
28
32594
                        applicable_to => ['hashref'],
4021                },
4022        };
4023}
4024
4025# --------------------------------------------------
4026# _schema_to_lectrotest_generator
4027#
4028# Purpose:    Convert a single schema field spec
4029#             hashref into a LectroTest generator
4030#             declaration string of the form
4031#             '$field <- Generator(...)'.
4032#             Used to build the ##[ ... ]## generator
4033#             block inside a Property definition.
4034#
4035# Entry:      $field_name - the parameter name as it
4036#                           will appear in the
4037#                           generated test code.
4038#             $spec       - hashref containing at
4039#                           minimum a 'type' key.
4040#                           May also contain 'min',
4041#                           'max', 'semantic', and
4042#                           'matches' keys depending
4043#                           on type.
4044#
4045# Exit:       Returns a string of the form
4046#             '$field <- Generator(...)' on success.
4047#             Returns undef if the spec is not a
4048#             hashref or if range constraints are
4049#             invalid (min >= max for numeric types).
4050#             Returns a String generator with a carp
4051#             warning for unknown types.
4052#
4053# Side effects: Carps on unknown semantic types,
4054#               invalid numeric ranges, and unknown
4055#               field types.
4056#
4057# Notes:      Semantic generators are checked first
4058#             for string fields and take precedence
4059#             over the regular string generator.
4060#             The $input_spec parameter in the type-
4061#             detection helpers is reserved for future
4062#             use and is currently unused.
4063# --------------------------------------------------
4064sub _schema_to_lectrotest_generator {
4065
34
17204
        my ($field_name, $spec) = @_;
4066
4067        # Guard: must be a hashref to dereference safely
4068
34
71
        return unless defined($spec) && ref($spec) eq 'HASH';
4069
4070        # Default to string when no type is declared
4071
31
33
        my $type = $spec->{'type'} || $DEFAULT_FIELD_TYPE;
4072
4073        # --------------------------------------------------
4074        # Semantic generators take precedence for string
4075        # fields — they produce realistic domain-specific
4076        # values rather than random character sequences
4077        # --------------------------------------------------
4078
31
53
        if($type eq 'string' && defined($spec->{'semantic'})) {
4079
1
1
                my $semantic_type = $spec->{'semantic'};
4080
1
2
                my $generators    = _get_semantic_generators();
4081
4082
1
1
                if(exists($generators->{$semantic_type})) {
4083
1
1
                        my $gen_code = $generators->{$semantic_type}{'code'};
4084
4085                        # Compress the multi-line generator code into a
4086                        # single line for embedding in the ##[ ]## block
4087
1
3
                        $gen_code =~ s/^\s+//;
4088
1
7
                        $gen_code =~ s/\s+$//;
4089
1
6
                        $gen_code =~ s/\n\s+/ /g;
4090
4091
1
5
                        return "$field_name <- $gen_code";
4092                } else {
4093
0
0
                        carp "Unknown semantic type '$semantic_type', " .
4094                                "falling back to regular string generator";
4095                        # Fall through to regular string generation below
4096                }
4097        }
4098
4099        # --------------------------------------------------
4100        # Integer generator
4101        # --------------------------------------------------
4102
30
30
        if($type eq 'integer') {
4103
6
4
                my $min = $spec->{'min'};
4104
6
6
                my $max = $spec->{'max'};
4105
4106
6
14
                if(!defined($min) && !defined($max)) {
4107                        # Unconstrained — use LectroTest's built-in Int
4108
3
6
                        return "$field_name <- Int";
4109                } elsif(!defined($min)) {
4110                        # Only max defined — generate 0 to max
4111
0
0
                        return "$field_name <- Int(sized => sub { int(rand($max + 1)) })";
4112                } elsif(!defined($max)) {
4113                        # Only min defined — generate min to min + range
4114
0
0
                        return "$field_name <- Int(sized => sub { $min + int(rand($DEFAULT_GENERATOR_RANGE)) })";
4115                } else {
4116                        # Both defined — generate within [min, max]
4117
3
3
                        my $range = $max - $min;
4118
3
8
                        return "$field_name <- Int(sized => sub { $min + int(rand($range + 1)) })";
4119                }
4120        }
4121
4122        # --------------------------------------------------
4123        # Float / number generator
4124        # --------------------------------------------------
4125
24
51
        if($type eq 'number' || $type eq 'float') {
4126
12
9
                my $min = $spec->{'min'};
4127
12
9
                my $max = $spec->{'max'};
4128
4129
12
29
                if(!defined($min) && !defined($max)) {
4130                        # Unconstrained — symmetric range around zero
4131
3
7
                        return "$field_name <- Float(sized => sub { rand($DEFAULT_GENERATOR_RANGE) - $DEFAULT_GENERATOR_RANGE / 2 })";
4132
4133                } elsif(!defined($min)) {
4134                        # Only max defined — choose range based on sign of max
4135
4
7
                        if($max == $ZERO_BOUNDARY) {
4136                                # max=0: negative numbers only
4137
4
17
                                return "$field_name <- Float(sized => sub { -rand($DEFAULT_GENERATOR_RANGE) })";
4138                        } elsif($max > $ZERO_BOUNDARY) {
4139                                # Positive max: generate 0 to max
4140
0
0
                                return "$field_name <- Float(sized => sub { rand($max) })";
4141                        } else {
4142                                # Negative max: generate from (max - range) to max
4143
0
0
                                return "$field_name <- Float(sized => sub { ($max - $DEFAULT_GENERATOR_RANGE) + rand($DEFAULT_GENERATOR_RANGE + $max) })";
4144                        }
4145
4146                } elsif(!defined($max)) {
4147                        # Only min defined — choose range based on sign of min
4148
3
3
                        if($min == $ZERO_BOUNDARY) {
4149                                # min=0: positive numbers only
4150
3
10
                                return "$field_name <- Float(sized => sub { rand($DEFAULT_GENERATOR_RANGE) })";
4151                        } elsif($min > $ZERO_BOUNDARY) {
4152                                # Positive min: generate min to min + range
4153
0
0
                                return "$field_name <- Float(sized => sub { $min + rand($DEFAULT_GENERATOR_RANGE) })";
4154                        } else {
4155                                # Negative min: generate from min to min + range
4156
0
0
                                return "$field_name <- Float(sized => sub { $min + rand(-$min + $DEFAULT_GENERATOR_RANGE) })";
4157                        }
4158
4159                } else {
4160                        # Both min and max defined — validate then generate
4161
2
2
                        my $range = $max - $min;
4162
2
4
                        if($range <= $ZERO_BOUNDARY) {
4163
2
15
                                carp "Invalid range for '$field_name': min=$min, max=$max";
4164                                # Return undef rather than emitting a degenerate
4165                                # generator that would silently produce wrong values
4166
2
191
                                return;
4167                        }
4168
0
0
                        return "$field_name <- Float(sized => sub { $min + rand($range) })";
4169                }
4170        }
4171
4172        # --------------------------------------------------
4173        # String generator
4174        # --------------------------------------------------
4175
12
13
        if($type eq 'string') {
4176
7
12
                my $min_len = $spec->{'min'} // 0;
4177
7
18
                my $max_len = $spec->{'max'} // $DEFAULT_MAX_STRING_LEN;
4178
4179                # If a regex pattern is declared, delegate to
4180                # Data::Random::String::Matches for pattern-aware generation
4181
7
28
                if(defined($spec->{'matches'})) {
4182
3
3
                        my $pattern = $spec->{'matches'};
4183
4184                        # Compile the pattern safely rather than splicing the raw
4185                        # string into qr/$pattern/ — the raw form lets a pattern
4186                        # containing an unescaped '/' break out of the qr//
4187                        # delimiter and inject arbitrary Perl into the generated
4188                        # test. regexp_pattern() decomposes the already-compiled
4189                        # Regexp object back into pattern text that is guaranteed
4190                        # to be a self-contained regex body, safe to re-embed.
4191
3
3
5
25
                        my $compiled = ref($pattern) eq 'Regexp' ? $pattern : eval { qr/$pattern/ };
4192
3
8
                        if($@ || !defined($compiled)) {
4193
0
0
                                carp "Invalid matches pattern '$pattern' for field '$field_name': $@";
4194
0
0
                                return "$field_name <- String(length => [$min_len, $max_len])";
4195                        }
4196
3
8
                        my ($pat, $mods) = regexp_pattern($compiled);
4197
3
6
                        my $safe_re = "qr{$pat}" . ($mods // '');
4198
4199
3
8
                        if(defined($spec->{'max'})) {
4200
0
0
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => $safe_re, length => $spec->{'max'} }) }";
4201                        } elsif(defined($spec->{'min'})) {
4202
0
0
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => $safe_re, length => $spec->{'min'} }) }";
4203                        } else {
4204
3
10
                                return "$field_name <- Gen { Data::Random::String::Matches->create_random_string({ regex => $safe_re }) }";
4205                        }
4206                }
4207
4208
4
8
                return "$field_name <- String(length => [$min_len, $max_len])";
4209        }
4210
4211        # --------------------------------------------------
4212        # Boolean generator
4213        # --------------------------------------------------
4214
5
6
        if($type eq 'boolean') {
4215
2
5
                return "$field_name <- Bool";
4216        }
4217
4218        # --------------------------------------------------
4219        # Arrayref generator
4220        # --------------------------------------------------
4221
3
5
        if($type eq 'arrayref') {
4222
2
5
                my $min_size = $spec->{'min'} // 0;
4223
2
6
                my $max_size = $spec->{'max'} // $DEFAULT_MAX_COLLECTION_SIZE;
4224
2
9
                return "$field_name <- List(Int, length => [$min_size, $max_size])";
4225        }
4226
4227        # --------------------------------------------------
4228        # Hashref generator
4229        # LectroTest has no built-in Hash generator so we
4230        # use Elements over a pre-built list of hashrefs
4231        # --------------------------------------------------
4232
1
1
        if($type eq 'hashref') {
4233
1
8
                my $min_keys = $spec->{'min'} // 0;
4234
1
1
                my $max_keys = $spec->{'max'} // $DEFAULT_MAX_COLLECTION_SIZE;
4235
1
3
                return "$field_name <- Elements(map { my \%h; for (1..\$_) { \$h{'key'.\$_} = \$_ }; \\\%h } $min_keys..$max_keys)";
4236        }
4237
4238        # --------------------------------------------------
4239        # Unknown type — fall back to String with a warning
4240        # --------------------------------------------------
4241
0
0
        carp "Unknown type '$type' for '$field_name' LectroTest generator, using String";
4242
0
0
        return "$field_name <- String";
4243}
4244
4245# --------------------------------------------------
4246# _is_numeric_transform
4247#
4248# Determine whether a transform's output
4249#     spec declares a numeric type, indicating
4250#     that numeric range properties should be
4251#     generated for it.
4252#
4253# Entry:      $input_spec  - the transform's input
4254#                            spec hashref. Currently
4255#                            unused; reserved for
4256#                            future input-type checks.
4257#             $output_spec - the transform's output
4258#                            spec hashref.
4259#
4260# Exit:       Returns 1 if the output type is one of
4261#             'number', 'integer', or 'float'.
4262#             Returns 0 otherwise.
4263# --------------------------------------------------
4264sub _is_numeric_transform {
4265
37
4263
        my ($input_spec, $output_spec) = @_;
4266
4267        # $input_spec is currently unused — reserved for future
4268        # input-side type checking when detecting mixed transforms
4269
37
59
        my $out_type = ($output_spec // {})->{'type'} // '';
4270
4271
37
92
        return($out_type eq 'number' || $out_type eq 'integer' || $out_type eq 'float');
4272}
4273
4274# --------------------------------------------------
4275# _is_string_transform
4276#
4277# Purpose:    Determine whether a transform's output
4278#             spec declares a string type, indicating
4279#             that string length and pattern properties
4280#             should be generated for it.
4281#
4282# Entry:      $input_spec  - the transform's input
4283#                            spec hashref. Currently
4284#                            unused; reserved for
4285#                            future input-type checks.
4286#             $output_spec - the transform's output
4287#                            spec hashref.
4288#
4289# Exit:       Returns 1 if the output type is 'string'.
4290#             Returns 0 otherwise.
4291# --------------------------------------------------
4292sub _is_string_transform {
4293
31
3422
        my ($input_spec, $output_spec) = @_;
4294
4295        # $input_spec is currently unused — reserved for future
4296        # input-side type checking when detecting mixed transforms
4297
31
48
        my $out_type = ($output_spec // {})->{'type'} // '';
4298
4299
31
40
        return($out_type eq 'string');
4300}
4301
4302# --------------------------------------------------
4303# _same_type
4304#
4305# Purpose:    Determine whether the dominant type of
4306#             a transform's input and output specs
4307#             match, indicating that type-preservation
4308#             properties are meaningful.
4309#
4310# Entry:      $input_spec  - the transform's input
4311#                            spec hashref, or a nested
4312#                            multi-field hashref.
4313#             $output_spec - the transform's output
4314#                            spec hashref.
4315#
4316# Exit:       Returns 1 if the dominant input and
4317#             output types are identical strings.
4318#             Returns 0 otherwise.
4319#
4320# Notes:      Uses _get_dominant_type for both sides.
4321#             For multi-field input specs, dominant
4322#             type is the type of the first field
4323#             encountered — this is a simplification.
4324#             TODO: extend to handle mixed-type inputs
4325#             by checking all fields, not just the
4326#             first one found.
4327# --------------------------------------------------
4328sub _same_type {
4329
31
4638
        my ($input_spec, $output_spec) = @_;
4330
4331        # Guard: treat missing specs as untyped — two untyped
4332        # specs both default to $DEFAULT_FIELD_TYPE and would
4333        # compare equal, which is intentionally conservative
4334
31
40
        my $in_type  = _get_dominant_type($input_spec  // {});
4335
31
43
        my $out_type = _get_dominant_type($output_spec // {});
4336
4337
31
47
        return($in_type eq $out_type);
4338}
4339
4340# --------------------------------------------------
4341# _get_dominant_type
4342#
4343# Purpose:    Extract the most representative type
4344#             string from a spec hashref. For flat
4345#             output specs this is simply the 'type'
4346#             key. For multi-field input specs it is
4347#             the type of the first sub-field found
4348#             that declares one.
4349#
4350# Entry:      $spec - a spec hashref. May be a flat
4351#                     output spec ({ type => '...' })
4352#                     or a multi-field input spec
4353#                     ({ field => { type => '...' } }).
4354#                     May be undef or empty.
4355#
4356# Exit:       Returns a type string. Returns
4357#             $DEFAULT_FIELD_TYPE ('string') if no
4358#             type can be determined.
4359# --------------------------------------------------
4360sub _get_dominant_type {
4361
93
6613
        my $spec = $_[0];
4362
4363        # Guard: return default for undef or non-hash input
4364
93
125
        return $DEFAULT_FIELD_TYPE
4365                unless defined($spec) && ref($spec) eq 'HASH';
4366
4367        # Flat spec — type declared directly
4368
91
97
        return $spec->{'type'} if defined($spec->{'type'});
4369
4370        # Multi-field spec — return the type of the first
4371        # sub-field that declares one
4372
36
36
35
41
        for my $field (keys %{$spec}) {
4373
31
47
                next unless ref($spec->{$field}) eq 'HASH';
4374                return $spec->{$field}{'type'}
4375
29
47
                        if defined($spec->{$field}{'type'});
4376        }
4377
4378        # No type found anywhere — return the safe default
4379
8
9
        return $DEFAULT_FIELD_TYPE;
4380}
4381
4382# --------------------------------------------------
4383# _render_properties
4384#
4385# Purpose:    Render an arrayref of property definition
4386#             hashrefs (as produced by
4387#             _generate_transform_properties) into a
4388#             string of Perl source code suitable for
4389#             embedding in a generated test file.
4390#             The output uses Test::LectroTest::Compat
4391#             to run each property as a holds() check.
4392#
4393# Entry:      $properties - arrayref of property
4394#             hashrefs, each containing: name,
4395#             generator_spec, call_code,
4396#             property_checks, should_die,
4397#             should_warn, trials.
4398#             May be undef or an empty arrayref.
4399#
4400# Exit:       Returns a string of Perl source code.
4401#             Returns an empty string if $properties
4402#             is undef, not an arrayref, or empty.
4403#
4404# Notes:      The generated code uses 4-space
4405#             indentation deliberately — this is the
4406#             indentation style of the generated test
4407#             file, not of this module. Tabs are used
4408#             in this module's own source; spaces are
4409#             emitted into generated output for
4410#             readability of the produced test files.
4411# --------------------------------------------------
4412sub _render_properties {
4413
12
9894
        my $properties = $_[0];
4414
4415        # Return empty string for absent or non-array input —
4416        # callers treat '' as no property block to emit
4417
12
29
        return '' unless defined($properties) && ref($properties) eq 'ARRAY';
4418
9
9
6
18
        return '' unless @{$properties};
4419
4420
7
10
        my $code = "use_ok('Test::LectroTest::Compat');\n\n";
4421
4422
7
7
3
8
        for my $prop (@{$properties}) {
4423                # Emit a labelled Property block for each transform property
4424
10
14
                $code .= "# Transform property: $prop->{'name'}\n";
4425
10
10
                $code .= "my \$$prop->{'name'} = Property {\n";
4426
10
8
                $code .= "    ##[ $prop->{'generator_spec'} ]##\n";
4427
10
9
                $code .= "    \n";
4428
10
7
                $code .= "    my \$result = eval { $prop->{'call_code'} };\n";
4429
4430
10
13
                if($prop->{'should_die'}) {
4431                        # For transforms that expect death, pass if the
4432                        # eval caught an exception
4433
2
3
                        $code .= "    my \$died = defined(\$\@) && \$\@;\n";
4434
2
2
                        $code .= "    \$died;\n";
4435                } else {
4436                        # For normal transforms, pass only if no exception
4437                        # was thrown and all property checks hold
4438
8
8
                        $code .= "    my \$error = \$\@;\n";
4439
8
5
                        $code .= "    \n";
4440
8
7
                        $code .= "    !\$error && (\n";
4441
8
4
                        $code .= "        $prop->{'property_checks'}\n";
4442
8
7
                        $code .= "    );\n";
4443                }
4444
4445
10
12
                $code .= "}, name => '$prop->{'name'}', trials => $prop->{'trials'};\n\n";
4446
10
9
                $code .= "holds(\$$prop->{'name'});\n";
4447        }
4448
4449
7
12
        return $code;
4450}
4451
4452# --------------------------------------------------
4453# _detect_transform_properties
4454#
4455# Purpose:    Automatically derive a list of testable
4456#             LectroTest property hashrefs from a
4457#             transform's input and output specs.
4458#             Detects numeric range constraints, exact
4459#             value matches, string length constraints,
4460#             type preservation, and definedness.
4461#
4462# Entry:      $transform_name - string name of the
4463#                               transform, used for
4464#                               heuristic matching
4465#                               (e.g. 'positive').
4466#             $input_spec     - the transform's input
4467#                               hashref, or the string
4468#                               'undef'.
4469#             $output_spec    - the transform's output
4470#                               hashref, or undef if
4471#                               absent.
4472#
4473# Exit:       Returns a list of property hashrefs,
4474#             each containing 'name' and 'code' keys.
4475#             Returns an empty list if no properties
4476#             can be detected or if $input_spec is
4477#             undef or the string 'undef'.
4478#
4479# Notes:      The 'positive' heuristic checks the
4480#             transform name case-insensitively against
4481#             $TRANSFORM_POSITIVE_PATTERN and adds a
4482#             non-negative constraint if matched.
4483#             This is intentionally a rough heuristic
4484#             rather than a precise semantic check.
4485# --------------------------------------------------
4486sub _detect_transform_properties {
4487
28
15087
        my ($transform_name, $input_spec, $output_spec) = @_;
4488
4489
28
22
        my @properties;
4490
4491        # Guard: skip undef input and the YAML scalar 'undef'
4492
28
29
        return @properties unless defined($input_spec);
4493
26
42
        return @properties if(!ref($input_spec) && $input_spec eq 'undef');
4494
4495        # Default output spec to empty hash so all key lookups
4496        # below are safe regardless of what the schema provides
4497
24
22
        $output_spec //= {};
4498
4499        # --------------------------------------------------
4500        # Property 1: Output range constraints (numeric)
4501        # --------------------------------------------------
4502
24
22
        if(_is_numeric_transform($input_spec, $output_spec)) {
4503
15
18
                if(defined($output_spec->{'min'})) {
4504
11
9
                        my $min = $output_spec->{'min'};
4505
11
17
                        push @properties, {
4506                                name => 'min_constraint',
4507                                code => "defined(\$result) && looks_like_number(\$result) && \$result >= $min",
4508                        };
4509                }
4510
4511
15
22
                if(defined($output_spec->{'max'})) {
4512
2
2
                        my $max = $output_spec->{'max'};
4513
2
5
                        push @properties, {
4514                                name => 'max_constraint',
4515                                code => "defined(\$result) && looks_like_number(\$result) && \$result <= $max",
4516                        };
4517                }
4518
4519                # Heuristic: transforms named 'positive' (case-insensitive)
4520                # imply a non-negative result constraint
4521
15
29
                if($transform_name =~ /$TRANSFORM_POSITIVE_PATTERN/i) {
4522
6
26
                        push @properties, {
4523                                name => 'non_negative',
4524                                code => "defined(\$result) && looks_like_number(\$result) && \$result >= 0",
4525                        };
4526                }
4527        }
4528
4529        # --------------------------------------------------
4530        # Property 2: Specific value output
4531        # --------------------------------------------------
4532
24
66
        if(defined($output_spec->{'value'})) {
4533
2
2
                my $expected = $output_spec->{'value'};
4534
4535                # Numeric refs use == for comparison; scalars use eq
4536                # via perl_quote to produce the correct quoted literal
4537
2
4
                push @properties, {
4538                        name => 'exact_value',
4539                        code => ref($expected)
4540                                ? "\$result == $expected"
4541                                : "\$result eq " . perl_quote($expected),
4542                };
4543        }
4544
4545        # --------------------------------------------------
4546        # Property 3: String length constraints
4547        # --------------------------------------------------
4548
24
29
        if(_is_string_transform($input_spec, $output_spec)) {
4549
6
7
                if(defined($output_spec->{'min'})) {
4550
2
4
                        push @properties, {
4551                                name => 'min_length',
4552                                code => "length(\$result) >= $output_spec->{'min'}",
4553                        };
4554                }
4555
4556
6
6
                if(defined($output_spec->{'max'})) {
4557
0
0
                        push @properties, {
4558                                name => 'max_length',
4559                                code => "length(\$result) <= $output_spec->{'max'}",
4560                        };
4561                }
4562
4563
6
6
                if(defined($output_spec->{'matches'})) {
4564
0
0
                        my $pattern = $output_spec->{'matches'};
4565
4566                        # See the matching comment in _schema_to_lectrotest_generator —
4567                        # compile first and re-embed via regexp_pattern() rather than
4568                        # splicing the raw string into qr/$pattern/, which would let
4569                        # an unescaped '/' break out of the delimiter.
4570
0
0
0
0
                        my $compiled = ref($pattern) eq 'Regexp' ? $pattern : eval { qr/$pattern/ };
4571
0
0
                        if($@ || !defined($compiled)) {
4572
0
0
                                carp "Invalid matches pattern '$pattern' for transform '$transform_name': $@";
4573                        } else {
4574
0
0
                                my ($pat, $mods) = regexp_pattern($compiled);
4575
0
0
                                my $safe_re = "qr{$pat}" . ($mods // '');
4576
0
0
                                push @properties, {
4577                                        name => 'pattern_match',
4578                                        code => "\$result =~ $safe_re",
4579                                };
4580                        }
4581                }
4582        }
4583
4584        # --------------------------------------------------
4585        # Property 4: Type preservation
4586        # --------------------------------------------------
4587
24
22
        if(_same_type($input_spec, $output_spec)) {
4588
22
22
                my $type = _get_dominant_type($output_spec);
4589
4590                # Only emit a numeric_type check for numeric types —
4591                # string and other types have no equivalent simple check
4592
22
32
                if($type eq 'number' || $type eq 'integer' || $type eq 'float') {
4593
15
21
                        push @properties, {
4594                                name => 'numeric_type',
4595                                code => 'looks_like_number($result)',
4596                        };
4597                }
4598        }
4599
4600        # --------------------------------------------------
4601        # Property 5: Definedness
4602        # --------------------------------------------------
4603        # Emit a defined() check for all transforms except those
4604        # whose output type is explicitly 'undef' — those are
4605        # expected to return nothing
4606
24
34
        unless(($output_spec->{'type'} // '') eq 'undef') {
4607
22
40
                push @properties, {
4608                        name => 'defined',
4609                        code => 'defined($result)',
4610                };
4611        }
4612
4613
24
31
        return @properties;
4614}
4615
4616# --------------------------------------------------
4617# _process_custom_properties
4618#
4619# Purpose:    Process the 'properties' array from a
4620#             transform definition, resolving each
4621#             entry to either a named builtin property
4622#             (looked up from _get_builtin_properties)
4623#             or a custom property with inline code.
4624#
4625# Entry:      $properties_spec - arrayref of property
4626#                                definitions from the
4627#                                schema. Each element
4628#                                is either a string
4629#                                (builtin name) or a
4630#                                hashref with 'name'
4631#                                and 'code' fields.
4632#             $function        - name of the function
4633#                                under test.
4634#             $module          - module name, or undef
4635#                                for builtins.
4636#             $input_spec      - the transform's input
4637#                                spec hashref.
4638#             $output_spec     - the transform's output
4639#                                spec hashref.
4640#             $new             - defined if the function
4641#                                is an OO method; value
4642#                                is not used, only
4643#                                presence is checked.
4644#
4645# Exit:       Returns a list of property hashrefs,
4646#             each containing 'name', 'code', and
4647#             'description' keys.
4648#             Invalid or unrecognised entries are
4649#             skipped with a carp warning.
4650#
4651# Side effects: Carps on unrecognised builtin names,
4652#               missing code fields, and invalid
4653#               property definition types.
4654#
4655# Notes:      The sixth argument is $new (the OO
4656#             constructor signal), not the full schema
4657#             hashref. It is used only to determine
4658#             whether to emit OO-style call code for
4659#             builtin property templates.
4660# --------------------------------------------------
4661sub _process_custom_properties {
4662
7
10732
        my ($properties_spec, $function, $module, $input_spec, $output_spec, $new) = @_;
4663
4664
7
6
        my @properties;
4665
7
7
        my $builtin_properties = _get_builtin_properties();
4666
4667
7
7
8
6
        for my $prop_def (@{$properties_spec}) {
4668
6
6
                my $prop_name;
4669                my $prop_code;
4670
6
0
                my $prop_desc;
4671
4672
6
9
                if(!ref($prop_def)) {
4673                        # Plain string — look up as a named builtin property
4674
2
1
                        $prop_name = $prop_def;
4675
4676
2
4
                        unless(exists($builtin_properties->{$prop_name})) {
4677
1
5
                                carp "Unknown built-in property '$prop_name', skipping";
4678
1
114
                                next;
4679                        }
4680
4681
1
1
                        my $builtin = $builtin_properties->{$prop_name};
4682
4683                        # Build the argument list, respecting positional order
4684
1
1
1
1
                        my @var_names = sort keys %{$input_spec};
4685
1
1
                        my @args;
4686
1
1
                        if(_has_positions($input_spec)) {
4687
1
0
2
0
                                my @sorted = sort { $input_spec->{$a}{'position'} <=> $input_spec->{$b}{'position'} } @var_names;
4688
1
1
1
2
                                @args = map { "\$$_" } @sorted;
4689                        } else {
4690
0
0
0
0
                                @args = map { "\$$_" } @var_names;
4691                        }
4692
4693                        # Build the call expression for the builtin template.
4694                        # $new here is the raw OO signal from the caller —
4695                        # defined means OO mode, undef means functional
4696
1
1
                        my $call_code;
4697
1
24
                        if($module && defined($new)) {
4698                                # OO mode — fresh object per trial
4699
0
0
                                $call_code  = "my \$obj = new_ok('$module');";
4700
0
0
                                $call_code .= "\$obj->$function";
4701                        } elsif($module && $module ne $MODULE_BUILTIN) {
4702                                # Functional mode with a named module
4703
0
0
                                $call_code = "$module\::$function";
4704                        } else {
4705                                # Builtin or unqualified function call
4706
1
1
                                $call_code = $function;
4707                        }
4708
1
2
                        $call_code .= '(' . join(', ', @args) . ')';
4709
4710                        # Instantiate the builtin's code template with the
4711                        # call expression and input variable list
4712
1
2
                        $prop_code = $builtin->{'code_template'}->($function, $call_code, \@var_names);
4713
1
2
                        $prop_desc = $builtin->{'description'};
4714
4715                } elsif(ref($prop_def) eq 'HASH') {
4716                        # Hashref — custom property with inline Perl code
4717
3
5
                        $prop_name = $prop_def->{'name'} || 'custom_property';
4718
3
3
                        $prop_code = $prop_def->{'code'};
4719
3
6
                        $prop_desc = $prop_def->{'description'} || "Custom property: $prop_name";
4720
4721
3
3
                        unless($prop_code) {
4722
1
5
                                carp "Custom property '$prop_name' missing 'code' field, skipping";
4723
1
83
                                next;
4724                        }
4725
4726                        # Sanity-check: code must contain at least a variable
4727                        # reference or a word character to be meaningful
4728
2
4
                        unless($prop_code =~ /\$/ || $prop_code =~ /\w+/) {
4729
0
0
                                carp "Custom property '$prop_name' code looks invalid: $prop_code";
4730
0
0
                                next;
4731                        }
4732
4733                } else {
4734                        # Neither string nor hashref — unrecognised definition type
4735
1
1
                        carp 'Invalid property definition: ', render_fallback($prop_def);
4736
1
101
                        next;
4737                }
4738
4739
3
6
                push @properties, {
4740                        name        => $prop_name,
4741                        code        => $prop_code,
4742                        description => $prop_desc,
4743                };
4744        }
4745
4746
7
73
        return @properties;
4747}
4748
4749 - 4826
=head1 NOTES

C<seed> and C<iterations> really should be within C<config>.

=head1 SEE ALSO

=over 4

=item * L<Test Dashboard|https://nigelhorne.github.io/App-Test-Generator/coverage/>

=item * L<App::Test::Generator::Template> - Template of the file of tests created by C<App::Test::Generator>

=item * L<App::Test::Generator::SchemaExtractor> - Create schemas from Perl programs

=item * L<Params::Validate::Strict>: Schema Definition

=item * L<Params::Get>: Input validation

=item * L<Return::Set>: Output validation

=item * L<Test::LectroTest>

=item * L<Test::Most>

=item * L<YAML::XS>

=back

=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

Portions of this module's initial design and documentation were created with the
assistance of AI.

=head1 SUPPORT

This module is provided as-is without any warranty.

You can find documentation for this module with the perldoc command.

    perldoc App::Test::Generator

You can also look for information at:

=over 4

=item * MetaCPAN

L<https://metacpan.org/release/App-Test-Generator>

=item * GitHub

L<https://github.com/nigelhorne/App-Test-Generator>

=item * CPANTS

L<http://cpants.cpanauthors.org/dist/App-Test-Generator>

=item * CPAN Testers' Matrix

L<http://matrix.cpantesters.org/?dist=App-Test-Generator>

=item * CPAN Testers Dependencies

L<http://deps.cpantesters.org/?module=App::Test::Generator>

=back

=head1 LICENCE AND COPYRIGHT

Copyright 2025-2026 Nigel Horne.

Usage is subject to the terms of GPL2.
If you use it,
please let me know.

=cut
4827
48281;