File Coverage

File:blib/lib/App/Test/Generator/CoverageGuidedFuzzer.pm
Coverage:84.8%

linestmtbrancondsubtimecode
1package App::Test::Generator::CoverageGuidedFuzzer;
2
3
7
7
7
147625
6
95
use strict;
4
7
7
7
12
6
140
use warnings;
5
7
7
7
14
5
137
use Carp    qw(croak);
6
7
7
7
13
4
369
use feature 'state';
7
7
7
7
433
3230
15269
use Readonly;
8
9# --------------------------------------------------
10# Fuzzing loop parameters
11# --------------------------------------------------
12Readonly my $DEFAULT_ITERATIONS   => 100;
13Readonly my $DEFAULT_TIMEOUT_SECS => 5;     # per-call alarm() timeout; 0 disables
14Readonly my $CORPUS_MUTATE_RATIO  => 0.70;  # 70% mutate, 30% explore
15Readonly my $RANDOM_KEEP_RATIO    => 0.20;  # keep 20% random when no coverage
16Readonly my $EDGE_CASE_RATIO      => 0.40;  # 40% chance to use declared edge case
17Readonly my $INT_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary int
18Readonly my $STR_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary length
19Readonly my $SEED_CORPUS_SIZE     => 5;     # initial random inputs to seed corpus
20Readonly my $DEFAULT_MAX_STR_LEN  => 64;
21Readonly my $MATCHES_REGEX_TIMEOUT_SECS => 1; # ReDoS guard for schema 'matches' patterns
22Readonly my $DEFAULT_MAX_ARRAY    => 4;     # max elements in random array (0..N)
23Readonly my $INT32_MAX            => 2**31 - 1;
24Readonly my $INT32_MIN            => -(2**31);
25
26# --------------------------------------------------
27# Type name constants — used in schema dispatch
28# --------------------------------------------------
29Readonly my $TYPE_INTEGER => 'integer';
30Readonly my $TYPE_NUMBER  => 'number';
31Readonly my $TYPE_BOOLEAN => 'boolean';
32Readonly my $TYPE_ARRAY   => 'arrayref';
33Readonly my $TYPE_HASH    => 'hashref';
34Readonly my $TYPE_STRING  => 'string';
35
36# --------------------------------------------------
37# JSON module preference order
38# --------------------------------------------------
39Readonly my @JSON_MODULES => qw(JSON::MaybeXS JSON);
40
41our $VERSION = '0.41';
42
43 - 158
=head1 NAME

App::Test::Generator::CoverageGuidedFuzzer - AFL-style coverage-guided fuzzing for App::Test::Generator

=head1 VERSION

Version 0.41

=head1 SYNOPSIS

    use App::Test::Generator::CoverageGuidedFuzzer;

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
    );

    my $report = $fuzzer->run();
    $fuzzer->save_corpus('t/corpus/validate.json');

=head1 DESCRIPTION

Implements coverage-guided fuzzing on top of App::Test::Generator's
existing schema-driven input generation. Instead of purely random
generation it:

=over 4

=item 1. Generates or mutates a structured input

=item 2. Runs the target sub under Devel::Cover to capture branch hits

=item 3. Keeps inputs that discover new branches in a corpus

=item 4. Preferentially mutates corpus entries in future iterations

=back

This is the Perl equivalent of what AFL/libFuzzer do at the byte level,
but operating on typed, schema-validated Perl data structures.

=head1 METHODS

=head2 new

Construct a new coverage-guided fuzzer.

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
        instance   => $obj,   # optional pre-built object for method calls
    );

=head3 Arguments

=over 4

=item * C<schema>

A hashref representing the parsed YAML schema for the target function.
Required.

=item * C<target_sub>

A CODE reference to the function under test. Required.

=item * C<iterations>

Number of fuzzing iterations to run. Optional - defaults to 100.

=item * C<seed>

Random seed for reproducible runs. Optional - defaults to C<time()>.

=item * C<instance>

An optional pre-built object to use as the invocant when calling the
target sub as a method.

=item * C<timeout>

Seconds to allow each C<target_sub> call before it is aborted via
C<alarm()> and recorded as a bug. Optional - defaults to 5. Set to 0
to disable the timeout (e.g. for target subs that legitimately block).

=back

=head3 Returns

A blessed hashref. Croaks if C<schema> or C<target_sub> is missing.

=head3 API specification

=head4 input

    {
        schema     => { type => HASHREF },
        target_sub => { type => CODEREF },
        iterations => { type => SCALAR,  optional => 1 },
        seed       => { type => SCALAR,  optional => 1 },
        instance   => { type => OBJECT,  optional => 1 },
        timeout    => { type => SCALAR,  optional => 1 },
    }

=head4 output

    {
        type => OBJECT,
        isa  => 'App::Test::Generator::CoverageGuidedFuzzer',
    }

=cut
159
160sub new {
161
147
578170
        my ($class, %args) = @_;
162
163
147
243
        croak 'schema required'     unless $args{schema};
164
145
150
        croak 'target_sub required' unless $args{target_sub};
165
166        my $self = bless {
167                schema     => $args{schema},
168                target_sub => $args{target_sub},
169                instance   => $args{instance},
170                iterations => $args{iterations} // $DEFAULT_ITERATIONS,
171                seed       => $args{seed}       // time(),
172
143
495
                timeout    => $args{timeout}    // $DEFAULT_TIMEOUT_SECS,
173                corpus     => [],   # [{input => ..., coverage => {...}}]
174                covered    => {},   # "file:line:branch" => 1
175                bugs       => [],   # [{input => ..., error => ...}]
176                stats      => {
177                        total       => 0,
178                        interesting => 0,
179                        bugs        => 0,
180                        coverage    => 0,
181                },
182                _cover_available => undef,
183        }, $class;
184
185
143
930
        srand($self->{seed});
186
187        # Probe for Devel::Cover availability once at construction time
188
143
143
143
108
300
186
        $self->{_cover_available} = eval { require Devel::Cover; 1 } ? 1 : 0;
189
190        # Warn once per process if coverage guidance is unavailable
191
143
102
        state $cover_warned = 0;
192
143
153
        if(!$self->{_cover_available} && !$cover_warned++) {
193
0
0
                warn 'Devel::Cover not available; fuzzing without coverage guidance.';
194        }
195
196
143
206
        return $self;
197}
198
199 - 246
=head2 run

Run the coverage-guided fuzzing loop and return a summary report.

    my $report = $fuzzer->run();
    printf "Branches covered: %d\n", $report->{branches_covered};
    printf "Bugs found:       %d\n", $report->{bugs_found};

=head3 Arguments

None beyond C<$self>.

=head3 Returns

A hashref with keys C<total_iterations>, C<interesting_inputs>,
C<corpus_size>, C<branches_covered>, C<bugs_found>, and C<bugs>.

=head3 Notes

A C<target_sub> call that dies is only recorded in C<bugs> when the
input that triggered it is valid per C<schema>. A die triggered by an
input the schema itself marks invalid (e.g. out of the declared
C<min>/C<max> range) is expected behaviour, not a bug, and is silently
discarded.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
    }

=head4 output

    {
        type => HASHREF,
        keys => {
            total_iterations   => { type => SCALAR  },
            interesting_inputs => { type => SCALAR  },
            corpus_size        => { type => SCALAR  },
            branches_covered   => { type => SCALAR  },
            bugs_found         => { type => SCALAR  },
            bugs               => { type => ARRAYREF },
        },
    }

=cut
247
248sub run {
249
41
292
        my ($self) = @_;
250
251        # Phase 1: seed the corpus with a small set of random inputs
252
41
66
        $self->_seed_corpus();
253
254        # Phase 2: main fuzzing loop — alternate between mutation and exploration
255
41
49
        for my $i (1 .. $self->{iterations}) {
256
247
159
                my $input;
257
258
247
247
150
363
                if(@{ $self->{corpus} } && rand() < $CORPUS_MUTATE_RATIO) {
259                        # Mutate a randomly chosen corpus entry
260
199
199
493
200
                        my $parent = $self->{corpus}[ int(rand(@{ $self->{corpus} })) ];
261
199
206
                        $input = $self->_mutate($parent->{input});
262                } else {
263                        # Fresh random generation for exploration
264
48
123
                        $input = $self->_generate_random();
265                }
266
267
247
363
                $self->_run_one($input);
268
247
655
                $self->{stats}{total}++;
269        }
270
271
41
41
27
56
        $self->{stats}{coverage} = scalar keys %{ $self->{covered} };
272
41
44
        return $self->_build_report();
273}
274
275 - 292
=head2 corpus

Return the accumulated corpus as an arrayref of hashrefs with keys
C<input> and C<coverage>.

    my $corpus = $fuzzer->corpus();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut
293
294
18
538
sub corpus { $_[0]->{corpus} }
295
296 - 313
=head2 bugs

Return bugs found as an arrayref of hashrefs with keys C<input> and
C<error>.

    my $bugs = $fuzzer->bugs();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut
314
315
8
725
sub bugs { $_[0]->{bugs} }
316
317 - 356
=head2 save_corpus

Serialise the corpus to a JSON file for replay or extension on future
runs.

    $fuzzer->save_corpus('t/corpus/validate.json');

=head3 Arguments

=over 4

=item * C<$path>

Path to write the JSON corpus file. Required.

=back

=head3 Returns

Nothing. Croaks if the file cannot be written or no JSON module is
available.

=head3 Side effects

Writes a JSON file to C<$path>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
        path => { type => SCALAR },
    }

=head4 output

    { type => UNDEF }

=cut
357
358sub save_corpus {
359
14
1659
        my ($self, $path) = @_;
360
361
14
33
        croak 'path required' unless defined $path;
362
363
12
15
        my $json = _load_json_module();
364
365
12
403
        open my $fh, '>', $path
366                or croak "Cannot write corpus to $path: $!";
367
368        print $fh $json->new->pretty->encode({
369                seed   => $self->{seed},
370
43
10
159
40
                corpus => [ map { { input => $_->{input} } } @{ $self->{corpus} } ],
371                bugs   => $self->{bugs},
372
10
133
        });
373
374
10
215
        close $fh;
375}
376
377 - 416
=head2 load_corpus

Load a previously saved corpus JSON file, pre-seeding the fuzzer so
it continues from where it left off.

    $fuzzer->load_corpus('t/corpus/validate.json');

=head3 Arguments

=over 4

=item * C<$path>

Path to the JSON corpus file to load. Required.

=back

=head3 Returns

Nothing. Croaks if the file cannot be read or no JSON module is
available.

=head3 Side effects

Appends loaded entries to C<< $self->{corpus} >>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
        path => { type => SCALAR },
    }

=head4 output

    { type => UNDEF }

=cut
417
418sub load_corpus {
419
9
156
        my ($self, $path) = @_;
420
421
9
19
        croak 'path required' unless defined $path;
422
423
7
9
        my $json = _load_json_module();
424
425
7
118
        open my $fh, '<', $path
426                or croak "Cannot read corpus from $path: $!";
427
428
4
4
4
32
6
63
        my $data = $json->new->decode(do { local $/; <$fh> });
429
3
12
        close $fh;
430
431        # Load corpus entries with empty coverage — coverage state from a
432        # previous process cannot be restored, only the inputs themselves
433
3
3
4
5
        for my $entry (@{ $data->{corpus} // [] }) {
434
12
23
                push @{ $self->{corpus} }, {
435                        input    => $entry->{input},
436
12
8
                        coverage => {},
437                };
438        }
439}
440
441# --------------------------------------------------
442# _load_json_module
443#
444# Find and load the first available JSON
445#     module from the preference list.
446#
447# Entry:      None.
448# Exit:       Returns the name of the loaded module.
449#             Croaks if none are available.
450#
451# Side effects: Loads a JSON module into the process.
452#
453# Notes:      Uses explicit require rather than string
454#             eval for safety. JSON::MaybeXS is
455#             preferred over JSON.
456# --------------------------------------------------
457sub _load_json_module {
458
21
2633
        for my $mod (@JSON_MODULES) {
459                # Convert package name to file path — require $var does not
460                # do the :: -> / conversion that bareword require does
461
21
73
                (my $file = $mod) =~ s{::}{/}g;
462
21
93
                $file .= '.pm';
463
21
21
21
19
39
22
                my $ok = eval { require $file; 1 };
464
21
41
                return $mod if $ok;
465        }
466
0
0
        croak 'No JSON module available; install JSON or JSON::MaybeXS';
467}
468
469# --------------------------------------------------
470# _run_one
471#
472# Run the target sub with a single input,
473#     record coverage, detect bugs, and update
474#     the corpus if the input is interesting.
475#
476# Entry:      $input - the value to pass to target_sub.
477#
478# Exit:       Returns nothing. Updates $self->{corpus},
479#             $self->{bugs}, and $self->{covered}.
480#
481# Side effects: Calls target_sub. May update corpus
482#               and covered hashes.
483#
484# Notes:      When Devel::Cover is available, coverage
485#             is captured via _run_with_cover.
486#             Unexpected warnings are treated as soft
487#             bugs if they match known warning patterns.
488# --------------------------------------------------
489sub _run_one {
490
247
195
        my ($self, $input) = @_;
491
492
247
161
        my ($result, $error, $coverage);
493
494
247
216
        if($self->{_cover_available}) {
495
247
241
                $coverage = $self->_run_with_cover($input, \$result, \$error);
496        } else {
497
0
0
                $coverage = {};
498
499                # Include instance as invocant for method calls
500                my @call_args = defined($self->{instance})
501
0
0
                        ? ($self->{instance}, $input)
502                        : ($input);
503
504
0
0
                my @warnings;
505
0
0
                eval {
506
0
0
0
0
                        local $SIG{__WARN__} = sub { push @warnings, @_ };
507
0
0
                        local $SIG{__DIE__};
508                        # A hanging target_sub call would otherwise hang the
509                        # whole fuzzing run — alarm() bounds it and surfaces
510                        # the timeout as a recorded bug instead.
511
0
0
0
0
                        local $SIG{ALRM} = sub { die "target_sub timed out after $self->{timeout}s\n" };
512
0
0
                        alarm($self->{timeout}) if $self->{timeout};
513
0
0
                        $result = $self->{target_sub}->(@call_args);
514                };
515
0
0
                alarm(0) if $self->{timeout};
516
0
0
                $error = $@ if $@;
517
518                # Treat unexpected warnings matching known bad patterns as soft bugs
519
0
0
                if(!defined($error) && @warnings) {
520
0
0
                        my $w = join '', @warnings;
521
0
0
                        $error = "warning: $w"
522                                if $w =~ /uninitialized|undefined|blessed|invalid/i;
523                }
524        }
525
526        # Record bugs — only when the input was valid per the schema.
527        # A die on invalid input is correct behaviour, not a bug.
528
247
296
        if($error && $self->_input_is_valid($input)) {
529
45
45
31
71
                push @{ $self->{bugs} }, { input => $input, error => "$error" };
530
45
58
                $self->{stats}{bugs}++;
531        }
532
533        # Keep the input in the corpus if it exercised new branches
534
247
234
        if($self->_is_interesting($coverage)) {
535
36
36
127
56
                push @{ $self->{corpus} }, { input => $input, coverage => $coverage };
536
36
43
                $self->_update_covered($coverage);
537
36
40
                $self->{stats}{interesting}++;
538        }
539}
540
541# --------------------------------------------------
542# _run_with_cover
543#
544# Purpose:    Run the target sub with Devel::Cover
545#             active and return the set of newly hit
546#             branches as a hashref.
547#
548# Entry:      $input      - value to pass to target_sub.
549#             $result_ref - scalar ref to store result.
550#             $error_ref  - scalar ref to store error.
551#
552# Exit:       Returns a hashref of newly hit branch
553#             keys ("file:line:branch").
554#
555# Side effects: Calls Devel::Cover::start/stop.
556#               Sets $$result_ref and $$error_ref.
557#
558# Notes:      Snapshot comparison is imprecise for
559#             concurrent use but correct for single-
560#             threaded fuzzing. Instance is passed
561#             as invocant when set. Devel::Cover state
562#             only grows, so this iteration's "before"
563#             is exactly the previous iteration's
564#             "after" -- cached in $self to avoid two
565#             full Devel::Cover walks per iteration.
566# --------------------------------------------------
567sub _run_with_cover {
568
248
369
        my ($self, $input, $result_ref, $error_ref) = @_;
569
570
248
408
        Devel::Cover::start() if Devel::Cover->can('start');
571
572
248
248
161
307
        my %before = %{ $self->{_last_cover_snapshot} || {} };
573
574        # Include instance as invocant for method calls
575        my @call_args = defined($self->{instance})
576
248
270
                ? ($self->{instance}, $input)
577                : ($input);
578
579
248
192
        eval {
580
248
291
                local $SIG{__DIE__};
581                # See _run_one() — bound the call so a hanging target_sub
582                # cannot hang the whole fuzzing run.
583
248
1
932
1000140
                local $SIG{ALRM} = sub { die "target_sub timed out after $self->{timeout}s\n" };
584
248
507
                alarm($self->{timeout}) if $self->{timeout};
585
248
270
                $$result_ref = $self->{target_sub}->(@call_args);
586        };
587
248
1159
        alarm(0) if $self->{timeout};
588
248
232
        $$error_ref = $@ if $@;
589
590
248
225
        my %after = $self->_snapshot_cover();
591
248
313
        $self->{_last_cover_snapshot} = { %after };
592
248
418
        Devel::Cover::stop() if Devel::Cover->can('stop');
593
594        # Return only branches newly hit in this call
595
248
176
        my %delta;
596
248
225
        for my $key (keys %after) {
597
0
0
                $delta{$key} = 1 unless exists $before{$key};
598        }
599
600
248
285
        return \%delta;
601}
602
603# --------------------------------------------------
604# _snapshot_cover
605#
606# Purpose:    Take a lightweight snapshot of the
607#             currently hit branches from Devel::Cover.
608#
609# Entry:      None beyond $self.
610# Exit:       Returns a hash of "file:line:branch" keys.
611#
612# Side effects: Reads Devel::Cover internal state.
613#
614# Notes:      Falls back to empty hash if the
615#             Devel::Cover API is not accessible.
616#             All errors are silently swallowed since
617#             coverage is best-effort.
618# --------------------------------------------------
619sub _snapshot_cover {
620
248
172
        my ($self) = @_;
621
248
157
        my %snap;
622
623
248
155
        eval {
624
248
1133
                my $cover = Devel::Cover::get_coverage();
625
248
226
                return unless $cover;
626
627
248
248
149
838
                for my $file (keys %{$cover}) {
628
0
0
                        my $branch = $cover->{$file}{branch} or next;
629
0
0
0
0
                        for my $line (keys %{$branch}) {
630
0
0
0
0
                                for my $b (0 .. $#{ $branch->{$line} }) {
631                                        $snap{"$file:$line:$b"} = 1
632
0
0
                                                if $branch->{$line}[$b];
633                                }
634                        }
635                }
636        };
637
638
248
54724
        return %snap;
639}
640
641# --------------------------------------------------
642# _is_interesting
643#
644# Purpose:    Return true if the coverage hashref
645#             contains any branch not yet in the
646#             global covered set.
647#
648# Entry:      $coverage - hashref of branch keys.
649# Exit:       Returns 1 if interesting, 0 otherwise.
650#
651# Side effects: None.
652#
653# Notes:      When no coverage data is available,
654#             keeps a random sample of inputs at
655#             RANDOM_KEEP_RATIO so the corpus still
656#             grows even without branch feedback.
657# --------------------------------------------------
658sub _is_interesting {
659
250
214
        my ($self, $coverage) = @_;
660
661        # Check for any newly covered branch
662
250
250
153
212
        for my $key (keys %{$coverage}) {
663
2
4
                return 1 unless $self->{covered}{$key};
664        }
665
666        # No coverage data — keep a random sample to grow the corpus
667
249
249
169
389
        return rand() < $RANDOM_KEEP_RATIO unless %{$coverage};
668
669
1
2
        return 0;
670}
671
672# --------------------------------------------------
673# _update_covered
674#
675# Purpose:    Merge newly covered branches into the
676#             global covered set.
677#
678# Entry:      $coverage - hashref of branch keys.
679# Exit:       Returns nothing. Updates $self->{covered}.
680# Side effects: Modifies $self->{covered}.
681# --------------------------------------------------
682sub _update_covered {
683
38
36
        my ($self, $coverage) = @_;
684
38
38
23
42
        $self->{covered}{$_} = 1 for keys %{$coverage};
685}
686
687# --------------------------------------------------
688# _generate_random
689#
690# Purpose:    Generate a random input value from the
691#             top-level schema input specification.
692#
693# Entry:      None beyond $self.
694# Exit:       Returns a randomly generated value.
695# Side effects: None.
696# --------------------------------------------------
697sub _generate_random {
698
265
174
        my ($self) = @_;
699
265
244
        return $self->_generate_for_schema($self->{schema}{input});
700}
701
702# --------------------------------------------------
703# _generate_for_schema
704#
705# Purpose:    Recursively generate a random value
706#             matching a schema specification hashref.
707#
708# Entry:      $spec - schema spec hashref or scalar
709#             type hint.
710#
711# Exit:       Returns a generated value appropriate
712#             for the spec type, or undef if spec is
713#             absent or 'undef'.
714#
715# Side effects: None.
716#
717# Notes:      Edge cases declared in edge_case_array
718#             are selected at EDGE_CASE_RATIO frequency
719#             to bias toward known interesting values.
720# --------------------------------------------------
721sub _generate_for_schema {
722
312
1488
        my ($self, $spec) = @_;
723
724
312
261
        return undef unless defined $spec;
725
311
289
        return undef if $spec eq 'undef';
726
727
310
348
        my $type = ref($spec) ? ($spec->{type} // $TYPE_STRING) : $TYPE_STRING;
728
729        # Bias toward declared edge cases at EDGE_CASE_RATIO frequency
730
310
507
        if(ref($spec) && $spec->{edge_case_array} && rand() < $EDGE_CASE_RATIO) {
731
6
6
13
5
                my @ec = @{ $spec->{edge_case_array} };
732
6
7
                return $ec[ int(rand(@ec)) ];
733        }
734
735        # Dispatch to type-specific generator
736
304
58
316
130
        if    ($type eq $TYPE_INTEGER) { return $self->_rand_int($spec)    }
737
0
0
        elsif ($type eq $TYPE_NUMBER)  { return $self->_rand_num($spec)    }
738
19
108
        elsif ($type eq $TYPE_BOOLEAN) { return int(rand(2))               }
739
8
51
        elsif ($type eq $TYPE_ARRAY)   { return $self->_rand_array($spec)  }
740
10
80
        elsif ($type eq $TYPE_HASH)    { return $self->_rand_hash($spec)   }
741
209
1593
        else                           { return $self->_rand_string($spec) }
742}
743
744# --------------------------------------------------
745# _rand_int
746#
747# Purpose:    Generate a random integer within the
748#             spec's min/max range, biased toward
749#             boundary values at INT_BOUNDARY_RATIO.
750#
751# Entry:      $spec - schema spec hashref.
752# Exit:       Returns an integer scalar.
753# Side effects: None.
754# --------------------------------------------------
755sub _rand_int {
756
98
5165
        my ($self, $spec) = @_;
757
758
98
100
        my $min = $spec->{min} // $INT32_MIN;
759
98
116
        my $max = $spec->{max} // $INT32_MAX;
760
761        # Bias toward boundary values to probe edge conditions
762
98
148
        if(rand() < $INT_BOUNDARY_RATIO) {
763
41
100
                my @interesting = ($min, $min + 1, 0, -1, 1, $max - 1, $max);
764
41
90
                return $interesting[ int(rand(@interesting)) ];
765        }
766
767
57
158
        return $min + int(rand($max - $min + 1));
768}
769
770# --------------------------------------------------
771# _rand_num
772#
773# Purpose:    Generate a random floating point number
774#             within the spec's min/max range.
775#
776# Entry:      $spec - schema spec hashref.
777# Exit:       Returns a numeric scalar.
778# Side effects: None.
779# --------------------------------------------------
780sub _rand_num {
781
10
1303
        my ($self, $spec) = @_;
782
783
10
12
        my $min = $spec->{min} // -1e9;
784
10
10
        my $max = $spec->{max} //  1e9;
785
786
10
14
        return $min + rand($max - $min);
787}
788
789# --------------------------------------------------
790# _rand_string
791#
792# Purpose:    Generate a random string within the
793#             spec's min/max length range, biased
794#             toward boundary lengths.
795#
796# Entry:      $spec - schema spec hashref.
797# Exit:       Returns a string scalar.
798# Side effects: None.
799#
800# Notes:      Character set includes control chars
801#             and NUL to probe boundary handling.
802# --------------------------------------------------
803sub _rand_string {
804
210
137
        my ($self, $spec) = @_;
805
806
210
251
        my $min_len = $spec->{min} // 0;
807
210
227
        my $max_len = $spec->{max} // $DEFAULT_MAX_STR_LEN;
808
809        # Bias toward boundary lengths at STR_BOUNDARY_RATIO frequency
810
210
560
        my $len;
811
210
188
        if(rand() < $STR_BOUNDARY_RATIO) {
812
18
43
                my @boundary_lens = ($min_len, $min_len + 1, $max_len - 1, $max_len);
813
18
21
                $len = $boundary_lens[ int(rand(@boundary_lens)) ];
814        } else {
815
192
414
                $len = $min_len + int(rand($max_len - $min_len + 1));
816        }
817
818        # Clamp to non-negative
819
210
191
        $len = 0 if $len < 0;
820
821
210
871
        my @chars = ('a'..'z', 'A'..'Z', '0'..'9', ' ', "\t", "\n", "\0");
822
210
8579
263
7817
        return join '', map { $chars[ int(rand(@chars)) ] } 1 .. $len;
823}
824
825# --------------------------------------------------
826# _rand_array
827#
828# Purpose:    Generate a random arrayref with 0 to
829#             DEFAULT_MAX_ARRAY elements, each
830#             generated from the items spec.
831#
832# Entry:      $spec - schema spec hashref.
833# Exit:       Returns an arrayref.
834# Side effects: None.
835# --------------------------------------------------
836sub _rand_array {
837
8
7
        my ($self, $spec) = @_;
838
839
8
13
        my $items = $spec->{items} // {};
840
8
8
        my $count = int(rand($DEFAULT_MAX_ARRAY + 1));
841
842
8
11
22
12
        return [ map { $self->_generate_for_schema($items) } 1 .. $count ];
843}
844
845# --------------------------------------------------
846# _rand_hash
847#
848# Purpose:    Generate a random hashref with values
849#             generated from the properties spec.
850#
851# Entry:      $spec - schema spec hashref.
852# Exit:       Returns a hashref.
853# Side effects: None.
854# --------------------------------------------------
855sub _rand_hash {
856
10
9
        my ($self, $spec) = @_;
857
858
10
13
        my $props = $spec->{properties} // {};
859
10
7
        my %h;
860
861
10
10
10
10
        for my $key (keys %{$props}) {
862
1
3
                $h{$key} = $self->_generate_for_schema($props->{$key});
863        }
864
865
10
17
        return \%h;
866}
867
868# --------------------------------------------------
869# _input_is_valid
870#
871# Purpose:    Return true if the input satisfies all
872#             constraints in the schema. Used to
873#             distinguish real bugs (die on valid
874#             input) from expected failures (die on
875#             invalid input).
876#
877# Entry:      $input - the value to validate.
878# Exit:       Returns 1 if valid, 0 if not.
879#             Returns 1 if no schema is available.
880# Side effects: None.
881# --------------------------------------------------
882sub _input_is_valid {
883
70
81
        my ($self, $input) = @_;
884
885
70
55
        my $spec = $self->{schema}{input};
886
887        # No schema means we cannot judge validity
888
70
107
        return 1 unless defined $spec && ref($spec);
889
890
69
90
        my $input_style = $self->{schema}{input_style} // '';
891
892
69
113
        if($input_style eq 'hash' || ref($input) eq 'HASH') {
893
2
6
                return $self->_validate_hash_input($input, $spec);
894        }
895
896
67
70
        return $self->_validate_value($input, $spec);
897}
898
899# --------------------------------------------------
900# _validate_hash_input
901#
902# Purpose:    Validate a hash-style input against the
903#             schema spec, checking each named field.
904#
905# Entry:      $input - hashref of named parameters.
906#             $spec  - schema spec hashref.
907# Exit:       Returns 1 if valid, 0 if not.
908# Side effects: None.
909# --------------------------------------------------
910sub _validate_hash_input {
911
8
17
        my ($self, $input, $spec) = @_;
912
913
8
9
        return 0 unless defined $input;
914
915
7
7
6
9
        for my $key (keys %{$spec}) {
916                # Skip internal metadata keys
917
8
10
                next if $key =~ /^_/;
918
919
6
5
                my $field_spec = $spec->{$key};
920
6
6
                next unless ref($field_spec) eq 'HASH';
921
922
6
13
                my $value = ref($input) eq 'HASH' ? $input->{$key} : undef;
923
924                # Required field missing is always invalid
925
6
10
                if(!defined($value) && !$field_spec->{optional}) {
926
2
5
                        return 0;
927                }
928
929
4
5
                next unless defined $value;
930
931
3
3
                return 0 unless $self->_validate_value($value, $field_spec);
932        }
933
934
4
8
        return 1;
935}
936
937# --------------------------------------------------
938# _validate_value
939#
940# Purpose:    Validate a single value against a schema
941#             type spec, checking type and constraints.
942#
943# Entry:      $value - the value to validate.
944#             $spec  - schema spec hashref.
945# Exit:       Returns 1 if valid, 0 if not.
946# Side effects: None.
947#
948# Notes:      Number validation accepts both integer
949#             and floating point forms including
950#             scientific notation. Type mismatch
951#             always returns 0.
952# --------------------------------------------------
953sub _validate_value {
954
110
155
        my ($self, $value, $spec) = @_;
955
956        # Undef is never valid unless optional — caller already checked optional
957
110
93
        return 0 unless defined $value;
958
959
108
106
        my $type = $spec->{type} // $TYPE_STRING;
960
961
108
134
        if($type eq $TYPE_INTEGER) {
962
49
174
                return 0 unless $value =~ /^-?\d+$/;
963
46
86
                return 0 if defined($spec->{min}) && $value < $spec->{min};
964
27
44
                return 0 if defined($spec->{max}) && $value > $spec->{max};
965        }
966        elsif($type eq $TYPE_NUMBER) {
967                # Accept integers, decimals, and scientific notation
968
5
33
                return 0 unless $value =~ /^-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
969
4
6
                return 0 if defined($spec->{min}) && $value < $spec->{min};
970
4
5
                return 0 if defined($spec->{max}) && $value > $spec->{max};
971        }
972        elsif($type eq $TYPE_STRING) {
973
41
214
                my $len = length($value);
974
41
63
                return 0 if defined($spec->{min}) && $len < $spec->{min};
975
39
62
                return 0 if defined($spec->{max}) && $len > $spec->{max};
976
36
37
                if(defined($spec->{matches})) {
977
5
30
                        (my $pat = $spec->{matches}) =~ s{^/(.+)/$}{$1};
978
979                        # ReDoS guard: a schema-supplied pattern matched against
980                        # fuzzer-generated (attacker-shaped) input could exhibit
981                        # catastrophic backtracking. Bound the match with alarm()
982                        # the same way target_sub calls are bounded elsewhere in
983                        # this module, and treat a timeout as a non-match.
984
5
3
                        my $matched = eval {
985
5
0
27
0
                                local $SIG{ALRM} = sub { die "matches regex timed out\n" };
986
5
7
                                alarm($MATCHES_REGEX_TIMEOUT_SECS);
987
5
107
                                my $m = $value =~ /$pat/;
988
5
9
                                alarm(0);
989
5
16
                                $m;
990                        };
991
5
7
                        alarm(0);
992
5
10
                        return 0 unless $matched;
993                }
994        }
995        elsif($type eq $TYPE_BOOLEAN) {
996
6
51
                return 0 unless $value =~ /^[01]$/;
997        }
998        elsif($type eq $TYPE_ARRAY || $type eq 'array') {
999
4
55
                return 0 unless ref($value) eq 'ARRAY';
1000        }
1001        elsif($type eq $TYPE_HASH || $type eq 'hash') {
1002
3
36
                return 0 unless ref($value) eq 'HASH';
1003        }
1004
1005
68
103
        return 1;
1006}
1007
1008# --------------------------------------------------
1009# _mutate
1010#
1011# Purpose:    Apply a random mutation to an input
1012#             value, dispatching on its type.
1013#
1014# Entry:      $input - the value to mutate.
1015# Exit:       Returns a mutated copy of the input.
1016# Side effects: None.
1017#
1018# Notes:      Blessed references are passed through
1019#             unchanged. Undef is replaced with a
1020#             freshly generated random value.
1021# --------------------------------------------------
1022sub _mutate {
1023
221
908
        my ($self, $input) = @_;
1024
1025
221
159
        my $type = ref($input);
1026
1027
221
233
        if(!defined $input) {
1028                # Replace undef with a fresh random value
1029
2
4
                return $self->_generate_random();
1030        }
1031        elsif(!$type) {
1032                # Dispatch scalar mutation based on apparent type
1033
199
368
                if($input =~ /^-?\d+$/) {
1034
43
45
                        return $self->_mutate_int($input);
1035                } elsif($input =~ /^-?[\d.]+$/) {
1036
2
3
                        return $self->_mutate_num($input);
1037                } else {
1038
154
156
                        return $self->_mutate_string($input);
1039                }
1040        }
1041        elsif($type eq 'ARRAY') {
1042
10
10
                return $self->_mutate_array($input);
1043        }
1044        elsif($type eq 'HASH') {
1045
8
12
                return $self->_mutate_hash($input);
1046        }
1047
1048        # Blessed refs and other types pass through unchanged
1049
2
3
        return $input;
1050}
1051
1052# --------------------------------------------------
1053# _mutate_int
1054#
1055# Purpose:    Apply a random arithmetic mutation to
1056#             an integer value.
1057#
1058# Entry:      $n - the integer to mutate.
1059# Exit:       Returns a mutated integer.
1060# Side effects: None.
1061# --------------------------------------------------
1062sub _mutate_int {
1063
48
1105
        my ($self, $n) = @_;
1064
1065        my @ops = (
1066
8
27
                sub { $n + 1              },
1067
3
11
                sub { $n - 1              },
1068
5
19
                sub { $n * 2              },
1069
8
34
                sub { $n == 0 ? 1 : int($n / 2) },
1070
11
40
                sub { -$n                 },
1071
3
13
                sub { 0                   },
1072
8
9
                sub { $INT32_MAX          },
1073
2
3
                sub { $INT32_MIN          },
1074
48
207
        );
1075
1076
48
60
        return $ops[ int(rand(@ops)) ]->();
1077}
1078
1079# --------------------------------------------------
1080# _mutate_num
1081#
1082# Purpose:    Apply a random arithmetic mutation to
1083#             a floating point value.
1084#
1085# Entry:      $n - the number to mutate.
1086# Exit:       Returns a mutated number.
1087# Side effects: None.
1088# --------------------------------------------------
1089sub _mutate_num {
1090
5
558
        my ($self, $n) = @_;
1091
1092        my @ops = (
1093
0
0
                sub { $n + rand(10)        },
1094
2
8
                sub { $n - rand(10)        },
1095
1
3
                sub { $n * (1 + rand())    },
1096
2
7
                sub { 0                    },
1097
0
0
                sub { -$n                  },
1098
5
15
        );
1099
1100
5
6
        return $ops[ int(rand(@ops)) ]->();
1101}
1102
1103# --------------------------------------------------
1104# _mutate_string
1105#
1106# Purpose:    Apply a random structural mutation to
1107#             a string value — bit flip, insert,
1108#             delete, truncate, repeat, or replace
1109#             with an interesting known value.
1110#
1111# Entry:      $s - the string to mutate.
1112# Exit:       Returns a mutated string.
1113# Side effects: None.
1114# --------------------------------------------------
1115sub _mutate_string {
1116
157
385
        my ($self, $s) = @_;
1117
1118
157
108
        my $len = length($s);
1119
1120        my @ops = (
1121                # Bit flip a random character
1122                sub {
1123
13
12
                        return $s unless $len;
1124
13
23
                        my $pos  = int(rand($len));
1125
13
13
                        my $char = substr($s, $pos, 1);
1126
13
19
                        substr($s, $pos, 1) = chr(ord($char) ^ (1 << int(rand(8))));
1127
13
43
                        $s
1128                },
1129                # Insert a random byte
1130                sub {
1131
52
49
                        my $pos  = int(rand($len + 1));
1132
52
60
                        my $char = chr(int(rand(256)));
1133
52
53
                        substr($s, $pos, 0, $char);
1134
52
243
                        $s
1135                },
1136                # Delete a random character
1137                sub {
1138
10
16
                        return $s unless $len;
1139
9
13
                        substr($s, int(rand($len)), 1, '');
1140
9
38
                        $s
1141                },
1142                # Truncate at a random position
1143
10
44
                sub { substr($s, 0, int(rand($len + 1))) },
1144                # Double the string
1145
44
184
                sub { $s x 2 },
1146                # Replace with a known interesting string
1147                sub {
1148
28
40
                        my @interesting = (
1149                                '', ' ', "\0", "\n", "\t",
1150                                'a' x 256,
1151                                'null', 'undefined',
1152                                "'; DROP TABLE foo; --",
1153                                '<script>alert(1)</script>',
1154                        );
1155
28
139
                        $interesting[ int(rand(@interesting)) ]
1156                },
1157
157
557
        );
1158
1159
157
190
        return $ops[ int(rand(@ops)) ]->();
1160}
1161
1162# --------------------------------------------------
1163# _mutate_array
1164#
1165# Purpose:    Apply a random structural mutation to
1166#             an arrayref — mutate element, duplicate,
1167#             delete, or empty.
1168#
1169# Entry:      $arr - the arrayref to mutate.
1170# Exit:       Returns a mutated arrayref copy.
1171# Side effects: None.
1172# --------------------------------------------------
1173sub _mutate_array {
1174
12
16
        my ($self, $arr) = @_;
1175
1176
12
12
11
16
        my @copy = @{$arr};
1177
1178        my @ops = (
1179                # Mutate a random element
1180                sub {
1181
3
3
                        return [] unless @copy;
1182
3
3
                        my $i = int(rand(@copy));
1183
3
7
                        $copy[$i] = $self->_mutate($copy[$i]);
1184                        \@copy
1185
3
10
                },
1186                # Duplicate a random element
1187                sub {
1188
3
11
                        return \@copy unless @copy;
1189
1
2
                        my $i = int(rand(@copy));
1190
1
1
                        splice @copy, $i, 0, $copy[$i];
1191                        \@copy
1192
1
4
                },
1193                # Delete a random element
1194                sub {
1195
5
7
                        return \@copy unless @copy;
1196
4
8
                        splice @copy, int(rand(@copy)), 1;
1197                        \@copy
1198
4
17
                },
1199                # Return empty array
1200
1
4
                sub { [] },
1201
12
46
        );
1202
1203
12
18
        return $ops[ int(rand(@ops)) ]->();
1204}
1205
1206# --------------------------------------------------
1207# _mutate_hash
1208#
1209# Purpose:    Apply a random mutation to one value
1210#             in a hashref copy.
1211#
1212# Entry:      $h - the hashref to mutate.
1213# Exit:       Returns a mutated hashref copy.
1214# Side effects: None.
1215# --------------------------------------------------
1216sub _mutate_hash {
1217
11
14
        my ($self, $h) = @_;
1218
1219
11
11
5
13
        my %copy = %{$h};
1220
11
13
        my @keys = keys %copy;
1221
1222        # Return unchanged if hash is empty
1223
11
16
        return \%copy unless @keys;
1224
1225
4
6
        my $k = $keys[ int(rand(@keys)) ];
1226
4
6
        $copy{$k} = $self->_mutate($copy{$k});
1227
1228
4
6
        return \%copy;
1229}
1230
1231# --------------------------------------------------
1232# _seed_corpus
1233#
1234# Purpose:    Pre-populate the corpus with a small
1235#             set of randomly generated inputs to
1236#             give the fuzzing loop a starting point.
1237#
1238# Entry:      None beyond $self.
1239# Exit:       Returns nothing. Appends to $self->{corpus}.
1240# Side effects: Modifies $self->{corpus}.
1241# --------------------------------------------------
1242sub _seed_corpus {
1243
43
37
        my $self = $_[0];
1244
1245
43
72
        for (1 .. $SEED_CORPUS_SIZE) {
1246
215
215
221
223
                push @{ $self->{corpus} }, {
1247                        input    => $self->_generate_random(),
1248                        coverage => {},
1249                };
1250        }
1251}
1252
1253# --------------------------------------------------
1254# _build_report
1255#
1256# Purpose:    Construct the summary report hashref
1257#             returned by run().
1258#
1259# Entry:      None beyond $self.
1260# Exit:       Returns a report hashref.
1261# Side effects: None.
1262# --------------------------------------------------
1263sub _build_report {
1264
43
38
        my $self = $_[0];
1265
1266        return {
1267                total_iterations   => $self->{stats}{total},
1268                interesting_inputs => $self->{stats}{interesting},
1269
43
146
                corpus_size        => scalar @{ $self->{corpus} },
1270                branches_covered   => $self->{stats}{coverage},
1271                bugs_found         => $self->{stats}{bugs},
1272                bugs               => $self->{bugs},
1273
43
45
        };
1274}
1275
1276 - 1291
=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

Portions of this module's initial design and documentation were created
with the assistance of AI.

=head1 LICENCE AND COPYRIGHT

Copyright 2026 Nigel Horne.

Usage is subject to GPL2 licence terms.
If you use it,
please let me know.

=cut
1292
12931;