File Coverage

File:blib/lib/App/Test/Generator/CoverageGuidedFuzzer.pm
Coverage:85.6%

linestmtbrancondsubtimecode
1package App::Test::Generator::CoverageGuidedFuzzer;
2
3
5
5
5
147686
5
70
use strict;
4
5
5
5
9
3
127
use warnings;
5
5
5
5
10
5
89
use Carp    qw(croak);
6
5
5
5
10
4
280
use feature 'state';
7
5
5
5
406
3293
10568
use Readonly;
8
9# --------------------------------------------------
10# Fuzzing loop parameters
11# --------------------------------------------------
12Readonly my $DEFAULT_ITERATIONS   => 100;
13Readonly my $CORPUS_MUTATE_RATIO  => 0.70;  # 70% mutate, 30% explore
14Readonly my $RANDOM_KEEP_RATIO    => 0.20;  # keep 20% random when no coverage
15Readonly my $EDGE_CASE_RATIO      => 0.40;  # 40% chance to use declared edge case
16Readonly my $INT_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary int
17Readonly my $STR_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary length
18Readonly my $SEED_CORPUS_SIZE     => 5;     # initial random inputs to seed corpus
19Readonly my $DEFAULT_MAX_STR_LEN  => 64;
20Readonly my $DEFAULT_MAX_ARRAY    => 4;     # max elements in random array (0..N)
21Readonly my $INT32_MAX            => 2**31 - 1;
22Readonly my $INT32_MIN            => -(2**31);
23
24# --------------------------------------------------
25# Type name constants — used in schema dispatch
26# --------------------------------------------------
27Readonly my $TYPE_INTEGER => 'integer';
28Readonly my $TYPE_NUMBER  => 'number';
29Readonly my $TYPE_BOOLEAN => 'boolean';
30Readonly my $TYPE_ARRAY   => 'arrayref';
31Readonly my $TYPE_HASH    => 'hashref';
32Readonly my $TYPE_STRING  => 'string';
33
34# --------------------------------------------------
35# JSON module preference order
36# --------------------------------------------------
37Readonly my @JSON_MODULES => qw(JSON::MaybeXS JSON);
38
39our $VERSION = '0.36';
40
41 - 149
=head1 NAME

App::Test::Generator::CoverageGuidedFuzzer - AFL-style coverage-guided fuzzing for App::Test::Generator

=head1 VERSION

Version 0.36

=head1 SYNOPSIS

    use App::Test::Generator::CoverageGuidedFuzzer;

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
    );

    my $report = $fuzzer->run();
    $fuzzer->save_corpus('t/corpus/validate.json');

=head1 DESCRIPTION

Implements coverage-guided fuzzing on top of App::Test::Generator's
existing schema-driven input generation. Instead of purely random
generation it:

=over 4

=item 1. Generates or mutates a structured input

=item 2. Runs the target sub under Devel::Cover to capture branch hits

=item 3. Keeps inputs that discover new branches in a corpus

=item 4. Preferentially mutates corpus entries in future iterations

=back

This is the Perl equivalent of what AFL/libFuzzer do at the byte level,
but operating on typed, schema-validated Perl data structures.

=head1 METHODS

=head2 new

Construct a new coverage-guided fuzzer.

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
        instance   => $obj,   # optional pre-built object for method calls
    );

=head3 Arguments

=over 4

=item * C<schema>

A hashref representing the parsed YAML schema for the target function.
Required.

=item * C<target_sub>

A CODE reference to the function under test. Required.

=item * C<iterations>

Number of fuzzing iterations to run. Optional - defaults to 100.

=item * C<seed>

Random seed for reproducible runs. Optional - defaults to C<time()>.

=item * C<instance>

An optional pre-built object to use as the invocant when calling the
target sub as a method.

=back

=head3 Returns

A blessed hashref. Croaks if C<schema> or C<target_sub> is missing.

=head3 API specification

=head4 input

    {
        schema     => { type => HASHREF },
        target_sub => { type => CODEREF },
        iterations => { type => SCALAR,  optional => 1 },
        seed       => { type => SCALAR,  optional => 1 },
        instance   => { type => OBJECT,  optional => 1 },
    }

=head4 output

    {
        type => OBJECT,
        isa  => 'App::Test::Generator::CoverageGuidedFuzzer',
    }

=cut
150
151sub new {
152
135
574055
        my ($class, %args) = @_;
153
154
135
194
        croak 'schema required'     unless $args{schema};
155
133
143
        croak 'target_sub required' unless $args{target_sub};
156
157        my $self = bless {
158                schema     => $args{schema},
159                target_sub => $args{target_sub},
160                instance   => $args{instance},
161                iterations => $args{iterations} // $DEFAULT_ITERATIONS,
162
131
498
                seed       => $args{seed}       // time(),
163                corpus     => [],   # [{input => ..., coverage => {...}}]
164                covered    => {},   # "file:line:branch" => 1
165                bugs       => [],   # [{input => ..., error => ...}]
166                stats      => {
167                        total       => 0,
168                        interesting => 0,
169                        bugs        => 0,
170                        coverage    => 0,
171                },
172                _cover_available => undef,
173        }, $class;
174
175
131
196
        srand($self->{seed});
176
177        # Probe for Devel::Cover availability once at construction time
178
131
131
131
96
274
171
        $self->{_cover_available} = eval { require Devel::Cover; 1 } ? 1 : 0;
179
180        # Warn once per process if coverage guidance is unavailable
181
131
118
        state $cover_warned = 0;
182
131
140
        if(!$self->{_cover_available} && !$cover_warned++) {
183
0
0
                warn 'Devel::Cover not available; fuzzing without coverage guidance.';
184        }
185
186
131
171
        return $self;
187}
188
189 - 228
=head2 run

Run the coverage-guided fuzzing loop and return a summary report.

    my $report = $fuzzer->run();
    printf "Branches covered: %d\n", $report->{branches_covered};
    printf "Bugs found:       %d\n", $report->{bugs_found};

=head3 Arguments

None beyond C<$self>.

=head3 Returns

A hashref with keys C<total_iterations>, C<interesting_inputs>,
C<corpus_size>, C<branches_covered>, C<bugs_found>, and C<bugs>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
    }

=head4 output

    {
        type => HASHREF,
        keys => {
            total_iterations   => { type => SCALAR  },
            interesting_inputs => { type => SCALAR  },
            corpus_size        => { type => SCALAR  },
            branches_covered   => { type => SCALAR  },
            bugs_found         => { type => SCALAR  },
            bugs               => { type => ARRAYREF },
        },
    }

=cut
229
230sub run {
231
40
286
        my ($self) = @_;
232
233        # Phase 1: seed the corpus with a small set of random inputs
234
40
66
        $self->_seed_corpus();
235
236        # Phase 2: main fuzzing loop — alternate between mutation and exploration
237
40
52
        for my $i (1 .. $self->{iterations}) {
238
253
165
                my $input;
239
240
253
253
151
394
                if(@{ $self->{corpus} } && rand() < $CORPUS_MUTATE_RATIO) {
241                        # Mutate a randomly chosen corpus entry
242
210
210
482
211
                        my $parent = $self->{corpus}[ int(rand(@{ $self->{corpus} })) ];
243
210
227
                        $input = $self->_mutate($parent->{input});
244                } else {
245                        # Fresh random generation for exploration
246
43
110
                        $input = $self->_generate_random();
247                }
248
249
253
345
                $self->_run_one($input);
250
253
717
                $self->{stats}{total}++;
251        }
252
253
40
40
29
51
        $self->{stats}{coverage} = scalar keys %{ $self->{covered} };
254
40
50
        return $self->_build_report();
255}
256
257 - 274
=head2 corpus

Return the accumulated corpus as an arrayref of hashrefs with keys
C<input> and C<coverage>.

    my $corpus = $fuzzer->corpus();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut
275
276
19
697
sub corpus { $_[0]->{corpus} }
277
278 - 295
=head2 bugs

Return bugs found as an arrayref of hashrefs with keys C<input> and
C<error>.

    my $bugs = $fuzzer->bugs();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut
296
297
5
466
sub bugs { $_[0]->{bugs} }
298
299 - 338
=head2 save_corpus

Serialise the corpus to a JSON file for replay or extension on future
runs.

    $fuzzer->save_corpus('t/corpus/validate.json');

=head3 Arguments

=over 4

=item * C<$path>

Path to write the JSON corpus file. Required.

=back

=head3 Returns

Nothing. Croaks if the file cannot be written or no JSON module is
available.

=head3 Side effects

Writes a JSON file to C<$path>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
        path => { type => SCALAR },
    }

=head4 output

    { type => UNDEF }

=cut
339
340sub save_corpus {
341
13
1956
        my ($self, $path) = @_;
342
343
13
26
        croak 'path required' unless defined $path;
344
345
11
12
        my $json = _load_json_module();
346
347
11
340
        open my $fh, '>', $path
348                or croak "Cannot write corpus to $path: $!";
349
350        print $fh $json->new->pretty->encode({
351                seed   => $self->{seed},
352
46
9
185
13
                corpus => [ map { { input => $_->{input} } } @{ $self->{corpus} } ],
353                bugs   => $self->{bugs},
354
9
78
        });
355
356
9
186
        close $fh;
357}
358
359 - 398
=head2 load_corpus

Load a previously saved corpus JSON file, pre-seeding the fuzzer so
it continues from where it left off.

    $fuzzer->load_corpus('t/corpus/validate.json');

=head3 Arguments

=over 4

=item * C<$path>

Path to the JSON corpus file to load. Required.

=back

=head3 Returns

Nothing. Croaks if the file cannot be read or no JSON module is
available.

=head3 Side effects

Appends loaded entries to C<< $self->{corpus} >>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
        path => { type => SCALAR },
    }

=head4 output

    { type => UNDEF }

=cut
399
400sub load_corpus {
401
10
174
        my ($self, $path) = @_;
402
403
10
22
        croak 'path required' unless defined $path;
404
405
8
7
        my $json = _load_json_module();
406
407
8
150
        open my $fh, '<', $path
408                or croak "Cannot read corpus from $path: $!";
409
410
5
5
5
39
7
86
        my $data = $json->new->decode(do { local $/; <$fh> });
411
4
16
        close $fh;
412
413        # Load corpus entries with empty coverage — coverage state from a
414        # previous process cannot be restored, only the inputs themselves
415
4
4
3
9
        for my $entry (@{ $data->{corpus} // [] }) {
416
20
44
                push @{ $self->{corpus} }, {
417                        input    => $entry->{input},
418
20
11
                        coverage => {},
419                };
420        }
421}
422
423# --------------------------------------------------
424# _load_json_module
425#
426# Find and load the first available JSON
427#     module from the preference list.
428#
429# Entry:      None.
430# Exit:       Returns the name of the loaded module.
431#             Croaks if none are available.
432#
433# Side effects: Loads a JSON module into the process.
434#
435# Notes:      Uses explicit require rather than string
436#             eval for safety. JSON::MaybeXS is
437#             preferred over JSON.
438# --------------------------------------------------
439sub _load_json_module {
440
21
2640
        for my $mod (@JSON_MODULES) {
441                # Convert package name to file path — require $var does not
442                # do the :: -> / conversion that bareword require does
443
21
68
                (my $file = $mod) =~ s{::}{/}g;
444
21
94
                $file .= '.pm';
445
21
21
21
14
41
18
                my $ok = eval { require $file; 1 };
446
21
39
                return $mod if $ok;
447        }
448
0
0
        croak 'No JSON module available; install JSON or JSON::MaybeXS';
449}
450
451# --------------------------------------------------
452# _run_one
453#
454# Run the target sub with a single input,
455#     record coverage, detect bugs, and update
456#     the corpus if the input is interesting.
457#
458# Entry:      $input - the value to pass to target_sub.
459#
460# Exit:       Returns nothing. Updates $self->{corpus},
461#             $self->{bugs}, and $self->{covered}.
462#
463# Side effects: Calls target_sub. May update corpus
464#               and covered hashes.
465#
466# Notes:      When Devel::Cover is available, coverage
467#             is captured via _run_with_cover.
468#             Unexpected warnings are treated as soft
469#             bugs if they match known warning patterns.
470# --------------------------------------------------
471sub _run_one {
472
253
198
        my ($self, $input) = @_;
473
474
253
186
        my ($result, $error, $coverage);
475
476
253
220
        if($self->{_cover_available}) {
477
253
259
                $coverage = $self->_run_with_cover($input, \$result, \$error);
478        } else {
479
0
0
                $coverage = {};
480
481                # Include instance as invocant for method calls
482                my @call_args = defined($self->{instance})
483
0
0
                        ? ($self->{instance}, $input)
484                        : ($input);
485
486
0
0
                my @warnings;
487
0
0
                eval {
488
0
0
0
0
                        local $SIG{__WARN__} = sub { push @warnings, @_ };
489
0
0
                        local $SIG{__DIE__};
490
0
0
                        $result = $self->{target_sub}->(@call_args);
491                };
492
0
0
                $error = $@ if $@;
493
494                # Treat unexpected warnings matching known bad patterns as soft bugs
495
0
0
                if(!defined($error) && @warnings) {
496
0
0
                        my $w = join '', @warnings;
497
0
0
                        $error = "warning: $w"
498                                if $w =~ /uninitialized|undefined|blessed|invalid/i;
499                }
500        }
501
502        # Record bugs — only when the input was valid per the schema.
503        # A die on invalid input is correct behaviour, not a bug.
504
253
283
        if($error && $self->_input_is_valid($input)) {
505
37
37
32
56
                push @{ $self->{bugs} }, { input => $input, error => "$error" };
506
37
34
                $self->{stats}{bugs}++;
507        }
508
509        # Keep the input in the corpus if it exercised new branches
510
253
247
        if($self->_is_interesting($coverage)) {
511
36
36
92
58
                push @{ $self->{corpus} }, { input => $input, coverage => $coverage };
512
36
64
                $self->_update_covered($coverage);
513
36
42
                $self->{stats}{interesting}++;
514        }
515}
516
517# --------------------------------------------------
518# _run_with_cover
519#
520# Purpose:    Run the target sub with Devel::Cover
521#             active and return the set of newly hit
522#             branches as a hashref.
523#
524# Entry:      $input      - value to pass to target_sub.
525#             $result_ref - scalar ref to store result.
526#             $error_ref  - scalar ref to store error.
527#
528# Exit:       Returns a hashref of newly hit branch
529#             keys ("file:line:branch").
530#
531# Side effects: Calls Devel::Cover::start/stop.
532#               Sets $$result_ref and $$error_ref.
533#
534# Notes:      Snapshot comparison is imprecise for
535#             concurrent use but correct for single-
536#             threaded fuzzing. Instance is passed
537#             as invocant when set.
538# --------------------------------------------------
539sub _run_with_cover {
540
253
203
        my ($self, $input, $result_ref, $error_ref) = @_;
541
542
253
424
        Devel::Cover::start() if Devel::Cover->can('start');
543
544
253
238
        my %before = $self->_snapshot_cover();
545
546        # Include instance as invocant for method calls
547        my @call_args = defined($self->{instance})
548
253
304
                ? ($self->{instance}, $input)
549                : ($input);
550
551
253
231
        eval {
552
253
276
                local $SIG{__DIE__};
553
253
311
                $$result_ref = $self->{target_sub}->(@call_args);
554        };
555
253
649
        $$error_ref = $@ if $@;
556
557
253
258
        my %after = $self->_snapshot_cover();
558
253
454
        Devel::Cover::stop() if Devel::Cover->can('stop');
559
560        # Return only branches newly hit in this call
561
253
182
        my %delta;
562
253
227
        for my $key (keys %after) {
563
0
0
                $delta{$key} = 1 unless exists $before{$key};
564        }
565
566
253
301
        return \%delta;
567}
568
569# --------------------------------------------------
570# _snapshot_cover
571#
572# Purpose:    Take a lightweight snapshot of the
573#             currently hit branches from Devel::Cover.
574#
575# Entry:      None beyond $self.
576# Exit:       Returns a hash of "file:line:branch" keys.
577#
578# Side effects: Reads Devel::Cover internal state.
579#
580# Notes:      Falls back to empty hash if the
581#             Devel::Cover API is not accessible.
582#             All errors are silently swallowed since
583#             coverage is best-effort.
584# --------------------------------------------------
585sub _snapshot_cover {
586
506
366
        my ($self) = @_;
587
506
304
        my %snap;
588
589
506
363
        eval {
590
506
2096
                my $cover = Devel::Cover::get_coverage();
591
506
477
                return unless $cover;
592
593
506
506
334
1579
                for my $file (keys %{$cover}) {
594
0
0
                        my $branch = $cover->{$file}{branch} or next;
595
0
0
0
0
                        for my $line (keys %{$branch}) {
596
0
0
0
0
                                for my $b (0 .. $#{ $branch->{$line} }) {
597                                        $snap{"$file:$line:$b"} = 1
598
0
0
                                                if $branch->{$line}[$b];
599                                }
600                        }
601                }
602        };
603
604
506
133644
        return %snap;
605}
606
607# --------------------------------------------------
608# _is_interesting
609#
610# Purpose:    Return true if the coverage hashref
611#             contains any branch not yet in the
612#             global covered set.
613#
614# Entry:      $coverage - hashref of branch keys.
615# Exit:       Returns 1 if interesting, 0 otherwise.
616#
617# Side effects: None.
618#
619# Notes:      When no coverage data is available,
620#             keeps a random sample of inputs at
621#             RANDOM_KEEP_RATIO so the corpus still
622#             grows even without branch feedback.
623# --------------------------------------------------
624sub _is_interesting {
625
256
212
        my ($self, $coverage) = @_;
626
627        # Check for any newly covered branch
628
256
256
158
239
        for my $key (keys %{$coverage}) {
629
2
4
                return 1 unless $self->{covered}{$key};
630        }
631
632        # No coverage data — keep a random sample to grow the corpus
633
255
255
185
412
        return rand() < $RANDOM_KEEP_RATIO unless %{$coverage};
634
635
1
2
        return 0;
636}
637
638# --------------------------------------------------
639# _update_covered
640#
641# Purpose:    Merge newly covered branches into the
642#             global covered set.
643#
644# Entry:      $coverage - hashref of branch keys.
645# Exit:       Returns nothing. Updates $self->{covered}.
646# Side effects: Modifies $self->{covered}.
647# --------------------------------------------------
648sub _update_covered {
649
38
40
        my ($self, $coverage) = @_;
650
38
38
23
40
        $self->{covered}{$_} = 1 for keys %{$coverage};
651}
652
653# --------------------------------------------------
654# _generate_random
655#
656# Purpose:    Generate a random input value from the
657#             top-level schema input specification.
658#
659# Entry:      None beyond $self.
660# Exit:       Returns a randomly generated value.
661# Side effects: None.
662# --------------------------------------------------
663sub _generate_random {
664
255
181
        my ($self) = @_;
665
255
248
        return $self->_generate_for_schema($self->{schema}{input});
666}
667
668# --------------------------------------------------
669# _generate_for_schema
670#
671# Purpose:    Recursively generate a random value
672#             matching a schema specification hashref.
673#
674# Entry:      $spec - schema spec hashref or scalar
675#             type hint.
676#
677# Exit:       Returns a generated value appropriate
678#             for the spec type, or undef if spec is
679#             absent or 'undef'.
680#
681# Side effects: None.
682#
683# Notes:      Edge cases declared in edge_case_array
684#             are selected at EDGE_CASE_RATIO frequency
685#             to bias toward known interesting values.
686# --------------------------------------------------
687sub _generate_for_schema {
688
302
1482
        my ($self, $spec) = @_;
689
690
302
246
        return undef unless defined $spec;
691
301
277
        return undef if $spec eq 'undef';
692
693
300
329
        my $type = ref($spec) ? ($spec->{type} // $TYPE_STRING) : $TYPE_STRING;
694
695        # Bias toward declared edge cases at EDGE_CASE_RATIO frequency
696
300
447
        if(ref($spec) && $spec->{edge_case_array} && rand() < $EDGE_CASE_RATIO) {
697
6
6
12
6
                my @ec = @{ $spec->{edge_case_array} };
698
6
6
                return $ec[ int(rand(@ec)) ];
699        }
700
701        # Dispatch to type-specific generator
702
294
41
300
86
        if    ($type eq $TYPE_INTEGER) { return $self->_rand_int($spec)    }
703
0
0
        elsif ($type eq $TYPE_NUMBER)  { return $self->_rand_num($spec)    }
704
19
111
        elsif ($type eq $TYPE_BOOLEAN) { return int(rand(2))               }
705
8
52
        elsif ($type eq $TYPE_ARRAY)   { return $self->_rand_array($spec)  }
706
10
76
        elsif ($type eq $TYPE_HASH)    { return $self->_rand_hash($spec)   }
707
216
1653
        else                           { return $self->_rand_string($spec) }
708}
709
710# --------------------------------------------------
711# _rand_int
712#
713# Purpose:    Generate a random integer within the
714#             spec's min/max range, biased toward
715#             boundary values at INT_BOUNDARY_RATIO.
716#
717# Entry:      $spec - schema spec hashref.
718# Exit:       Returns an integer scalar.
719# Side effects: None.
720# --------------------------------------------------
721sub _rand_int {
722
81
5226
        my ($self, $spec) = @_;
723
724
81
76
        my $min = $spec->{min} // $INT32_MIN;
725
81
108
        my $max = $spec->{max} // $INT32_MAX;
726
727        # Bias toward boundary values to probe edge conditions
728
81
123
        if(rand() < $INT_BOUNDARY_RATIO) {
729
32
80
                my @interesting = ($min, $min + 1, 0, -1, 1, $max - 1, $max);
730
32
50
                return $interesting[ int(rand(@interesting)) ];
731        }
732
733
49
134
        return $min + int(rand($max - $min + 1));
734}
735
736# --------------------------------------------------
737# _rand_num
738#
739# Purpose:    Generate a random floating point number
740#             within the spec's min/max range.
741#
742# Entry:      $spec - schema spec hashref.
743# Exit:       Returns a numeric scalar.
744# Side effects: None.
745# --------------------------------------------------
746sub _rand_num {
747
10
1233
        my ($self, $spec) = @_;
748
749
10
13
        my $min = $spec->{min} // -1e9;
750
10
9
        my $max = $spec->{max} //  1e9;
751
752
10
13
        return $min + rand($max - $min);
753}
754
755# --------------------------------------------------
756# _rand_string
757#
758# Purpose:    Generate a random string within the
759#             spec's min/max length range, biased
760#             toward boundary lengths.
761#
762# Entry:      $spec - schema spec hashref.
763# Exit:       Returns a string scalar.
764# Side effects: None.
765#
766# Notes:      Character set includes control chars
767#             and NUL to probe boundary handling.
768# --------------------------------------------------
769sub _rand_string {
770
217
162
        my ($self, $spec) = @_;
771
772
217
256
        my $min_len = $spec->{min} // 0;
773
217
239
        my $max_len = $spec->{max} // $DEFAULT_MAX_STR_LEN;
774
775        # Bias toward boundary lengths at STR_BOUNDARY_RATIO frequency
776
217
534
        my $len;
777
217
218
        if(rand() < $STR_BOUNDARY_RATIO) {
778
21
51
                my @boundary_lens = ($min_len, $min_len + 1, $max_len - 1, $max_len);
779
21
28
                $len = $boundary_lens[ int(rand(@boundary_lens)) ];
780        } else {
781
196
411
                $len = $min_len + int(rand($max_len - $min_len + 1));
782        }
783
784        # Clamp to non-negative
785
217
206
        $len = 0 if $len < 0;
786
787
217
921
        my @chars = ('a'..'z', 'A'..'Z', '0'..'9', ' ', "\t", "\n", "\0");
788
217
8527
260
7882
        return join '', map { $chars[ int(rand(@chars)) ] } 1 .. $len;
789}
790
791# --------------------------------------------------
792# _rand_array
793#
794# Purpose:    Generate a random arrayref with 0 to
795#             DEFAULT_MAX_ARRAY elements, each
796#             generated from the items spec.
797#
798# Entry:      $spec - schema spec hashref.
799# Exit:       Returns an arrayref.
800# Side effects: None.
801# --------------------------------------------------
802sub _rand_array {
803
8
8
        my ($self, $spec) = @_;
804
805
8
12
        my $items = $spec->{items} // {};
806
8
8
        my $count = int(rand($DEFAULT_MAX_ARRAY + 1));
807
808
8
11
24
11
        return [ map { $self->_generate_for_schema($items) } 1 .. $count ];
809}
810
811# --------------------------------------------------
812# _rand_hash
813#
814# Purpose:    Generate a random hashref with values
815#             generated from the properties spec.
816#
817# Entry:      $spec - schema spec hashref.
818# Exit:       Returns a hashref.
819# Side effects: None.
820# --------------------------------------------------
821sub _rand_hash {
822
10
10
        my ($self, $spec) = @_;
823
824
10
15
        my $props = $spec->{properties} // {};
825
10
6
        my %h;
826
827
10
10
9
11
        for my $key (keys %{$props}) {
828
1
2
                $h{$key} = $self->_generate_for_schema($props->{$key});
829        }
830
831
10
16
        return \%h;
832}
833
834# --------------------------------------------------
835# _input_is_valid
836#
837# Purpose:    Return true if the input satisfies all
838#             constraints in the schema. Used to
839#             distinguish real bugs (die on valid
840#             input) from expected failures (die on
841#             invalid input).
842#
843# Entry:      $input - the value to validate.
844# Exit:       Returns 1 if valid, 0 if not.
845#             Returns 1 if no schema is available.
846# Side effects: None.
847# --------------------------------------------------
848sub _input_is_valid {
849
46
50
        my ($self, $input) = @_;
850
851
46
39
        my $spec = $self->{schema}{input};
852
853        # No schema means we cannot judge validity
854
46
78
        return 1 unless defined $spec && ref($spec);
855
856
45
66
        my $input_style = $self->{schema}{input_style} // '';
857
858
45
69
        if($input_style eq 'hash' || ref($input) eq 'HASH') {
859
2
3
                return $self->_validate_hash_input($input, $spec);
860        }
861
862
43
45
        return $self->_validate_value($input, $spec);
863}
864
865# --------------------------------------------------
866# _validate_hash_input
867#
868# Purpose:    Validate a hash-style input against the
869#             schema spec, checking each named field.
870#
871# Entry:      $input - hashref of named parameters.
872#             $spec  - schema spec hashref.
873# Exit:       Returns 1 if valid, 0 if not.
874# Side effects: None.
875# --------------------------------------------------
876sub _validate_hash_input {
877
8
17
        my ($self, $input, $spec) = @_;
878
879
8
9
        return 0 unless defined $input;
880
881
7
7
3
7
        for my $key (keys %{$spec}) {
882                # Skip internal metadata keys
883
8
10
                next if $key =~ /^_/;
884
885
6
6
                my $field_spec = $spec->{$key};
886
6
7
                next unless ref($field_spec) eq 'HASH';
887
888
6
6
                my $value = ref($input) eq 'HASH' ? $input->{$key} : undef;
889
890                # Required field missing is always invalid
891
6
11
                if(!defined($value) && !$field_spec->{optional}) {
892
2
4
                        return 0;
893                }
894
895
4
4
                next unless defined $value;
896
897
3
3
                return 0 unless $self->_validate_value($value, $field_spec);
898        }
899
900
4
7
        return 1;
901}
902
903# --------------------------------------------------
904# _validate_value
905#
906# Purpose:    Validate a single value against a schema
907#             type spec, checking type and constraints.
908#
909# Entry:      $value - the value to validate.
910#             $spec  - schema spec hashref.
911# Exit:       Returns 1 if valid, 0 if not.
912# Side effects: None.
913#
914# Notes:      Number validation accepts both integer
915#             and floating point forms including
916#             scientific notation. Type mismatch
917#             always returns 0.
918# --------------------------------------------------
919sub _validate_value {
920
83
96
        my ($self, $value, $spec) = @_;
921
922        # Undef is never valid unless optional — caller already checked optional
923
83
93
        return 0 unless defined $value;
924
925
81
79
        my $type = $spec->{type} // $TYPE_STRING;
926
927
81
97
        if($type eq $TYPE_INTEGER) {
928
19
74
                return 0 unless $value =~ /^-?\d+$/;
929
16
28
                return 0 if defined($spec->{min}) && $value < $spec->{min};
930
12
17
                return 0 if defined($spec->{max}) && $value > $spec->{max};
931        }
932        elsif($type eq $TYPE_NUMBER) {
933                # Accept integers, decimals, and scientific notation
934
5
49
                return 0 unless $value =~ /^-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
935
4
5
                return 0 if defined($spec->{min}) && $value < $spec->{min};
936
4
6
                return 0 if defined($spec->{max}) && $value > $spec->{max};
937        }
938        elsif($type eq $TYPE_STRING) {
939
44
242
                my $len = length($value);
940
44
67
                return 0 if defined($spec->{min}) && $len < $spec->{min};
941
42
74
                return 0 if defined($spec->{max}) && $len > $spec->{max};
942
38
45
                if(defined($spec->{matches})) {
943
2
6
                        (my $pat = $spec->{matches}) =~ s{^/(.+)/$}{$1};
944
2
45
                        return 0 unless $value =~ /$pat/;
945                }
946        }
947        elsif($type eq $TYPE_BOOLEAN) {
948
6
50
                return 0 unless $value =~ /^[01]$/;
949        }
950        elsif($type eq $TYPE_ARRAY || $type eq 'array') {
951
4
39
                return 0 unless ref($value) eq 'ARRAY';
952        }
953        elsif($type eq $TYPE_HASH || $type eq 'hash') {
954
3
43
                return 0 unless ref($value) eq 'HASH';
955        }
956
957
59
89
        return 1;
958}
959
960# --------------------------------------------------
961# _mutate
962#
963# Purpose:    Apply a random mutation to an input
964#             value, dispatching on its type.
965#
966# Entry:      $input - the value to mutate.
967# Exit:       Returns a mutated copy of the input.
968# Side effects: None.
969#
970# Notes:      Blessed references are passed through
971#             unchanged. Undef is replaced with a
972#             freshly generated random value.
973# --------------------------------------------------
974sub _mutate {
975
232
1160
        my ($self, $input) = @_;
976
977
232
178
        my $type = ref($input);
978
979
232
246
        if(!defined $input) {
980                # Replace undef with a fresh random value
981
2
3
                return $self->_generate_random();
982        }
983        elsif(!$type) {
984                # Dispatch scalar mutation based on apparent type
985
210
429
                if($input =~ /^-?\d+$/) {
986
25
28
                        return $self->_mutate_int($input);
987                } elsif($input =~ /^-?[\d.]+$/) {
988
2
3
                        return $self->_mutate_num($input);
989                } else {
990
183
180
                        return $self->_mutate_string($input);
991                }
992        }
993        elsif($type eq 'ARRAY') {
994
10
15
                return $self->_mutate_array($input);
995        }
996        elsif($type eq 'HASH') {
997
8
9
                return $self->_mutate_hash($input);
998        }
999
1000        # Blessed refs and other types pass through unchanged
1001
2
4
        return $input;
1002}
1003
1004# --------------------------------------------------
1005# _mutate_int
1006#
1007# Purpose:    Apply a random arithmetic mutation to
1008#             an integer value.
1009#
1010# Entry:      $n - the integer to mutate.
1011# Exit:       Returns a mutated integer.
1012# Side effects: None.
1013# --------------------------------------------------
1014sub _mutate_int {
1015
30
1090
        my ($self, $n) = @_;
1016
1017        my @ops = (
1018
5
16
                sub { $n + 1              },
1019
0
0
                sub { $n - 1              },
1020
5
20
                sub { $n * 2              },
1021
5
20
                sub { $n == 0 ? 1 : int($n / 2) },
1022
5
18
                sub { -$n                 },
1023
3
13
                sub { 0                   },
1024
6
6
                sub { $INT32_MAX          },
1025
1
5
                sub { $INT32_MIN          },
1026
30
129
        );
1027
1028
30
40
        return $ops[ int(rand(@ops)) ]->();
1029}
1030
1031# --------------------------------------------------
1032# _mutate_num
1033#
1034# Purpose:    Apply a random arithmetic mutation to
1035#             a floating point value.
1036#
1037# Entry:      $n - the number to mutate.
1038# Exit:       Returns a mutated number.
1039# Side effects: None.
1040# --------------------------------------------------
1041sub _mutate_num {
1042
5
540
        my ($self, $n) = @_;
1043
1044        my @ops = (
1045
0
0
                sub { $n + rand(10)        },
1046
2
9
                sub { $n - rand(10)        },
1047
1
3
                sub { $n * (1 + rand())    },
1048
2
6
                sub { 0                    },
1049
0
0
                sub { -$n                  },
1050
5
16
        );
1051
1052
5
8
        return $ops[ int(rand(@ops)) ]->();
1053}
1054
1055# --------------------------------------------------
1056# _mutate_string
1057#
1058# Purpose:    Apply a random structural mutation to
1059#             a string value — bit flip, insert,
1060#             delete, truncate, repeat, or replace
1061#             with an interesting known value.
1062#
1063# Entry:      $s - the string to mutate.
1064# Exit:       Returns a mutated string.
1065# Side effects: None.
1066# --------------------------------------------------
1067sub _mutate_string {
1068
186
439
        my ($self, $s) = @_;
1069
1070
186
126
        my $len = length($s);
1071
1072        my @ops = (
1073                # Bit flip a random character
1074                sub {
1075
19
19
                        return $s unless $len;
1076
19
20
                        my $pos  = int(rand($len));
1077
19
23
                        my $char = substr($s, $pos, 1);
1078
19
27
                        substr($s, $pos, 1) = chr(ord($char) ^ (1 << int(rand(8))));
1079
19
73
                        $s
1080                },
1081                # Insert a random byte
1082                sub {
1083
55
56
                        my $pos  = int(rand($len + 1));
1084
55
63
                        my $char = chr(int(rand(256)));
1085
55
63
                        substr($s, $pos, 0, $char);
1086
55
265
                        $s
1087                },
1088                # Delete a random character
1089                sub {
1090
14
23
                        return $s unless $len;
1091
13
18
                        substr($s, int(rand($len)), 1, '');
1092
13
69
                        $s
1093                },
1094                # Truncate at a random position
1095
12
54
                sub { substr($s, 0, int(rand($len + 1))) },
1096                # Double the string
1097
51
215
                sub { $s x 2 },
1098                # Replace with a known interesting string
1099                sub {
1100
35
60
                        my @interesting = (
1101                                '', ' ', "\0", "\n", "\t",
1102                                'a' x 256,
1103                                'null', 'undefined',
1104                                "'; DROP TABLE foo; --",
1105                                '<script>alert(1)</script>',
1106                        );
1107
35
164
                        $interesting[ int(rand(@interesting)) ]
1108                },
1109
186
722
        );
1110
1111
186
201
        return $ops[ int(rand(@ops)) ]->();
1112}
1113
1114# --------------------------------------------------
1115# _mutate_array
1116#
1117# Purpose:    Apply a random structural mutation to
1118#             an arrayref — mutate element, duplicate,
1119#             delete, or empty.
1120#
1121# Entry:      $arr - the arrayref to mutate.
1122# Exit:       Returns a mutated arrayref copy.
1123# Side effects: None.
1124# --------------------------------------------------
1125sub _mutate_array {
1126
12
15
        my ($self, $arr) = @_;
1127
1128
12
12
11
12
        my @copy = @{$arr};
1129
1130        my @ops = (
1131                # Mutate a random element
1132                sub {
1133
3
4
                        return [] unless @copy;
1134
3
4
                        my $i = int(rand(@copy));
1135
3
4
                        $copy[$i] = $self->_mutate($copy[$i]);
1136                        \@copy
1137
3
11
                },
1138                # Duplicate a random element
1139                sub {
1140
3
13
                        return \@copy unless @copy;
1141
1
2
                        my $i = int(rand(@copy));
1142
1
3
                        splice @copy, $i, 0, $copy[$i];
1143                        \@copy
1144
1
5
                },
1145                # Delete a random element
1146                sub {
1147
5
19
                        return \@copy unless @copy;
1148
4
7
                        splice @copy, int(rand(@copy)), 1;
1149                        \@copy
1150
4
17
                },
1151                # Return empty array
1152
1
4
                sub { [] },
1153
12
62
        );
1154
1155
12
19
        return $ops[ int(rand(@ops)) ]->();
1156}
1157
1158# --------------------------------------------------
1159# _mutate_hash
1160#
1161# Purpose:    Apply a random mutation to one value
1162#             in a hashref copy.
1163#
1164# Entry:      $h - the hashref to mutate.
1165# Exit:       Returns a mutated hashref copy.
1166# Side effects: None.
1167# --------------------------------------------------
1168sub _mutate_hash {
1169
11
31
        my ($self, $h) = @_;
1170
1171
11
11
10
11
        my %copy = %{$h};
1172
11
13
        my @keys = keys %copy;
1173
1174        # Return unchanged if hash is empty
1175
11
15
        return \%copy unless @keys;
1176
1177
4
6
        my $k = $keys[ int(rand(@keys)) ];
1178
4
7
        $copy{$k} = $self->_mutate($copy{$k});
1179
1180
4
6
        return \%copy;
1181}
1182
1183# --------------------------------------------------
1184# _seed_corpus
1185#
1186# Purpose:    Pre-populate the corpus with a small
1187#             set of randomly generated inputs to
1188#             give the fuzzing loop a starting point.
1189#
1190# Entry:      None beyond $self.
1191# Exit:       Returns nothing. Appends to $self->{corpus}.
1192# Side effects: Modifies $self->{corpus}.
1193# --------------------------------------------------
1194sub _seed_corpus {
1195
42
46
        my $self = $_[0];
1196
1197
42
121
        for (1 .. $SEED_CORPUS_SIZE) {
1198
210
210
224
217
                push @{ $self->{corpus} }, {
1199                        input    => $self->_generate_random(),
1200                        coverage => {},
1201                };
1202        }
1203}
1204
1205# --------------------------------------------------
1206# _build_report
1207#
1208# Purpose:    Construct the summary report hashref
1209#             returned by run().
1210#
1211# Entry:      None beyond $self.
1212# Exit:       Returns a report hashref.
1213# Side effects: None.
1214# --------------------------------------------------
1215sub _build_report {
1216
42
42
        my $self = $_[0];
1217
1218        return {
1219                total_iterations   => $self->{stats}{total},
1220                interesting_inputs => $self->{stats}{interesting},
1221
42
157
                corpus_size        => scalar @{ $self->{corpus} },
1222                branches_covered   => $self->{stats}{coverage},
1223                bugs_found         => $self->{stats}{bugs},
1224                bugs               => $self->{bugs},
1225
42
42
        };
1226}
1227
1228 - 1243
=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

Portions of this module's initial design and documentation were created
with the assistance of AI.

=head1 LICENCE AND COPYRIGHT

Copyright 2026 Nigel Horne.

Usage is subject to GPL2 licence terms.
If you use it,
please let me know.

=cut
1244
12451;