File Coverage

File:bin/fuzz-harness-generator
Coverage:38.6%

linestmtbrancondsubtimecode
1#!/usr/bin/env perl
2
3
6
6
6
11727
6
96
use strict;
4
6
6
6
8
4
116
use warnings;
5
6
6
6
961
48194
9
use autodie qw(:all);
6
7
6
6
6
59214
9
130
use App::Test::Generator;
8
6
6
6
15
3
49
use File::Spec;
9
6
6
6
7
5
155
use File::Temp;
10
6
6
6
1886
27450
13
use Getopt::Long qw(GetOptions);
11
6
6
6
1579
110509
299
use Pod::Usage;
12
6
6
6
17
4
5004
use YAML::XS qw(LoadFile);
13
14 - 92
=head1 NAME

fuzz-harness-generator - Generate fuzzing + corpus-based test harnesses from test schemas

=head1 SYNOPSIS

  fuzz-harness-generator [-r] [-o output_file] input.yaml
  fuzz-harness-generator --dry-run input.yaml
  fuzz-harness-generator --replay-corpus schemas/corpus/ -o t/fuzz_replay.t
  fuzz-harness-generator --replay-corpus schemas/corpus/translate.json -o t/fuzz_replay.t

=head1 DESCRIPTION

This tool generates a test file that fuzzes and validates a target module's function or method,
using both randomized fuzz cases and a static corpus cases (Perl or YAML).

It can also generate regression test files from corpus JSON files previously
written by C<extract-schemas --fuzz>, using C<--replay-corpus>.

A starter C<input.yaml> can be created using C<extract-schemas> which is also in this package.

=head1 OPTIONS

=over 4

=item B<--help>

Show this help.

=item B<--input>

The input configuration file

=item B<--output>

The (optional) output file.

=item B<--dry-run>

Validate the input configuration and schema extraction without writing any output files or running tests.

=item B<--run>

Call C<prove> on the output file.

C<fuzz-harness-generator -r t/conf/data_text_append.conf> will, therefore, dynamically create and run tests on the C<append> method of L<Data::Text>

=item B<--replay-corpus> PATH

Instead of generating a fuzz harness, generate a regression test file from
one or more corpus JSON files previously written by C<extract-schemas --fuzz>.

PATH may be either:

=over 4

=item * A single corpus file, e.g. C<schemas/corpus/translate.json>

=item * A directory, e.g. C<schemas/corpus/> — all C<*.json> files in that
directory will be included

=back

The generated test file contains one failing test per bug recorded in the
corpus. Each test calls the target method with the exact input that previously
caused a crash and expects it B<not> to die. Tests will be red until the
underlying bug is fixed, at which point they go green and stay green —
acting as permanent regression tests.

Only corpus entries with recorded bugs are included. Clean corpus entries
(inputs that did not cause a bug) are ignored.

=item B<--version>

Prints the version of L<App::Test::Generator>

=back

=cut
93
94
6
430539
my $infile;
95my $outfile;
96
6
0
my $help;
97
6
0
my $run;
98
6
0
my $verbose;
99
6
0
my $version;
100
6
0
my $dry_run;
101
6
0
my $replay_corpus;
102
103
6
13
Getopt::Long::Configure('bundling');
104
105
6
108
GetOptions(
106        'help|h'          => \$help,
107        'input|i=s'       => \$infile,
108        'dry-run|n'       => \$dry_run,
109        'output|o=s'      => \$outfile,
110        'run|r'           => \$run,
111        'verbose|v'       => \$verbose,
112        'version|V'       => \$version,
113        'replay-corpus|R=s' => \$replay_corpus,
114) or pod2usage(2);
115
116
6
3007
pod2usage(-exitval => 0, -verbose => 1) if($help);
117
118
5
6
if($version) {
119
1
5
        print $App::Test::Generator::VERSION, "\n";
120
1
28
        exit 0;
121}
122
123# ---------------------------------------------------------------------------
124# --replay-corpus mode: generate a regression .t from corpus bug entries
125# ---------------------------------------------------------------------------
126
127
4
4
if($replay_corpus) {
128
0
0
        pod2usage('--replay-corpus cannot be combined with --dry-run') if $dry_run;
129
0
0
        pod2usage('--replay-corpus cannot be combined with --input')   if $infile;
130
131
0
0
        my @corpus_files = _collect_corpus_files($replay_corpus);
132
0
0
        die "No corpus JSON files found at: $replay_corpus\n" unless @corpus_files;
133
134
0
0
        my $tap = _generate_replay_tap(@corpus_files);
135
136
0
0
        if($outfile) {
137
0
0
                open(my $fh, '>', $outfile)
138                        or die "Cannot write to $outfile: $!";
139
0
0
                print $fh $tap;
140
0
0
                close $fh;
141
0
0
                chmod 0755, $outfile;
142
0
0
                print "Replay test written to: $outfile\n";
143
0
0
                if($run) {
144
0
0
                        exit system('prove', '-l', $outfile) >> 8;
145                }
146        } else {
147
0
0
                print $tap;
148        }
149
0
0
        exit 0;
150}
151
152
4
12
if($infile && @ARGV) {
153
0
0
        pod2usage('Specify input file either as argument or via --input, not both');
154}
155
156
4
4
if($infile) {
157
4
4
4
8
        my $schema = eval { LoadFile($infile) };
158
4
512
        if($@) {
159
1
3
                die "Cannot parse '$infile' as YAML: $@";
160        }
161
3
5
        unless(ref($schema) eq 'HASH') {
162
0
0
                die "Input file '$infile' does not contain a YAML hash";
163        }
164
3
13
        unless($schema->{function}) {
165
0
0
                die "Input file '$infile' is missing required 'function' key";
166        }
167}
168
169
3
6
$infile ||= shift @ARGV or pod2usage('No config file given');
170
171
3
5
if($dry_run && $run) {
172
0
0
        pod2usage('--dry-run cannot be used with --run');
173}
174
175
3
5
if($dry_run && $outfile) {
176
1
7
        warn '--dry-run specified; --output will be ignored';
177}
178
179
3
166
if($verbose) {
180
0
0
        $ENV{'TEST_VERBOSE'} = 1;
181}
182
183
3
3
if($run && !$outfile) {
184
0
0
        my ($fh, $tmp) = File::Temp::tempfile();
185
0
0
        close $fh;
186
187
0
0
        App::Test::Generator->generate($infile, $tmp);
188
189
0
0
        exit system('prove', '-l', $tmp) >> 8;
190}
191
192
3
10
if($dry_run) {
193
2
4
        my ($fh, $tmp) = File::Temp::tempfile();
194
2
496
        close $fh;
195
196        eval {
197
2
9
                App::Test::Generator->generate($infile, $tmp);
198
2
84
                1;
199
2
806
        } or do {
200
0
0
                die "Dry-run failed for $infile: $@";
201        };
202
203
2
4
        unlink $tmp;
204
2
684
        print "Dry-run OK: $infile parsed and validated successfully\n";
205
2
66
        exit 0;
206} elsif($outfile && -e $outfile && !$run) {
207
0
0
        warn "Overwriting existing file: $outfile";
208}
209
210
1
4
App::Test::Generator->generate($infile, $outfile);
211
212
1
38
if($outfile) {
213
1
1
        chmod 0755, $outfile if($outfile =~ /\.(pl|cgi)$/);
214
1
1
        if($run) {
215                # Use list form to avoid shell interpolation of $outfile
216
0
0
                system('prove', '-l', $outfile);
217        }
218}
219
220
1
25
exit 0;
221
222# ---------------------------------------------------------------------------
223# Helpers for --replay-corpus
224# ---------------------------------------------------------------------------
225
226# --------------------------------------------------
227# _collect_corpus_files
228#
229# Purpose:    Collect the list of corpus JSON files
230#             to process for --replay-corpus mode.
231#             Accepts either a single file path or
232#             a directory, returning all *.json files
233#             found in the directory case.
234#
235# Entry:      $path - filesystem path to either a
236#                     single .json file or a directory
237#                     containing .json files.
238#
239# Exit:       Returns a sorted list of file paths.
240#             Returns an empty list if the path does
241#             not exist or contains no .json files.
242#
243# Side effects: None.
244#
245# Notes:      Directory globbing matches only *.json
246#             files at the top level of the directory;
247#             subdirectories are not recursed into.
248# --------------------------------------------------
249sub _collect_corpus_files {
250
0
        my ($path) = @_;
251
252
0
        if(-f $path) {
253
0
                return ($path);
254        } elsif(-d $path) {
255
0
                my @files = glob(File::Spec->catfile($path, '*.json'));
256
0
                return sort @files;
257        }
258
259
0
        return ();
260}
261
262# --------------------------------------------------
263# _generate_replay_tap
264#
265# Purpose:    Read one or more corpus JSON files and
266#             produce a complete .t file as a string.
267#             Each bug entry in the corpus becomes
268#             one lives_ok test that calls the target
269#             method with the exact input that
270#             previously caused a crash, asserting
271#             that it no longer dies.
272#
273# Entry:      @corpus_files - list of paths to corpus
274#                             JSON files as returned
275#                             by _collect_corpus_files.
276#
277# Exit:       Returns the complete .t file content as
278#             a string. Never returns undef.
279#             Returns a skip_all plan if no bugs are
280#             found across all corpus files.
281#
282# Side effects: Reads corpus JSON files from disk.
283#               Attempts to load JSON::MaybeXS or
284#               JSON via block eval.
285#
286# Notes:      Corpus files that cannot be parsed are
287#             skipped with a warning rather than
288#             aborting the entire run.
289#             Clean corpus entries (those without
290#             recorded bugs) are silently ignored —
291#             only entries with a 'bugs' array are
292#             processed.
293#             The module name for each test is
294#             inferred from the YAML schema file
295#             alongside the corpus file via
296#             _infer_module_from_schema. Falls back
297#             to 'UNKNOWN::Module' if not found.
298# --------------------------------------------------
299sub _generate_replay_tap {
300
0
        my (@corpus_files) = @_;
301
302        # Prefer JSON::MaybeXS for correctness; fall back to JSON
303
0
        my $json_module;
304
0
        for my $mod (qw(JSON::MaybeXS JSON)) {
305
0
0
0
                eval { require $mod; 1 } and $json_module = $mod and last;
306        }
307
0
        die "No JSON module available; install JSON or JSON::MaybeXS\n"
308                unless $json_module;
309
310        # Collect all bugs across all corpus files into a flat list
311
0
        my @tests;
312
313
0
        for my $file (@corpus_files) {
314
0
                open(my $fh, '<', $file)
315                        or die "Cannot read $file: $!";
316
0
                my $data = eval {
317
0
0
0
                        $json_module->new->decode(do { local $/; <$fh> })
318                };
319
0
                close $fh;
320
321
0
                if($@) {
322
0
                        warn "Skipping $file: could not parse JSON: $@\n";
323
0
                        next;
324                }
325
326
0
                my $bugs = $data->{'bugs'} // [];
327
0
0
                next unless @{$bugs};
328
329                # Derive method name from filename: translate.json -> translate
330
0
                my (undef, undef, $fname) = File::Spec->splitpath($file);
331
0
                (my $method = $fname) =~ s/\.json$//;
332
333                # Look up the module name from the companion schema file;
334                # fall back to a placeholder if the schema cannot be found
335
0
                my $module = _infer_module_from_schema($file, $method)
336                        // 'UNKNOWN::Module';
337
338
0
0
                for my $bug (@{$bugs}) {
339                        push @tests, {
340                                module => $module,
341                                method => $method,
342                                input  => $bug->{'input'},
343
0
                                error  => $bug->{'error'},
344                                file   => $file,
345                        };
346                }
347        }
348
349        # Build the .t header — include Test::Exception up front since
350        # lives_ok is always needed when there are tests to emit
351
0
        my $t = <<'HEADER';
352#!/usr/bin/env perl
353# Auto-generated by fuzz-harness-generator --replay-corpus
354# DO NOT EDIT - regenerate from corpus files instead
355use strict;
356use warnings;
357use Test::More;
358use Test::Exception;
359HEADER
360
361
0
        my $test_count = scalar @tests;
362
363
0
        if($test_count == 0) {
364
0
                $t .= "\nplan skip_all => 'No bugs recorded in corpus files';\n";
365
0
                return $t;
366        }
367
368        # Emit one use statement per unique module (excluding the placeholder)
369
0
0
        my %modules = map { $_->{'module'} => 1 } @tests;
370
0
        for my $mod (sort keys %modules) {
371
0
                next if $mod eq 'UNKNOWN::Module';
372
0
                $t .= "use $mod;\n";
373        }
374
375
0
        $t .= "\nplan tests => $test_count;\n\n";
376
377
0
        for my $i (0 .. $#tests) {
378
0
                my $test  = $tests[$i];
379
0
                my $n     = $i + 1;
380
0
                my $input = _format_input($test->{'input'});
381
0
                my $label = "$test->{'method'} does not die on input from $test->{'file'}";
382
383                # Flatten and escape the original error for use as a comment
384
0
                (my $orig_error = $test->{'error'} // '') =~ s/\n/ /g;
385
0
                $orig_error =~ s/'/\\'/g;
386
387
0
                $t .= "# Corpus bug: $orig_error\n";
388
0
                $t .= "lives_ok { $test->{'module'}\->$test->{'method'}($input) }\n";
389
0
                $t .= "    '$label';\n\n";
390        }
391
392
0
        return $t;
393}
394
395# --------------------------------------------------
396# _format_input
397#
398# Purpose:    Format a scalar input value as a Perl
399#             literal string suitable for embedding
400#             directly in generated test source code.
401#
402# Entry:      $input - the input value to format.
403#                      May be undef, a numeric string,
404#                      or an arbitrary string.
405#
406# Exit:       Returns a Perl literal string:
407#               'undef'     if $input is undef
408#               bare number if $input looks numeric
409#               single-quoted string otherwise, with
410#               backslashes and single quotes escaped.
411#
412# Side effects: None.
413#
414# Notes:      Only scalar inputs are handled — corpus
415#             entries with arrayref or hashref inputs
416#             are not currently supported and will be
417#             formatted as a single-quoted string of
418#             the stringified reference, which will
419#             not reproduce the original input.
420# --------------------------------------------------
421sub _format_input {
422
0
        my ($input) = @_;
423
424
0
        return 'undef' unless defined $input;
425
426        # Emit bare numeric literals without quoting
427
0
        return $input if $input =~ /^-?(?:\d+\.?\d*|\.\d+)$/;
428
429        # Escape backslashes first, then single quotes, to avoid
430        # double-escaping when both appear in the same string
431
0
        (my $escaped = $input) =~ s/\\/\\\\/g;
432
0
        $escaped =~ s/'/\\'/g;
433
434
0
        return "'$escaped'";
435}
436
437# --------------------------------------------------
438# _infer_module_from_schema
439#
440# Purpose:    Attempt to determine the Perl module
441#             name for a given corpus method by
442#             locating and reading the companion YAML
443#             schema file that sits alongside the
444#             corpus directory.
445#
446# Entry:      $corpus_file - path to the corpus JSON
447#                            file, e.g.
448#                            schemas/corpus/translate.json
449#             $method      - the method name derived
450#                            from the corpus filename,
451#                            e.g. 'translate'
452#
453# Exit:       Returns the module name string if found,
454#             or undef if no companion schema file
455#             exists or the schema contains no
456#             'module:' line.
457#
458# Side effects: Reads schema files from disk.
459#
460# Notes:      The corpus is expected to live one
461#             directory below the schemas directory,
462#             e.g. schemas/corpus/ alongside
463#             schemas/translate.yaml. This function
464#             walks up one level from the corpus
465#             directory to find the schema.
466#             Both .yaml and .yml extensions are
467#             tried, in that order.
468# --------------------------------------------------
469sub _infer_module_from_schema {
470
0
        my ($corpus_file, $method) = @_;
471
472
0
        my (undef, $corpus_dir) = File::Spec->splitpath($corpus_file);
473
474        # Walk up one directory from corpus/ to reach the schemas/ dir
475
0
        my $schema_dir = File::Spec->catdir($corpus_dir, File::Spec->updir());
476
477
0
        for my $ext (qw(yaml yml)) {
478
0
                my $schema_file = File::Spec->catfile($schema_dir, "$method.$ext");
479
0
                next unless -f $schema_file;
480
481
0
                open(my $fh, '<', $schema_file) or next;
482
0
                while(<$fh>) {
483
0
                        if(/^module:\s*(\S+)/) {
484
0
                                close $fh;
485
0
                                return $1;
486                        }
487                }
488
0
                close $fh;
489        }
490
491
0
        return undef;
492}
493