File Coverage

File:blib/lib/App/Test/Generator/LCSAJ.pm
Coverage:96.7%

linestmtbrancondsubtimecode
1package App::Test::Generator::LCSAJ;
2
3
6
6
6
145201
3
73
use strict;
4
6
6
6
12
4
112
use warnings;
5
6
6
6
10
4
119
use Carp qw(croak);
6
6
6
6
9
4
104
use File::Basename  qw(basename);
7
6
6
6
8
5
94
use File::Path qw(make_path);
8
6
6
6
7
5
35
use File::Spec;
9
6
6
6
6
5
115
use JSON::MaybeXS;
10
6
6
6
432
132214
59
use PPI;
11
6
6
6
425
3200
2881
use Readonly;
12
13# --------------------------------------------------
14# Default output directory for LCSAJ JSON files
15# --------------------------------------------------
16Readonly my $DEFAULT_OUT_DIR => 'lcsaj';
17
18our $VERSION = '0.41';
19
20 - 119
=head1 NAME

App::Test::Generator::LCSAJ - Static LCSAJ extraction for Perl

=head1 SYNOPSIS

    use App::Test::Generator::LCSAJ;

    my $paths = App::Test::Generator::LCSAJ->generate(
        'lib/MyModule.pm',
        'cover_html/mutation_html/lib',
    );

=head1 DESCRIPTION

Extracts Linear Code Sequence and Jump (LCSAJ) paths from Perl source
files using static analysis via L<PPI>. Each LCSAJ path describes a
linear sequence of statements followed by a jump to another sequence,
forming the basis for TER3 (third-level Test Effectiveness Ratio)
measurement.

The extracted paths are written as JSON to a C<.lcsaj.json> file under
the output directory, where they are consumed by
C<bin/test-generator-index> for dashboard display and TER3
calculation.

=head1 VERSION

Version 0.41

=head2 generate

Extract LCSAJ paths from all subroutines in a Perl source file and
write the results to a JSON file.

    my $paths = App::Test::Generator::LCSAJ->generate(
        'lib/MyModule.pm',
        'cover_html/mutation_html/lib',
    );

    printf "Extracted %d LCSAJ paths\n", scalar @{$paths};

=head3 Arguments

=over 4

=item * C<$file>

Path to the Perl source file to analyse.

=item * C<$out_dir>

Directory under which the C<.lcsaj.json> file will be written.
Optional — defaults to C<lcsaj>.

=back

=head3 Returns

An arrayref of LCSAJ path hashrefs, each with keys C<start>, C<end>,
and C<target> representing the first line, last line, and jump target
line of the path respectively.

=head3 Side effects

Creates C<$out_dir> if it does not exist. Writes a C<.lcsaj.json>
file to C<$out_dir>.

=head3 Notes

Only named subroutines are analysed. Anonymous subs and file-level
code are not included. The control flow graph is built using a
simplified model that treats branching compound statements as
split points — complex nested structures may not be fully represented.

=head3 API specification

=head4 input

    {
        class   => { type => SCALAR },
        file    => { type => SCALAR },
        out_dir => { type => SCALAR, optional => 1 },
    }

=head4 output

    {
        type     => ARRAYREF,
        elements => {
            type => HASHREF,
            keys => {
                start  => { type => SCALAR },
                end    => { type => SCALAR },
                target => { type => SCALAR },
            },
        },
    }

=cut
120
121sub generate {
122
42
346188
        my ($class, $file, $out_dir) = @_;
123
124        # Apply default output directory if not supplied
125
42
190
        $out_dir //= $DEFAULT_OUT_DIR;
126
127        # Parse the source file — croak early with a clear message
128
42
401
        my $doc = PPI::Document->new($file) or croak "Cannot parse $file";
129
130        # Find all named subroutines in the document
131
39
137099
        my $subs = $doc->find('PPI::Statement::Sub') || [];
132
133
39
29380
        my @all_paths;
134
135
39
39
43
64
        for my $sub (@{$subs}) {
136                # Build a simplified control flow graph for this sub
137
63
105
                my $blocks = _build_cfg($sub);
138
139                # Convert the CFG to LCSAJ path records
140
63
81
                my $paths  = _cfg_to_lcsaj($blocks);
141
63
63
36
102
                push @all_paths, @{$paths};
142        }
143
144        # Write all extracted paths to a JSON file in the output directory
145
39
85
        _save_lcsaj($file, $out_dir, \@all_paths);
146
147
38
110
        return \@all_paths;
148}
149
150# --------------------------------------------------
151# _build_cfg
152#
153# Purpose:    Build a simplified control flow graph
154#             (CFG) for a single subroutine. Each
155#             block in the CFG represents a linear
156#             sequence of statements terminated by a
157#             branch or the end of the sub.
158#
159# Entry:      $sub - a PPI::Statement::Sub node.
160#
161# Exit:       Returns an arrayref of block hashrefs,
162#             each with keys id, lines, and edges.
163#             Returns an empty arrayref if the sub
164#             has no body.
165#
166# Side effects: None.
167#
168# Notes:      Only compound statements (if, unless,
169#             while, for, foreach) are treated as
170#             branch points. Complex nested structures
171#             may not be fully represented.
172# --------------------------------------------------
173sub _build_cfg {
174
74
39583
        my $sub = $_[0];
175
176        # Return empty graph if the sub has no body block
177
74
170
        my $block = $sub->block() or return [];
178
179
73
697
        my @statements = $block->schildren();
180
73
507
        my @blocks;
181
73
64
        my $id = 1;
182
183        # The frontier holds every block currently accumulating statement
184        # lines. A branch forks the whole frontier into a true and a false
185        # successor — every frontier member gets an edge to both, so
186        # subsequent statements (appended to the new frontier) are recorded
187        # against both arms, not just the true one. This keeps the false
188        # arm of every branch populated with real lines/edges instead of
189        # being silently dropped as an empty leaf.
190
73
99
        my $first = _new_block($id);
191
73
55
        push @blocks, $first;
192
73
94
        my @frontier = ($first);
193
194
73
81
        for my $stmt (@statements) {
195
147
247
                my $line = $stmt->line_number;
196
147
173
32601
165
                push @{ $_->{lines} }, $line for @frontier;
197
198                # Branch points fork the frontier into true/false successors
199
147
123
                if(_is_branch($stmt)) {
200
30
47
                        my $true_block  = _new_block(++$id);
201
30
30
                        my $false_block = _new_block(++$id);
202
30
41
                        _connect_blocks($_, $true_block)  for @frontier;
203
30
44
                        _connect_blocks($_, $false_block) for @frontier;
204
205
30
28
                        push @blocks, $true_block, $false_block;
206
30
34
                        @frontier = ($true_block, $false_block);
207                }
208        }
209
210
73
75
        return \@blocks;
211}
212
213# --------------------------------------------------
214# _new_block
215#
216# Purpose:    Construct a new CFG block hashref with
217#             an id, empty lines list, and empty
218#             edges list.
219#
220# Entry:      $id - integer block identifier.
221# Exit:       Returns a hashref.
222# Side effects: None.
223# --------------------------------------------------
224sub _new_block {
225
150
14502
        my $id = $_[0];
226
227
150
288
        return { id => $id, lines => [], edges => [] };
228}
229
230# --------------------------------------------------
231# _connect_blocks
232#
233# Purpose:    Add a directed edge from one CFG block
234#             to another by recording the target
235#             block's id in the source block's edges.
236#
237# Entry:      $from - source block hashref.
238#             $to   - target block hashref.
239# Exit:       Modifies $from->{edges} in place.
240# Side effects: Modifies $from.
241# --------------------------------------------------
242sub _connect_blocks {
243
66
52
        my ($from, $to) = @_;
244
66
66
40
74
        push @{ $from->{edges} }, $to->{id};
245}
246
247# --------------------------------------------------
248# _is_branch
249#
250# Purpose:    Return true if a PPI statement node
251#             represents a branching control structure
252#             that should split the current CFG block.
253#
254# Entry:      $stmt - a PPI::Statement node.
255# Exit:       Returns 1 if the statement is a branch,
256#             0 otherwise.
257# Side effects: None.
258# Notes:      Only compound statement types are
259#             considered — simple expressions are not.
260# --------------------------------------------------
261sub _is_branch {
262
154
18620
        my $stmt = $_[0];
263
264        # Only compound statements can be branch points
265
154
301
        return 0 unless $stmt->isa('PPI::Statement::Compound');
266
267
35
49
        my $type = $stmt->type // '';
268
35
627
        return $type =~ /^(?:if|unless|while|for|foreach)$/ ? 1 : 0;
269}
270
271# --------------------------------------------------
272# _cfg_to_lcsaj
273#
274# Purpose:    Convert a CFG block list into a list of
275#             LCSAJ path records. Each path represents
276#             one linear sequence from a block's first
277#             line to its last line, with a jump to
278#             the first line of the target block.
279#
280# Entry:      $blocks - arrayref of CFG block hashrefs
281#             as produced by _build_cfg.
282#
283# Exit:       Returns an arrayref of path hashrefs,
284#             each with keys start, end, and target.
285#
286# Side effects: None.
287#
288# Notes:      Blocks with no lines (empty blocks) are
289#             excluded from the id-to-line mapping and
290#             their target lines default to 0.
291# --------------------------------------------------
292sub _cfg_to_lcsaj {
293
75
10938
        my $blocks = $_[0];
294
295        # Build a lookup from block id to its first line number
296
118
234
        my %id2line = map { $_->{id} => $_->{lines}[0] }
297
75
132
132
75
43
72
111
65
                grep { @{ $_->{lines} } } @{$blocks};
298
299
75
60
        my @paths;
300
301
75
75
44
108
        for my $b (@{$blocks}) {
302
132
132
91
129
                next unless @{ $b->{edges} };        # Skip blocks with no outgoing edges — they are leaf nodes
303
33
33
21
36
                next unless @{ $b->{lines} };   # skip empty blocks — avoids null-bounds paths
304
305
32
22
                my $start = $b->{lines}[0];
306
32
19
                my $end   = $b->{lines}[-1];
307
308                # Emit one path record per outgoing edge
309
32
32
17
21
                for my $target_id (@{ $b->{edges} }) {
310                        push @paths, {
311                                start  => $start,
312                                end    => $end,
313
60
146
                                target => $id2line{$target_id} // 0,
314                        };
315                }
316        }
317
318
75
99
        return \@paths;
319}
320
321# --------------------------------------------------
322# _save_lcsaj
323#
324# Purpose:    Serialise LCSAJ path records to a JSON
325#             file in the output directory. The
326#             filename is derived from the source
327#             file's basename.
328#
329# Entry:      $file  - path to the source .pm file.
330#             $dir   - output directory path.
331#             $paths - arrayref of path hashrefs.
332#
333# Exit:       Returns nothing. Writes a .lcsaj.json
334#             file to $dir.
335#
336# Side effects: Creates $dir if it does not exist.
337#               Writes a file to $dir.
338#
339# Notes:      Uses File::Basename::basename for
340#             portability across operating systems.
341# --------------------------------------------------
342sub _save_lcsaj {
343
45
11610
        my ($file, $dir, $paths) = @_;
344
345        # Derive the module-relative path (strip leading .../lib/ prefix)
346
45
34
        my $rel = $file;
347
348        # Strip leading path up to and including 'lib/' — handles both
349        # absolute paths (/home/runner/.../lib/App/...) and relative (lib/App/...)
350        # Handle both Unix / and Windows \ separators
351
45
151
        $rel =~ s{^(?:.*[/\\])?lib[/\\]}{};
352
353        # If $file had no 'lib/' segment (e.g. a File::Temp tempfile), the
354        # substitution above is a no-op and $rel is still $file's full
355        # absolute path. Joining that onto $dir below would embed a second
356        # absolute path — and, on Windows, a second drive letter — inside
357        # the constructed directory name, which File::Path::make_path
358        # rejects outright ("Invalid argument"). Fall back to the basename.
359
45
415
        $rel = basename($rel) if File::Spec->file_name_is_absolute($rel);
360
361
45
945
        my $base = basename($rel);
362
363        # Mirror the directory structure expected by _lcsaj_coverage_for_file:
364        # $dir / $rel.lcsaj / $base.lcsaj.json
365
45
186
        my $subdir  = File::Spec->catfile($dir, "$rel.lcsaj");
366
367        # Create the output directory if it does not exist
368
45
4594
        make_path($subdir) unless -d $subdir;
369
44
214
        my $out = File::Spec->catfile($subdir, "$base.lcsaj.json");
370
371        # Remove degenerate paths (null bounds) and exact duplicates
372        # before serialising — guards against empty CFG blocks producing
373        # null start/end values, and branch splits creating identical records
374
44
44
        my %seen;
375        my @clean = grep {
376                defined $_->{start} && defined $_->{end}
377
57
240
                && !$seen{"$_->{start}:$_->{end}:$_->{target}"}++
378
44
44
30
41
        } @{$paths};
379
380
44
1264
        open my $fh, '>', $out or croak "Cannot write LCSAJ output to $out: $!";
381
44
237
        print $fh encode_json(\@clean);
382
44
713
        close $fh;
383}
384
3851;