| File: | blib/lib/App/Test/Generator/LCSAJ.pm |
| Coverage: | 96.8% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package App::Test::Generator::LCSAJ; | |||||
| 2 | ||||||
| 3 | 5 5 5 | 146524 3 61 | use strict; | |||
| 4 | 5 5 5 | 9 2 91 | use warnings; | |||
| 5 | 5 5 5 | 9 3 135 | use Carp qw(croak); | |||
| 6 | 5 5 5 | 9 7 130 | use File::Basename qw(basename); | |||
| 7 | 5 5 5 | 14 3 75 | use File::Path qw(make_path); | |||
| 8 | 5 5 5 | 10 0 40 | use File::Spec; | |||
| 9 | 5 5 5 | 8 5 135 | use JSON::MaybeXS; | |||
| 10 | 5 5 5 | 406 132772 56 | use PPI; | |||
| 11 | 5 5 5 | 388 3459 2388 | use Readonly; | |||
| 12 | ||||||
| 13 | # -------------------------------------------------- | |||||
| 14 | # Default output directory for LCSAJ JSON files | |||||
| 15 | # -------------------------------------------------- | |||||
| 16 | Readonly my $DEFAULT_OUT_DIR => 'lcsaj'; | |||||
| 17 | ||||||
| 18 | our $VERSION = '0.36'; | |||||
| 19 | ||||||
| 20 - 119 | =head1 NAME
App::Test::Generator::LCSAJ - Static LCSAJ extraction for Perl
=head1 SYNOPSIS
use App::Test::Generator::LCSAJ;
my $paths = App::Test::Generator::LCSAJ->generate(
'lib/MyModule.pm',
'cover_html/mutation_html/lib',
);
=head1 DESCRIPTION
Extracts Linear Code Sequence and Jump (LCSAJ) paths from Perl source
files using static analysis via L<PPI>. Each LCSAJ path describes a
linear sequence of statements followed by a jump to another sequence,
forming the basis for TER3 (third-level Test Effectiveness Ratio)
measurement.
The extracted paths are written as JSON to a C<.lcsaj.json> file under
the output directory, where they are consumed by
C<bin/test-generator-index> for dashboard display and TER3
calculation.
=head1 VERSION
Version 0.36
=head2 generate
Extract LCSAJ paths from all subroutines in a Perl source file and
write the results to a JSON file.
my $paths = App::Test::Generator::LCSAJ->generate(
'lib/MyModule.pm',
'cover_html/mutation_html/lib',
);
printf "Extracted %d LCSAJ paths\n", scalar @{$paths};
=head3 Arguments
=over 4
=item * C<$file>
Path to the Perl source file to analyse. Required.
=item * C<$out_dir>
Directory under which the C<.lcsaj.json> file will be written.
Optional â defaults to C<lcsaj>.
=back
=head3 Returns
An arrayref of LCSAJ path hashrefs, each with keys C<start>, C<end>,
and C<target> representing the first line, last line, and jump target
line of the path respectively.
=head3 Side effects
Creates C<$out_dir> if it does not exist. Writes a C<.lcsaj.json>
file to C<$out_dir>.
=head3 Notes
Only named subroutines are analysed. Anonymous subs and file-level
code are not included. The control flow graph is built using a
simplified model that treats branching compound statements as
split points â complex nested structures may not be fully represented.
=head3 API specification
=head4 input
{
class => { type => SCALAR },
file => { type => SCALAR },
out_dir => { type => SCALAR, optional => 1 },
}
=head4 output
{
type => ARRAYREF,
elements => {
type => HASHREF,
keys => {
start => { type => SCALAR },
end => { type => SCALAR },
target => { type => SCALAR },
},
},
}
=cut | |||||
| 120 | ||||||
| 121 | sub generate { | |||||
| 122 | 40 | 340773 | my ($class, $file, $out_dir) = @_; | |||
| 123 | ||||||
| 124 | # Apply default output directory if not supplied | |||||
| 125 | 40 | 200 | $out_dir //= $DEFAULT_OUT_DIR; | |||
| 126 | ||||||
| 127 | # Parse the source file â croak early with a clear message | |||||
| 128 | 40 | 392 | my $doc = PPI::Document->new($file) or croak "Cannot parse $file"; | |||
| 129 | ||||||
| 130 | # Find all named subroutines in the document | |||||
| 131 | 38 | 135892 | my $subs = $doc->find('PPI::Statement::Sub') || []; | |||
| 132 | ||||||
| 133 | 38 | 29167 | my @all_paths; | |||
| 134 | ||||||
| 135 | 38 38 | 30 79 | for my $sub (@{$subs}) { | |||
| 136 | # Build a simplified control flow graph for this sub | |||||
| 137 | 62 | 97 | my $blocks = _build_cfg($sub); | |||
| 138 | ||||||
| 139 | # Convert the CFG to LCSAJ path records | |||||
| 140 | 62 | 73 | my $paths = _cfg_to_lcsaj($blocks); | |||
| 141 | 62 62 | 50 93 | push @all_paths, @{$paths}; | |||
| 142 | } | |||||
| 143 | ||||||
| 144 | # Write all extracted paths to a JSON file in the output directory | |||||
| 145 | 38 | 86 | _save_lcsaj($file, $out_dir, \@all_paths); | |||
| 146 | ||||||
| 147 | 37 | 92 | return \@all_paths; | |||
| 148 | } | |||||
| 149 | ||||||
| 150 | # -------------------------------------------------- | |||||
| 151 | # _build_cfg | |||||
| 152 | # | |||||
| 153 | # Purpose: Build a simplified control flow graph | |||||
| 154 | # (CFG) for a single subroutine. Each | |||||
| 155 | # block in the CFG represents a linear | |||||
| 156 | # sequence of statements terminated by a | |||||
| 157 | # branch or the end of the sub. | |||||
| 158 | # | |||||
| 159 | # Entry: $sub - a PPI::Statement::Sub node. | |||||
| 160 | # | |||||
| 161 | # Exit: Returns an arrayref of block hashrefs, | |||||
| 162 | # each with keys id, lines, and edges. | |||||
| 163 | # Returns an empty arrayref if the sub | |||||
| 164 | # has no body. | |||||
| 165 | # | |||||
| 166 | # Side effects: None. | |||||
| 167 | # | |||||
| 168 | # Notes: Only compound statements (if, unless, | |||||
| 169 | # while, for, foreach) are treated as | |||||
| 170 | # branch points. Complex nested structures | |||||
| 171 | # may not be fully represented. | |||||
| 172 | # -------------------------------------------------- | |||||
| 173 | sub _build_cfg { | |||||
| 174 | 72 | 36890 | my $sub = $_[0]; | |||
| 175 | ||||||
| 176 | # Return empty graph if the sub has no body block | |||||
| 177 | 72 | 198 | my $block = $sub->block() or return []; | |||
| 178 | ||||||
| 179 | 71 | 673 | my @statements = $block->schildren(); | |||
| 180 | 71 | 485 | my @blocks; | |||
| 181 | 71 | 66 | my $id = 1; | |||
| 182 | 71 | 73 | my $current = _new_block($id); | |||
| 183 | ||||||
| 184 | 71 | 81 | for my $stmt (@statements) { | |||
| 185 | 142 | 251 | my $line = $stmt->line_number; | |||
| 186 | 142 142 | 32465 125 | push @{ $current->{lines} }, $line; | |||
| 187 | ||||||
| 188 | # Branch points split the current block and start two new ones | |||||
| 189 | 142 | 119 | if(_is_branch($stmt)) { | |||
| 190 | 29 | 28 | push @blocks, $current; | |||
| 191 | ||||||
| 192 | # Create true and false successor blocks | |||||
| 193 | 29 | 32 | my $true_block = _new_block(++$id); | |||
| 194 | 29 | 30 | my $false_block = _new_block(++$id); | |||
| 195 | 29 | 39 | _connect_blocks($current, $true_block); | |||
| 196 | 29 | 26 | _connect_blocks($current, $false_block); | |||
| 197 | ||||||
| 198 | 29 | 26 | push @blocks, $true_block, $false_block; | |||
| 199 | 29 | 26 | $current = $true_block; | |||
| 200 | } | |||||
| 201 | } | |||||
| 202 | ||||||
| 203 | # Append the final block after all statements are processed | |||||
| 204 | 71 | 55 | push @blocks, $current; | |||
| 205 | ||||||
| 206 | # Connect sequential fallthrough edges between adjacent blocks | |||||
| 207 | # that have no outgoing edges yet | |||||
| 208 | 71 | 78 | for(my $i = 0; $i < $#blocks; $i++) { | |||
| 209 | 87 87 | 54 84 | next if @{ $blocks[$i]{edges} }; | |||
| 210 | 57 | 44 | _connect_blocks($blocks[$i], $blocks[$i + 1]); | |||
| 211 | } | |||||
| 212 | ||||||
| 213 | 71 | 75 | return \@blocks; | |||
| 214 | } | |||||
| 215 | ||||||
| 216 | # -------------------------------------------------- | |||||
| 217 | # _new_block | |||||
| 218 | # | |||||
| 219 | # Purpose: Construct a new CFG block hashref with | |||||
| 220 | # an id, empty lines list, and empty | |||||
| 221 | # edges list. | |||||
| 222 | # | |||||
| 223 | # Entry: $id - integer block identifier. | |||||
| 224 | # Exit: Returns a hashref. | |||||
| 225 | # Side effects: None. | |||||
| 226 | # -------------------------------------------------- | |||||
| 227 | sub _new_block { | |||||
| 228 | 146 | 14664 | my $id = $_[0]; | |||
| 229 | ||||||
| 230 | 146 | 288 | return { id => $id, lines => [], edges => [] }; | |||
| 231 | } | |||||
| 232 | ||||||
| 233 | # -------------------------------------------------- | |||||
| 234 | # _connect_blocks | |||||
| 235 | # | |||||
| 236 | # Purpose: Add a directed edge from one CFG block | |||||
| 237 | # to another by recording the target | |||||
| 238 | # block's id in the source block's edges. | |||||
| 239 | # | |||||
| 240 | # Entry: $from - source block hashref. | |||||
| 241 | # $to - target block hashref. | |||||
| 242 | # Exit: Modifies $from->{edges} in place. | |||||
| 243 | # Side effects: Modifies $from. | |||||
| 244 | # -------------------------------------------------- | |||||
| 245 | sub _connect_blocks { | |||||
| 246 | 119 | 78 | my ($from, $to) = @_; | |||
| 247 | 119 119 | 68 128 | push @{ $from->{edges} }, $to->{id}; | |||
| 248 | } | |||||
| 249 | ||||||
| 250 | # -------------------------------------------------- | |||||
| 251 | # _is_branch | |||||
| 252 | # | |||||
| 253 | # Purpose: Return true if a PPI statement node | |||||
| 254 | # represents a branching control structure | |||||
| 255 | # that should split the current CFG block. | |||||
| 256 | # | |||||
| 257 | # Entry: $stmt - a PPI::Statement node. | |||||
| 258 | # Exit: Returns 1 if the statement is a branch, | |||||
| 259 | # 0 otherwise. | |||||
| 260 | # Side effects: None. | |||||
| 261 | # Notes: Only compound statement types are | |||||
| 262 | # considered â simple expressions are not. | |||||
| 263 | # -------------------------------------------------- | |||||
| 264 | sub _is_branch { | |||||
| 265 | 149 | 18953 | my $stmt = $_[0]; | |||
| 266 | ||||||
| 267 | # Only compound statements can be branch points | |||||
| 268 | 149 | 270 | return 0 unless $stmt->isa('PPI::Statement::Compound'); | |||
| 269 | ||||||
| 270 | 34 | 63 | my $type = $stmt->type // ''; | |||
| 271 | 34 | 624 | return $type =~ /^(?:if|unless|while|for|foreach)$/ ? 1 : 0; | |||
| 272 | } | |||||
| 273 | ||||||
| 274 | # -------------------------------------------------- | |||||
| 275 | # _cfg_to_lcsaj | |||||
| 276 | # | |||||
| 277 | # Purpose: Convert a CFG block list into a list of | |||||
| 278 | # LCSAJ path records. Each path represents | |||||
| 279 | # one linear sequence from a block's first | |||||
| 280 | # line to its last line, with a jump to | |||||
| 281 | # the first line of the target block. | |||||
| 282 | # | |||||
| 283 | # Entry: $blocks - arrayref of CFG block hashrefs | |||||
| 284 | # as produced by _build_cfg. | |||||
| 285 | # | |||||
| 286 | # Exit: Returns an arrayref of path hashrefs, | |||||
| 287 | # each with keys start, end, and target. | |||||
| 288 | # | |||||
| 289 | # Side effects: None. | |||||
| 290 | # | |||||
| 291 | # Notes: Blocks with no lines (empty blocks) are | |||||
| 292 | # excluded from the id-to-line mapping and | |||||
| 293 | # their target lines default to 0. | |||||
| 294 | # -------------------------------------------------- | |||||
| 295 | sub _cfg_to_lcsaj { | |||||
| 296 | 74 | 11323 | my $blocks = $_[0]; | |||
| 297 | ||||||
| 298 | # Build a lookup from block id to its first line number | |||||
| 299 | 117 | 208 | my %id2line = map { $_->{id} => $_->{lines}[0] } | |||
| 300 | 74 155 155 74 | 50 83 129 59 | grep { @{ $_->{lines} } } @{$blocks}; | |||
| 301 | ||||||
| 302 | 74 | 52 | my @paths; | |||
| 303 | ||||||
| 304 | 74 74 | 47 90 | for my $b (@{$blocks}) { | |||
| 305 | 155 155 | 87 134 | next unless @{ $b->{edges} }; # Skip blocks with no outgoing edges â they are leaf nodes | |||
| 306 | 103 103 | 52 81 | next unless @{ $b->{lines} }; # skip empty blocks â avoids null-bounds paths | |||
| 307 | ||||||
| 308 | 70 | 48 | my $start = $b->{lines}[0]; | |||
| 309 | 70 | 59 | my $end = $b->{lines}[-1]; | |||
| 310 | ||||||
| 311 | # Emit one path record per outgoing edge | |||||
| 312 | 70 70 | 38 64 | for my $target_id (@{ $b->{edges} }) { | |||
| 313 | push @paths, { | |||||
| 314 | start => $start, | |||||
| 315 | end => $end, | |||||
| 316 | 98 | 238 | target => $id2line{$target_id} // 0, | |||
| 317 | }; | |||||
| 318 | } | |||||
| 319 | } | |||||
| 320 | ||||||
| 321 | 74 | 85 | return \@paths; | |||
| 322 | } | |||||
| 323 | ||||||
| 324 | # -------------------------------------------------- | |||||
| 325 | # _save_lcsaj | |||||
| 326 | # | |||||
| 327 | # Purpose: Serialise LCSAJ path records to a JSON | |||||
| 328 | # file in the output directory. The | |||||
| 329 | # filename is derived from the source | |||||
| 330 | # file's basename. | |||||
| 331 | # | |||||
| 332 | # Entry: $file - path to the source .pm file. | |||||
| 333 | # $dir - output directory path. | |||||
| 334 | # $paths - arrayref of path hashrefs. | |||||
| 335 | # | |||||
| 336 | # Exit: Returns nothing. Writes a .lcsaj.json | |||||
| 337 | # file to $dir. | |||||
| 338 | # | |||||
| 339 | # Side effects: Creates $dir if it does not exist. | |||||
| 340 | # Writes a file to $dir. | |||||
| 341 | # | |||||
| 342 | # Notes: Uses File::Basename::basename for | |||||
| 343 | # portability across operating systems. | |||||
| 344 | # -------------------------------------------------- | |||||
| 345 | sub _save_lcsaj { | |||||
| 346 | 44 | 11893 | my ($file, $dir, $paths) = @_; | |||
| 347 | ||||||
| 348 | # Derive the module-relative path (strip leading .../lib/ prefix) | |||||
| 349 | 44 | 45 | my $rel = $file; | |||
| 350 | ||||||
| 351 | # Strip leading path up to and including 'lib/' â handles both | |||||
| 352 | # absolute paths (/home/runner/.../lib/App/...) and relative (lib/App/...) | |||||
| 353 | # Handle both Unix / and Windows \ separators | |||||
| 354 | 44 | 129 | $rel =~ s{^(?:.*[/\\])?lib[/\\]}{}; | |||
| 355 | ||||||
| 356 | 44 | 1056 | my $base = basename($rel); | |||
| 357 | ||||||
| 358 | # Mirror the directory structure expected by _lcsaj_coverage_for_file: | |||||
| 359 | # $dir / $rel.lcsaj / $base.lcsaj.json | |||||
| 360 | 44 | 237 | my $subdir = File::Spec->catfile($dir, "$rel.lcsaj"); | |||
| 361 | ||||||
| 362 | # Create the output directory if it does not exist | |||||
| 363 | 44 | 4670 | make_path($subdir) unless -d $subdir; | |||
| 364 | 43 | 224 | my $out = File::Spec->catfile($subdir, "$base.lcsaj.json"); | |||
| 365 | ||||||
| 366 | # Remove degenerate paths (null bounds) and exact duplicates | |||||
| 367 | # before serialising â guards against empty CFG blocks producing | |||||
| 368 | # null start/end values, and branch splits creating identical records | |||||
| 369 | 43 | 51 | my %seen; | |||
| 370 | my @clean = grep { | |||||
| 371 | defined $_->{start} && defined $_->{end} | |||||
| 372 | 93 | 263 | && !$seen{"$_->{start}:$_->{end}:$_->{target}"}++ | |||
| 373 | 43 43 | 32 40 | } @{$paths}; | |||
| 374 | ||||||
| 375 | 43 | 1250 | open my $fh, '>', $out or croak "Cannot write LCSAJ output to $out: $!"; | |||
| 376 | 43 | 280 | print $fh encode_json(\@clean); | |||
| 377 | 43 | 712 | close $fh; | |||
| 378 | } | |||||
| 379 | ||||||
| 380 | 1; | |||||