| File: | blib/lib/App/Test/Generator/LCSAJ.pm |
| Coverage: | 96.7% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package App::Test::Generator::LCSAJ; | |||||
| 2 | ||||||
| 3 | 6 6 6 | 145201 3 73 | use strict; | |||
| 4 | 6 6 6 | 12 4 112 | use warnings; | |||
| 5 | 6 6 6 | 10 4 119 | use Carp qw(croak); | |||
| 6 | 6 6 6 | 9 4 104 | use File::Basename qw(basename); | |||
| 7 | 6 6 6 | 8 5 94 | use File::Path qw(make_path); | |||
| 8 | 6 6 6 | 7 5 35 | use File::Spec; | |||
| 9 | 6 6 6 | 6 5 115 | use JSON::MaybeXS; | |||
| 10 | 6 6 6 | 432 132214 59 | use PPI; | |||
| 11 | 6 6 6 | 425 3200 2881 | use Readonly; | |||
| 12 | ||||||
| 13 | # -------------------------------------------------- | |||||
| 14 | # Default output directory for LCSAJ JSON files | |||||
| 15 | # -------------------------------------------------- | |||||
| 16 | Readonly my $DEFAULT_OUT_DIR => 'lcsaj'; | |||||
| 17 | ||||||
| 18 | our $VERSION = '0.41'; | |||||
| 19 | ||||||
| 20 - 119 | =head1 NAME
App::Test::Generator::LCSAJ - Static LCSAJ extraction for Perl
=head1 SYNOPSIS
use App::Test::Generator::LCSAJ;
my $paths = App::Test::Generator::LCSAJ->generate(
'lib/MyModule.pm',
'cover_html/mutation_html/lib',
);
=head1 DESCRIPTION
Extracts Linear Code Sequence and Jump (LCSAJ) paths from Perl source
files using static analysis via L<PPI>. Each LCSAJ path describes a
linear sequence of statements followed by a jump to another sequence,
forming the basis for TER3 (third-level Test Effectiveness Ratio)
measurement.
The extracted paths are written as JSON to a C<.lcsaj.json> file under
the output directory, where they are consumed by
C<bin/test-generator-index> for dashboard display and TER3
calculation.
=head1 VERSION
Version 0.41
=head2 generate
Extract LCSAJ paths from all subroutines in a Perl source file and
write the results to a JSON file.
my $paths = App::Test::Generator::LCSAJ->generate(
'lib/MyModule.pm',
'cover_html/mutation_html/lib',
);
printf "Extracted %d LCSAJ paths\n", scalar @{$paths};
=head3 Arguments
=over 4
=item * C<$file>
Path to the Perl source file to analyse.
=item * C<$out_dir>
Directory under which the C<.lcsaj.json> file will be written.
Optional â defaults to C<lcsaj>.
=back
=head3 Returns
An arrayref of LCSAJ path hashrefs, each with keys C<start>, C<end>,
and C<target> representing the first line, last line, and jump target
line of the path respectively.
=head3 Side effects
Creates C<$out_dir> if it does not exist. Writes a C<.lcsaj.json>
file to C<$out_dir>.
=head3 Notes
Only named subroutines are analysed. Anonymous subs and file-level
code are not included. The control flow graph is built using a
simplified model that treats branching compound statements as
split points â complex nested structures may not be fully represented.
=head3 API specification
=head4 input
{
class => { type => SCALAR },
file => { type => SCALAR },
out_dir => { type => SCALAR, optional => 1 },
}
=head4 output
{
type => ARRAYREF,
elements => {
type => HASHREF,
keys => {
start => { type => SCALAR },
end => { type => SCALAR },
target => { type => SCALAR },
},
},
}
=cut | |||||
| 120 | ||||||
| 121 | sub generate { | |||||
| 122 | 42 | 346188 | my ($class, $file, $out_dir) = @_; | |||
| 123 | ||||||
| 124 | # Apply default output directory if not supplied | |||||
| 125 | 42 | 190 | $out_dir //= $DEFAULT_OUT_DIR; | |||
| 126 | ||||||
| 127 | # Parse the source file â croak early with a clear message | |||||
| 128 | 42 | 401 | my $doc = PPI::Document->new($file) or croak "Cannot parse $file"; | |||
| 129 | ||||||
| 130 | # Find all named subroutines in the document | |||||
| 131 | 39 | 137099 | my $subs = $doc->find('PPI::Statement::Sub') || []; | |||
| 132 | ||||||
| 133 | 39 | 29380 | my @all_paths; | |||
| 134 | ||||||
| 135 | 39 39 | 43 64 | for my $sub (@{$subs}) { | |||
| 136 | # Build a simplified control flow graph for this sub | |||||
| 137 | 63 | 105 | my $blocks = _build_cfg($sub); | |||
| 138 | ||||||
| 139 | # Convert the CFG to LCSAJ path records | |||||
| 140 | 63 | 81 | my $paths = _cfg_to_lcsaj($blocks); | |||
| 141 | 63 63 | 36 102 | push @all_paths, @{$paths}; | |||
| 142 | } | |||||
| 143 | ||||||
| 144 | # Write all extracted paths to a JSON file in the output directory | |||||
| 145 | 39 | 85 | _save_lcsaj($file, $out_dir, \@all_paths); | |||
| 146 | ||||||
| 147 | 38 | 110 | return \@all_paths; | |||
| 148 | } | |||||
| 149 | ||||||
| 150 | # -------------------------------------------------- | |||||
| 151 | # _build_cfg | |||||
| 152 | # | |||||
| 153 | # Purpose: Build a simplified control flow graph | |||||
| 154 | # (CFG) for a single subroutine. Each | |||||
| 155 | # block in the CFG represents a linear | |||||
| 156 | # sequence of statements terminated by a | |||||
| 157 | # branch or the end of the sub. | |||||
| 158 | # | |||||
| 159 | # Entry: $sub - a PPI::Statement::Sub node. | |||||
| 160 | # | |||||
| 161 | # Exit: Returns an arrayref of block hashrefs, | |||||
| 162 | # each with keys id, lines, and edges. | |||||
| 163 | # Returns an empty arrayref if the sub | |||||
| 164 | # has no body. | |||||
| 165 | # | |||||
| 166 | # Side effects: None. | |||||
| 167 | # | |||||
| 168 | # Notes: Only compound statements (if, unless, | |||||
| 169 | # while, for, foreach) are treated as | |||||
| 170 | # branch points. Complex nested structures | |||||
| 171 | # may not be fully represented. | |||||
| 172 | # -------------------------------------------------- | |||||
| 173 | sub _build_cfg { | |||||
| 174 | 74 | 39583 | my $sub = $_[0]; | |||
| 175 | ||||||
| 176 | # Return empty graph if the sub has no body block | |||||
| 177 | 74 | 170 | my $block = $sub->block() or return []; | |||
| 178 | ||||||
| 179 | 73 | 697 | my @statements = $block->schildren(); | |||
| 180 | 73 | 507 | my @blocks; | |||
| 181 | 73 | 64 | my $id = 1; | |||
| 182 | ||||||
| 183 | # The frontier holds every block currently accumulating statement | |||||
| 184 | # lines. A branch forks the whole frontier into a true and a false | |||||
| 185 | # successor â every frontier member gets an edge to both, so | |||||
| 186 | # subsequent statements (appended to the new frontier) are recorded | |||||
| 187 | # against both arms, not just the true one. This keeps the false | |||||
| 188 | # arm of every branch populated with real lines/edges instead of | |||||
| 189 | # being silently dropped as an empty leaf. | |||||
| 190 | 73 | 99 | my $first = _new_block($id); | |||
| 191 | 73 | 55 | push @blocks, $first; | |||
| 192 | 73 | 94 | my @frontier = ($first); | |||
| 193 | ||||||
| 194 | 73 | 81 | for my $stmt (@statements) { | |||
| 195 | 147 | 247 | my $line = $stmt->line_number; | |||
| 196 | 147 173 | 32601 165 | push @{ $_->{lines} }, $line for @frontier; | |||
| 197 | ||||||
| 198 | # Branch points fork the frontier into true/false successors | |||||
| 199 | 147 | 123 | if(_is_branch($stmt)) { | |||
| 200 | 30 | 47 | my $true_block = _new_block(++$id); | |||
| 201 | 30 | 30 | my $false_block = _new_block(++$id); | |||
| 202 | 30 | 41 | _connect_blocks($_, $true_block) for @frontier; | |||
| 203 | 30 | 44 | _connect_blocks($_, $false_block) for @frontier; | |||
| 204 | ||||||
| 205 | 30 | 28 | push @blocks, $true_block, $false_block; | |||
| 206 | 30 | 34 | @frontier = ($true_block, $false_block); | |||
| 207 | } | |||||
| 208 | } | |||||
| 209 | ||||||
| 210 | 73 | 75 | return \@blocks; | |||
| 211 | } | |||||
| 212 | ||||||
| 213 | # -------------------------------------------------- | |||||
| 214 | # _new_block | |||||
| 215 | # | |||||
| 216 | # Purpose: Construct a new CFG block hashref with | |||||
| 217 | # an id, empty lines list, and empty | |||||
| 218 | # edges list. | |||||
| 219 | # | |||||
| 220 | # Entry: $id - integer block identifier. | |||||
| 221 | # Exit: Returns a hashref. | |||||
| 222 | # Side effects: None. | |||||
| 223 | # -------------------------------------------------- | |||||
| 224 | sub _new_block { | |||||
| 225 | 150 | 14502 | my $id = $_[0]; | |||
| 226 | ||||||
| 227 | 150 | 288 | return { id => $id, lines => [], edges => [] }; | |||
| 228 | } | |||||
| 229 | ||||||
| 230 | # -------------------------------------------------- | |||||
| 231 | # _connect_blocks | |||||
| 232 | # | |||||
| 233 | # Purpose: Add a directed edge from one CFG block | |||||
| 234 | # to another by recording the target | |||||
| 235 | # block's id in the source block's edges. | |||||
| 236 | # | |||||
| 237 | # Entry: $from - source block hashref. | |||||
| 238 | # $to - target block hashref. | |||||
| 239 | # Exit: Modifies $from->{edges} in place. | |||||
| 240 | # Side effects: Modifies $from. | |||||
| 241 | # -------------------------------------------------- | |||||
| 242 | sub _connect_blocks { | |||||
| 243 | 66 | 52 | my ($from, $to) = @_; | |||
| 244 | 66 66 | 40 74 | push @{ $from->{edges} }, $to->{id}; | |||
| 245 | } | |||||
| 246 | ||||||
| 247 | # -------------------------------------------------- | |||||
| 248 | # _is_branch | |||||
| 249 | # | |||||
| 250 | # Purpose: Return true if a PPI statement node | |||||
| 251 | # represents a branching control structure | |||||
| 252 | # that should split the current CFG block. | |||||
| 253 | # | |||||
| 254 | # Entry: $stmt - a PPI::Statement node. | |||||
| 255 | # Exit: Returns 1 if the statement is a branch, | |||||
| 256 | # 0 otherwise. | |||||
| 257 | # Side effects: None. | |||||
| 258 | # Notes: Only compound statement types are | |||||
| 259 | # considered â simple expressions are not. | |||||
| 260 | # -------------------------------------------------- | |||||
| 261 | sub _is_branch { | |||||
| 262 | 154 | 18620 | my $stmt = $_[0]; | |||
| 263 | ||||||
| 264 | # Only compound statements can be branch points | |||||
| 265 | 154 | 301 | return 0 unless $stmt->isa('PPI::Statement::Compound'); | |||
| 266 | ||||||
| 267 | 35 | 49 | my $type = $stmt->type // ''; | |||
| 268 | 35 | 627 | return $type =~ /^(?:if|unless|while|for|foreach)$/ ? 1 : 0; | |||
| 269 | } | |||||
| 270 | ||||||
| 271 | # -------------------------------------------------- | |||||
| 272 | # _cfg_to_lcsaj | |||||
| 273 | # | |||||
| 274 | # Purpose: Convert a CFG block list into a list of | |||||
| 275 | # LCSAJ path records. Each path represents | |||||
| 276 | # one linear sequence from a block's first | |||||
| 277 | # line to its last line, with a jump to | |||||
| 278 | # the first line of the target block. | |||||
| 279 | # | |||||
| 280 | # Entry: $blocks - arrayref of CFG block hashrefs | |||||
| 281 | # as produced by _build_cfg. | |||||
| 282 | # | |||||
| 283 | # Exit: Returns an arrayref of path hashrefs, | |||||
| 284 | # each with keys start, end, and target. | |||||
| 285 | # | |||||
| 286 | # Side effects: None. | |||||
| 287 | # | |||||
| 288 | # Notes: Blocks with no lines (empty blocks) are | |||||
| 289 | # excluded from the id-to-line mapping and | |||||
| 290 | # their target lines default to 0. | |||||
| 291 | # -------------------------------------------------- | |||||
| 292 | sub _cfg_to_lcsaj { | |||||
| 293 | 75 | 10938 | my $blocks = $_[0]; | |||
| 294 | ||||||
| 295 | # Build a lookup from block id to its first line number | |||||
| 296 | 118 | 234 | my %id2line = map { $_->{id} => $_->{lines}[0] } | |||
| 297 | 75 132 132 75 | 43 72 111 65 | grep { @{ $_->{lines} } } @{$blocks}; | |||
| 298 | ||||||
| 299 | 75 | 60 | my @paths; | |||
| 300 | ||||||
| 301 | 75 75 | 44 108 | for my $b (@{$blocks}) { | |||
| 302 | 132 132 | 91 129 | next unless @{ $b->{edges} }; # Skip blocks with no outgoing edges â they are leaf nodes | |||
| 303 | 33 33 | 21 36 | next unless @{ $b->{lines} }; # skip empty blocks â avoids null-bounds paths | |||
| 304 | ||||||
| 305 | 32 | 22 | my $start = $b->{lines}[0]; | |||
| 306 | 32 | 19 | my $end = $b->{lines}[-1]; | |||
| 307 | ||||||
| 308 | # Emit one path record per outgoing edge | |||||
| 309 | 32 32 | 17 21 | for my $target_id (@{ $b->{edges} }) { | |||
| 310 | push @paths, { | |||||
| 311 | start => $start, | |||||
| 312 | end => $end, | |||||
| 313 | 60 | 146 | target => $id2line{$target_id} // 0, | |||
| 314 | }; | |||||
| 315 | } | |||||
| 316 | } | |||||
| 317 | ||||||
| 318 | 75 | 99 | return \@paths; | |||
| 319 | } | |||||
| 320 | ||||||
| 321 | # -------------------------------------------------- | |||||
| 322 | # _save_lcsaj | |||||
| 323 | # | |||||
| 324 | # Purpose: Serialise LCSAJ path records to a JSON | |||||
| 325 | # file in the output directory. The | |||||
| 326 | # filename is derived from the source | |||||
| 327 | # file's basename. | |||||
| 328 | # | |||||
| 329 | # Entry: $file - path to the source .pm file. | |||||
| 330 | # $dir - output directory path. | |||||
| 331 | # $paths - arrayref of path hashrefs. | |||||
| 332 | # | |||||
| 333 | # Exit: Returns nothing. Writes a .lcsaj.json | |||||
| 334 | # file to $dir. | |||||
| 335 | # | |||||
| 336 | # Side effects: Creates $dir if it does not exist. | |||||
| 337 | # Writes a file to $dir. | |||||
| 338 | # | |||||
| 339 | # Notes: Uses File::Basename::basename for | |||||
| 340 | # portability across operating systems. | |||||
| 341 | # -------------------------------------------------- | |||||
| 342 | sub _save_lcsaj { | |||||
| 343 | 45 | 11610 | my ($file, $dir, $paths) = @_; | |||
| 344 | ||||||
| 345 | # Derive the module-relative path (strip leading .../lib/ prefix) | |||||
| 346 | 45 | 34 | my $rel = $file; | |||
| 347 | ||||||
| 348 | # Strip leading path up to and including 'lib/' â handles both | |||||
| 349 | # absolute paths (/home/runner/.../lib/App/...) and relative (lib/App/...) | |||||
| 350 | # Handle both Unix / and Windows \ separators | |||||
| 351 | 45 | 151 | $rel =~ s{^(?:.*[/\\])?lib[/\\]}{}; | |||
| 352 | ||||||
| 353 | # If $file had no 'lib/' segment (e.g. a File::Temp tempfile), the | |||||
| 354 | # substitution above is a no-op and $rel is still $file's full | |||||
| 355 | # absolute path. Joining that onto $dir below would embed a second | |||||
| 356 | # absolute path â and, on Windows, a second drive letter â inside | |||||
| 357 | # the constructed directory name, which File::Path::make_path | |||||
| 358 | # rejects outright ("Invalid argument"). Fall back to the basename. | |||||
| 359 | 45 | 415 | $rel = basename($rel) if File::Spec->file_name_is_absolute($rel); | |||
| 360 | ||||||
| 361 | 45 | 945 | my $base = basename($rel); | |||
| 362 | ||||||
| 363 | # Mirror the directory structure expected by _lcsaj_coverage_for_file: | |||||
| 364 | # $dir / $rel.lcsaj / $base.lcsaj.json | |||||
| 365 | 45 | 186 | my $subdir = File::Spec->catfile($dir, "$rel.lcsaj"); | |||
| 366 | ||||||
| 367 | # Create the output directory if it does not exist | |||||
| 368 | 45 | 4594 | make_path($subdir) unless -d $subdir; | |||
| 369 | 44 | 214 | my $out = File::Spec->catfile($subdir, "$base.lcsaj.json"); | |||
| 370 | ||||||
| 371 | # Remove degenerate paths (null bounds) and exact duplicates | |||||
| 372 | # before serialising â guards against empty CFG blocks producing | |||||
| 373 | # null start/end values, and branch splits creating identical records | |||||
| 374 | 44 | 44 | my %seen; | |||
| 375 | my @clean = grep { | |||||
| 376 | defined $_->{start} && defined $_->{end} | |||||
| 377 | 57 | 240 | && !$seen{"$_->{start}:$_->{end}:$_->{target}"}++ | |||
| 378 | 44 44 | 30 41 | } @{$paths}; | |||
| 379 | ||||||
| 380 | 44 | 1264 | open my $fh, '>', $out or croak "Cannot write LCSAJ output to $out: $!"; | |||
| 381 | 44 | 237 | print $fh encode_json(\@clean); | |||
| 382 | 44 | 713 | close $fh; | |||
| 383 | } | |||||
| 384 | ||||||
| 385 | 1; | |||||