lib/App/Test/Generator/LCSAJ.pm

Structural Coverage (Approximate)

TER1 (Statement): 100.00%
TER2 (Branch): 86.36%
TER3 (LCSAJ): 100.0% (3/3)
Approximate LCSAJ segments: 23

LCSAJ Legend

Covered — this LCSAJ path was executed during testing.

Not covered — this LCSAJ path was never executed. These are the paths to focus on.

Multiple dots on a line indicate that multiple control-flow paths begin at that line. Hovering over any dot shows:

        start → end → jump
        

Uncovered paths show [NOT COVERED] in the tooltip.

Mutant Testing Legend

Survived (tests missed this) Killed (tests detected this) No mutation
    1: package App::Test::Generator::LCSAJ;
    2: 
    3: use strict;
    4: use warnings;
    5: use Carp qw(croak);
    6: use File::Basename  qw(basename);
    7: use File::Path qw(make_path);
    8: use File::Spec;
    9: use JSON::MaybeXS;
   10: use PPI;
   11: use Readonly;
   12: 
   13: # --------------------------------------------------
   14: # Default output directory for LCSAJ JSON files
   15: # --------------------------------------------------
   16: Readonly my $DEFAULT_OUT_DIR => 'lcsaj';
   17: 
   18: our $VERSION = '0.41';
   19: 
   20: =head1 NAME
   21: 
   22: App::Test::Generator::LCSAJ - Static LCSAJ extraction for Perl
   23: 
   24: =head1 SYNOPSIS
   25: 
   26:     use App::Test::Generator::LCSAJ;
   27: 
   28:     my $paths = App::Test::Generator::LCSAJ->generate(
   29:         'lib/MyModule.pm',
   30:         'cover_html/mutation_html/lib',
   31:     );
   32: 
   33: =head1 DESCRIPTION
   34: 
   35: Extracts Linear Code Sequence and Jump (LCSAJ) paths from Perl source
   36: files using static analysis via L<PPI>. Each LCSAJ path describes a
   37: linear sequence of statements followed by a jump to another sequence,
   38: forming the basis for TER3 (third-level Test Effectiveness Ratio)
   39: measurement.
   40: 
   41: The extracted paths are written as JSON to a C<.lcsaj.json> file under
   42: the output directory, where they are consumed by
   43: C<bin/test-generator-index> for dashboard display and TER3
   44: calculation.
   45: 
   46: =head1 VERSION
   47: 
   48: Version 0.41
   49: 
   50: =head2 generate
   51: 
   52: Extract LCSAJ paths from all subroutines in a Perl source file and
   53: write the results to a JSON file.
   54: 
   55:     my $paths = App::Test::Generator::LCSAJ->generate(
   56:         'lib/MyModule.pm',
   57:         'cover_html/mutation_html/lib',
   58:     );
   59: 
   60:     printf "Extracted %d LCSAJ paths\n", scalar @{$paths};
   61: 
   62: =head3 Arguments
   63: 
   64: =over 4
   65: 
   66: =item * C<$file>
   67: 
   68: Path to the Perl source file to analyse.
   69: 
   70: =item * C<$out_dir>
   71: 
   72: Directory under which the C<.lcsaj.json> file will be written.
   73: Optional — defaults to C<lcsaj>.
   74: 
   75: =back
   76: 
   77: =head3 Returns
   78: 
   79: An arrayref of LCSAJ path hashrefs, each with keys C<start>, C<end>,
   80: and C<target> representing the first line, last line, and jump target
   81: line of the path respectively.
   82: 
   83: =head3 Side effects
   84: 
   85: Creates C<$out_dir> if it does not exist. Writes a C<.lcsaj.json>
   86: file to C<$out_dir>.
   87: 
   88: =head3 Notes
   89: 
   90: Only named subroutines are analysed. Anonymous subs and file-level
   91: code are not included. The control flow graph is built using a
   92: simplified model that treats branching compound statements as
   93: split points — complex nested structures may not be fully represented.
   94: 
   95: =head3 API specification
   96: 
   97: =head4 input
   98: 
   99:     {
  100:         class   => { type => SCALAR },
  101:         file    => { type => SCALAR },
  102:         out_dir => { type => SCALAR, optional => 1 },
  103:     }
  104: 
  105: =head4 output
  106: 
  107:     {
  108:         type     => ARRAYREF,
  109:         elements => {
  110:             type => HASHREF,
  111:             keys => {
  112:                 start  => { type => SCALAR },
  113:                 end    => { type => SCALAR },
  114:                 target => { type => SCALAR },
  115:             },
  116:         },
  117:     }
  118: 
  119: =cut
  120: 
  121: sub generate {
122 → 135 → 145  122: 	my ($class, $file, $out_dir) = @_;
  123: 
  124: 	# Apply default output directory if not supplied
  125: 	$out_dir //= $DEFAULT_OUT_DIR;
  126: 
  127: 	# Parse the source file — croak early with a clear message
  128: 	my $doc = PPI::Document->new($file) or croak "Cannot parse $file";
  129: 
  130: 	# Find all named subroutines in the document
  131: 	my $subs = $doc->find('PPI::Statement::Sub') || [];
  132: 
  133: 	my @all_paths;
  134: 
  135: 	for my $sub (@{$subs}) {
  136: 		# Build a simplified control flow graph for this sub
  137: 		my $blocks = _build_cfg($sub);
  138: 
  139: 		# Convert the CFG to LCSAJ path records
  140: 		my $paths  = _cfg_to_lcsaj($blocks);
  141: 		push @all_paths, @{$paths};
  142: 	}
  143: 
  144: 	# Write all extracted paths to a JSON file in the output directory
  145: 	_save_lcsaj($file, $out_dir, \@all_paths);
  146: 
  147: 	return \@all_paths;
  148: }
  149: 
  150: # --------------------------------------------------
  151: # _build_cfg
  152: #
  153: # Purpose:    Build a simplified control flow graph
  154: #             (CFG) for a single subroutine. Each
  155: #             block in the CFG represents a linear
  156: #             sequence of statements terminated by a
  157: #             branch or the end of the sub.
  158: #
  159: # Entry:      $sub - a PPI::Statement::Sub node.
  160: #
  161: # Exit:       Returns an arrayref of block hashrefs,
  162: #             each with keys id, lines, and edges.
  163: #             Returns an empty arrayref if the sub
  164: #             has no body.
  165: #
  166: # Side effects: None.
  167: #
  168: # Notes:      Only compound statements (if, unless,
  169: #             while, for, foreach) are treated as
  170: #             branch points. Complex nested structures
  171: #             may not be fully represented.
  172: # --------------------------------------------------
  173: sub _build_cfg {
174 → 194 → 210  174: 	my $sub = $_[0];
  175: 
  176: 	# Return empty graph if the sub has no body block
  177: 	my $block = $sub->block() or return [];
  178: 
  179: 	my @statements = $block->schildren();
  180: 	my @blocks;
  181: 	my $id = 1;
  182: 
  183: 	# The frontier holds every block currently accumulating statement
  184: 	# lines. A branch forks the whole frontier into a true and a false
  185: 	# successor — every frontier member gets an edge to both, so
  186: 	# subsequent statements (appended to the new frontier) are recorded
  187: 	# against both arms, not just the true one. This keeps the false
  188: 	# arm of every branch populated with real lines/edges instead of
  189: 	# being silently dropped as an empty leaf.
  190: 	my $first = _new_block($id);
  191: 	push @blocks, $first;
  192: 	my @frontier = ($first);
  193: 
  194: 	for my $stmt (@statements) {
  195: 		my $line = $stmt->line_number;
  196: 		push @{ $_->{lines} }, $line for @frontier;
  197: 
  198: 		# Branch points fork the frontier into true/false successors
  199: 		if(_is_branch($stmt)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

200: my $true_block = _new_block(++$id); 201: my $false_block = _new_block(++$id); 202: _connect_blocks($_, $true_block) for @frontier; 203: _connect_blocks($_, $false_block) for @frontier; 204: 205: push @blocks, $true_block, $false_block; 206: @frontier = ($true_block, $false_block); 207: } 208: } 209: 210: return \@blocks; 211: } 212: 213: # -------------------------------------------------- 214: # _new_block 215: # 216: # Purpose: Construct a new CFG block hashref with 217: # an id, empty lines list, and empty 218: # edges list. 219: # 220: # Entry: $id - integer block identifier. 221: # Exit: Returns a hashref. 222: # Side effects: None. 223: # -------------------------------------------------- 224: sub _new_block { 225: my $id = $_[0]; 226: 227: return { id => $id, lines => [], edges => [] }; 228: } 229: 230: # -------------------------------------------------- 231: # _connect_blocks 232: # 233: # Purpose: Add a directed edge from one CFG block 234: # to another by recording the target 235: # block's id in the source block's edges. 236: # 237: # Entry: $from - source block hashref. 238: # $to - target block hashref. 239: # Exit: Modifies $from->{edges} in place. 240: # Side effects: Modifies $from. 241: # -------------------------------------------------- 242: sub _connect_blocks { 243: my ($from, $to) = @_; 244: push @{ $from->{edges} }, $to->{id}; 245: } 246: 247: # -------------------------------------------------- 248: # _is_branch 249: # 250: # Purpose: Return true if a PPI statement node 251: # represents a branching control structure 252: # that should split the current CFG block. 253: # 254: # Entry: $stmt - a PPI::Statement node. 255: # Exit: Returns 1 if the statement is a branch, 256: # 0 otherwise. 257: # Side effects: None. 258: # Notes: Only compound statement types are 259: # considered — simple expressions are not. 260: # -------------------------------------------------- 261: sub _is_branch { 262: my $stmt = $_[0]; 263: 264: # Only compound statements can be branch points 265: return 0 unless $stmt->isa('PPI::Statement::Compound');

Mutants (Total: 2, Killed: 2, Survived: 0)

266: 267: my $type = $stmt->type // ''; 268: return $type =~ /^(?:if|unless|while|for|foreach)$/ ? 1 : 0;

Mutants (Total: 2, Killed: 2, Survived: 0)

269: } 270: 271: # -------------------------------------------------- 272: # _cfg_to_lcsaj 273: # 274: # Purpose: Convert a CFG block list into a list of 275: # LCSAJ path records. Each path represents 276: # one linear sequence from a block's first 277: # line to its last line, with a jump to 278: # the first line of the target block. 279: # 280: # Entry: $blocks - arrayref of CFG block hashrefs 281: # as produced by _build_cfg. 282: # 283: # Exit: Returns an arrayref of path hashrefs, 284: # each with keys start, end, and target. 285: # 286: # Side effects: None. 287: # 288: # Notes: Blocks with no lines (empty blocks) are 289: # excluded from the id-to-line mapping and 290: # their target lines default to 0. 291: # -------------------------------------------------- 292: sub _cfg_to_lcsaj { 293 → 301 → 318 293: my $blocks = $_[0]; 294: 295: # Build a lookup from block id to its first line number 296: my %id2line = map { $_->{id} => $_->{lines}[0] } 297: grep { @{ $_->{lines} } } @{$blocks}; 298: 299: my @paths; 300: 301: for my $b (@{$blocks}) { 302: next unless @{ $b->{edges} }; # Skip blocks with no outgoing edges — they are leaf nodes 303: next unless @{ $b->{lines} }; # skip empty blocks — avoids null-bounds paths 304: 305: my $start = $b->{lines}[0]; 306: my $end = $b->{lines}[-1]; 307: 308: # Emit one path record per outgoing edge 309: for my $target_id (@{ $b->{edges} }) { 310: push @paths, { 311: start => $start, 312: end => $end, 313: target => $id2line{$target_id} // 0, 314: }; 315: } 316: } 317: 318: return \@paths; 319: } 320: 321: # -------------------------------------------------- 322: # _save_lcsaj 323: # 324: # Purpose: Serialise LCSAJ path records to a JSON 325: # file in the output directory. The 326: # filename is derived from the source 327: # file's basename. 328: # 329: # Entry: $file - path to the source .pm file. 330: # $dir - output directory path. 331: # $paths - arrayref of path hashrefs. 332: # 333: # Exit: Returns nothing. Writes a .lcsaj.json 334: # file to $dir. 335: # 336: # Side effects: Creates $dir if it does not exist. 337: # Writes a file to $dir. 338: # 339: # Notes: Uses File::Basename::basename for 340: # portability across operating systems. 341: # -------------------------------------------------- 342: sub _save_lcsaj { 343: my ($file, $dir, $paths) = @_; 344: 345: # Derive the module-relative path (strip leading .../lib/ prefix) 346: my $rel = $file; 347: 348: # Strip leading path up to and including 'lib/' — handles both 349: # absolute paths (/home/runner/.../lib/App/...) and relative (lib/App/...) 350: # Handle both Unix / and Windows \ separators 351: $rel =~ s{^(?:.*[/\\])?lib[/\\]}{}; 352: 353: # If $file had no 'lib/' segment (e.g. a File::Temp tempfile), the 354: # substitution above is a no-op and $rel is still $file's full 355: # absolute path. Joining that onto $dir below would embed a second 356: # absolute path — and, on Windows, a second drive letter — inside 357: # the constructed directory name, which File::Path::make_path 358: # rejects outright ("Invalid argument"). Fall back to the basename. 359: $rel = basename($rel) if File::Spec->file_name_is_absolute($rel); 360: 361: my $base = basename($rel); 362: 363: # Mirror the directory structure expected by _lcsaj_coverage_for_file: 364: # $dir / $rel.lcsaj / $base.lcsaj.json 365: my $subdir = File::Spec->catfile($dir, "$rel.lcsaj"); 366: 367: # Create the output directory if it does not exist 368: make_path($subdir) unless -d $subdir; 369: my $out = File::Spec->catfile($subdir, "$base.lcsaj.json"); 370: 371: # Remove degenerate paths (null bounds) and exact duplicates 372: # before serialising — guards against empty CFG blocks producing 373: # null start/end values, and branch splits creating identical records 374: my %seen; 375: my @clean = grep { 376: defined $_->{start} && defined $_->{end} 377: && !$seen{"$_->{start}:$_->{end}:$_->{target}"}++ 378: } @{$paths}; 379: 380: open my $fh, '>', $out or croak "Cannot write LCSAJ output to $out: $!"; 381: print $fh encode_json(\@clean); 382: close $fh; 383: } 384: 385: 1;