| File: | blib/lib/App/Test/Generator/SchemaExtractor.pm |
| Coverage: | 77.0% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package App::Test::Generator::SchemaExtractor; | |||||
| 2 | ||||||
| 3 | 23 23 23 | 1263909 21 322 | use strict; | |||
| 4 | 23 23 23 | 38 19 425 | use warnings; | |||
| 5 | 23 23 23 | 3658 129853 41 | use autodie qw(:all); | |||
| 6 | ||||||
| 7 | 23 23 23 | 170688 24 431 | use App::Test::Generator::Model::Method; | |||
| 8 | 23 23 23 | 3394 35 380 | use App::Test::Generator::Analyzer::Complexity; | |||
| 9 | 23 23 23 | 3258 27 375 | use App::Test::Generator::Analyzer::Return; | |||
| 10 | 23 23 23 | 2997 25 347 | use App::Test::Generator::Analyzer::ReturnMeta; | |||
| 11 | 23 23 23 | 3240 27 360 | use App::Test::Generator::Analyzer::SideEffect; | |||
| 12 | ||||||
| 13 | 23 23 23 | 47 19 465 | use Carp qw(carp croak); | |||
| 14 | 23 23 23 | 4229 1473280 378 | use PPI; | |||
| 15 | 23 23 23 | 3759 307593 363 | use Pod::Simple::Text; | |||
| 16 | 23 23 23 | 60 17 788 | use File::Basename; | |||
| 17 | 23 23 23 | 42 23 413 | use File::Path qw(make_path); | |||
| 18 | 23 23 23 | 3474 71482 407 | use Params::Get; | |||
| 19 | 23 23 23 | 4786 94107 366 | use Safe; | |||
| 20 | 23 23 23 | 93 19 433 | use Scalar::Util qw(looks_like_number); | |||
| 21 | 23 23 23 | 2840 22677 571 | use YAML::XS; | |||
| 22 | 23 23 23 | 4243 26884 539 | use IPC::Open3; | |||
| 23 | 23 23 23 | 55 16 537 | use JSON::MaybeXS qw(encode_json decode_json); | |||
| 24 | 23 23 23 | 45 19 342 | use Readonly; | |||
| 25 | 23 23 23 | 36 14 497932 | use Symbol qw(gensym); | |||
| 26 | ||||||
| 27 | # -------------------------------------------------- | |||||
| 28 | # Confidence score thresholds for input and output analysis | |||||
| 29 | # -------------------------------------------------- | |||||
| 30 | Readonly my $CONFIDENCE_HIGH_THRESHOLD => 60; | |||||
| 31 | Readonly my $CONFIDENCE_MEDIUM_THRESHOLD => 35; | |||||
| 32 | Readonly my $CONFIDENCE_LOW_THRESHOLD => 15; | |||||
| 33 | ||||||
| 34 | # -------------------------------------------------- | |||||
| 35 | # Confidence level label strings | |||||
| 36 | # -------------------------------------------------- | |||||
| 37 | Readonly my $LEVEL_HIGH => 'high'; | |||||
| 38 | Readonly my $LEVEL_MEDIUM => 'medium'; | |||||
| 39 | Readonly my $LEVEL_LOW => 'low'; | |||||
| 40 | Readonly my $LEVEL_VERY_LOW => 'very_low'; | |||||
| 41 | Readonly my $LEVEL_NONE => 'none'; | |||||
| 42 | ||||||
| 43 | # -------------------------------------------------- | |||||
| 44 | # Analysis limits | |||||
| 45 | # -------------------------------------------------- | |||||
| 46 | Readonly my $DEFAULT_MAX_PARAMETERS => 20; | |||||
| 47 | Readonly my $DEFAULT_CONFIDENCE_THRESH => 0.5; | |||||
| 48 | Readonly my $POD_WALK_LIMIT => 200; | |||||
| 49 | Readonly my $SIGNATURE_TIMEOUT_SECS => 3; | |||||
| 50 | Readonly my $MEMORY_LIMIT_BYTES => 50_000_000; | |||||
| 51 | ||||||
| 52 | # -------------------------------------------------- | |||||
| 53 | # Numeric boundary values for test hint generation | |||||
| 54 | # -------------------------------------------------- | |||||
| 55 | Readonly my $INT32_MAX => 2_147_483_647; | |||||
| 56 | ||||||
| 57 | # -------------------------------------------------- | |||||
| 58 | # Boolean return score thresholds | |||||
| 59 | # -------------------------------------------------- | |||||
| 60 | Readonly my $BOOLEAN_SCORE_THRESHOLD => 30; | |||||
| 61 | ||||||
| 62 - 70 | =head1 NAME App::Test::Generator::SchemaExtractor - Extract test schemas from Perl modules =head1 VERSION Version 0.36 =cut | |||||
| 71 | ||||||
| 72 | our $VERSION = '0.36'; | |||||
| 73 | ||||||
| 74 - 1272 | =head1 SYNOPSIS
use App::Test::Generator::SchemaExtractor;
my $extractor = App::Test::Generator::SchemaExtractor->new(
input_file => 'lib/MyModule.pm',
output_dir => 'schemas/',
verbose => 1,
);
my $schemas = $extractor->extract_all();
=head1 DESCRIPTION
App::Test::Generator::SchemaExtractor analyzes Perl modules and generates
structured YAML schema files suitable for automated test generation by L<App::Test::Generator>.
This module employs
static analysis techniques to infer parameter types, constraints, and
method behaviors directly from your source code.
=head2 Analysis Methods
The extractor combines multiple analysis approaches for a comprehensive schema generation:
=over 4
=item * B<POD Documentation Analysis>
Parses embedded documentation to extract:
- Parameter names, types, and descriptions from =head2 sections
- Method signatures with positional parameters
- Return value specifications from "Returns:" sections
- Constraints (ranges, patterns, required/optional status)
- Semantic type detection (email, URL, filename)
=item * B<Code Pattern Detection>
Analyzes source code using PPI to identify:
- Method signatures and parameter extraction patterns
- Type validation (ref(), isa(), blessed())
- Constraint patterns (length checks, numeric comparisons, regex matches)
- Return statement analysis and value type inference
- Object instantiation requirements and accessor methods
=item * B<Signature Analysis>
Examines method declarations for:
- Parameter names and positional information
- Instance vs. class method detection
- Method modifiers (Moose-style before/after/around)
- Various parameter declaration styles (shift, @_ assignment)
=item * B<Heuristic Inference>
Applies Perl-specific domain knowledge:
- Boolean return detection from method names (is_*, has_*, can_*)
- Common Perl idioms and coding patterns
- Context awareness (scalar vs list, wantarray usage)
- Object-oriented patterns (constructors, accessors, chaining)
=back
=head2 Generated Schema Structure
The extracted schemas follow this YAML structure:
function: method_name
module: Package::Name
input:
param1:
type: string
min: 3
max: 50
optional: 0
position: 0
param2:
type: integer
min: 0
max: 100
optional: 1
position: 1
output:
type: boolean
value: 1
new: Package::Name # if object instantiation required
config:
test_empty: 1
test_nuls: 0
test_undef: 0
test_non_ascii: 0
=head2 Advanced Detection Capabilities
=over 4
=item * B<Accessor Method Detection>
Automatically identifies getter, setter, and combined accessor methods
by analyzing common patterns like C<return $self-E<gt>{property}> and
C<$self-E<gt>{property} = $value>.
=item * B<Boolean Return Inference>
Detects boolean-returning methods through multiple signals:
- Method name patterns (is_*, has_*, can_*)
- Return patterns (consistent 1/0 returns)
- POD descriptions ("returns true on success")
- Ternary operators with boolean results
=item * B<Context Awareness>
Identifies methods that use C<wantarray> and can return different
values in scalar vs list context.
=item * B<Object Lifecycle Management>
Detects instance methods requiring object instantiation and
automatically adds the C<new> field to schemas.
=item * B<Enhanced Object Detection>
The extractor includes sophisticated object detection capabilities that go beyond simple instance method identification:
=over 4
=item * B<Factory Method Recognition>
Automatically identifies methods that create and return object instances, such as methods named C<create_*>, C<make_*>, C<build_*>, or C<get_*>. Factory methods are correctly classified as class methods that don't require pre-existing objects for testing.
=item * B<Singleton Pattern Detection>
Recognizes singleton patterns through multiple signals: method names like C<instance> or C<get_instance>, static variables holding instance references, lazy initialization patterns (C<$instance ||= new()>), and consistent return of the same instance variable.
=item * B<Constructor Parameter Analysis>
Examines C<new> methods to determine required and optional parameters, validation requirements, and default values. This enables test generators to provide appropriate constructor arguments when object instantiation is needed.
=item * B<Inheritance Relationship Handling>
Detects parent classes through C<use parent>, C<use base>, and C<@ISA> declarations. Identifies when methods use C<SUPER::> calls and determines whether the current class or a parent class constructor should be used for object instantiation.
=item * B<External Object Dependency Detection>
Identifies when methods create or depend on objects from other classes, enabling proper test setup with mock objects or real dependencies.
=back
These enhancements ensure that generated test schemas accurately reflect the object-oriented structure of the code, leading to more meaningful and effective test generation.
=back
=head2 Confidence Scoring
Each generated schema includes detailed confidence assessments:
=over 4
=item * B<High Confidence>
Multiple independent analysis sources converge on consistent,
well-constrained parameters with explicit validation logic and
comprehensive documentation.
=item * B<Medium Confidence>
Reasonable evidence from code patterns or partial documentation,
but may lack comprehensive constraints or have some ambiguities.
=item * B<Low Confidence>
Minimal evidence - primarily based on naming conventions,
default assumptions, or single-source analysis.
=item * B<Very Low Confidence>
Barely any detectable signals - schema should be thoroughly
reviewed before use in test generation.
=back
=head2 Use Cases
=over 4
=item * B<Automated Test Generation>
Generate comprehensive test suites with L<App::Test::Generator> using
extracted schemas as input. The schemas provide the necessary structure
for generating both positive and negative test cases.
=item * B<API Documentation Generation>
Supplement existing documentation with automatically inferred interface
specifications, parameter requirements, and return types.
=item * B<Code Quality Assessment>
Identify methods with poor documentation, inconsistent parameter handling,
or unclear interfaces that may benefit from refactoring.
=item * B<Refactoring Assistance>
Detect method dependencies, object instantiation requirements, and
parameter usage patterns to inform refactoring decisions.
=item * B<Legacy Code Analysis>
Quickly understand the interface contracts of legacy Perl codebases
without extensive manual code reading.
=back
=head2 Integration with Testing Ecosystem
The generated schemas are specifically designed to work with the
L<App::Test::Generator> ecosystem:
# Extract schemas from your module
my $extractor = App::Test::Generator::SchemaExtractor->new(...);
my $schemas = $extractor->extract_all();
# Use with test generator (typically as separate steps)
# fuzz-harness-generator -r schemas/method_name.yml
=head2 Limitations and Considerations
=over 4
=item * B<Dynamic Code Patterns>
Highly dynamic code (string evals, AUTOLOAD, symbolic references)
may not be fully detected by static analysis.
=item * B<Complex Validation Logic>
Sophisticated validation involving multiple parameters or external
dependencies may require manual schema refinement.
=item * B<Confidence Heuristics>
Confidence scores are based on heuristics and should be reviewed
by developers familiar with the codebase.
=item * B<Perl Idiom Recognition>
Some Perl-specific idioms may require custom pattern recognition
beyond the built-in detectors.
=item * B<Documentation Dependency>
Analysis quality improves significantly with comprehensive POD
documentation following consistent patterns.
=back
=head2 Best Practices for Optimal Results
=over 4
=item * B<Comprehensive POD Documentation>
Write detailed POD with explicit parameter documentation using
consistent patterns like C<$param - type (constraints), description>.
=item * B<Consistent Coding Patterns>
Use consistent parameter validation patterns and method signatures
throughout your codebase.
=item * B<Schema Review Process>
Review and refine automatically generated schemas, particularly
those with low confidence scores.
=item * B<Descriptive Naming>
Use descriptive method and parameter names that clearly indicate
purpose and expected types.
=item * B<Progressive Enhancement>
Start with automatically generated schemas and progressively
refine them based on test results and code understanding.
=back
The module is particularly valuable for large codebases where manual schema
creation would be prohibitively time-consuming, and for maintaining test
coverage as code evolves through continuous integration pipelines.
=head2 Advanced Type Detection
The schema extractor includes enhanced type detection capabilities that identify specialized Perl types beyond basic strings and integers.
L<DateTime> and L<Time::Piece> objects are detected through isa() checks and method call patterns, while date strings (ISO 8601, YYYY-MM-DD) and UNIX timestamps are recognized through regex validation and numeric range checks.
File handles and file paths are identified via I/O operations and file test operators, coderefs are detected through ref() checks and invocation patterns, and enum-like parameters are extracted from validation code including regex patterns (C</^(a|b|c)$/>), hash lookups, grep statements, and if/elsif chains.
These detected types are preserved in the generated YAML schemas with appropriate semantic annotations, enabling test generators to create more accurate and meaningful test cases.
=head3 Example Advanced Type Schema
For a method like:
sub process_event {
my ($self, $timestamp, $status, $callback) = @_;
croak unless $timestamp > 1000000000;
croak unless $status =~ /^(active|pending|complete)$/;
croak unless ref($callback) eq 'CODE';
$callback->($timestamp, $status);
}
The extractor generates:
---
function: process_event
module: MyModule
input:
timestamp:
type: integer
# min: 0
# max: 2147483647
position: 0
_note: Unix timestamp
semantic: unix_timestamp
status:
type: string
enum:
- active
- pending
- complete
position: 1
_note: 'Must be one of: active, pending, complete'
callback:
type: coderef
position: 2
_note: 'CODE reference - provide sub { } in tests'
=head1 RELATIONSHIP DETECTION
The schema extractor detects relationships and dependencies between parameters,
enabling more sophisticated validation and test generation.
=head2 Relationship Types
=over 4
=item * B<mutually_exclusive>
Parameters that cannot be used together.
die if $file && $content; # Can't specify both
Generated schema:
relationships:
- type: mutually_exclusive
params: [file, content]
description: Cannot specify both file and content
=item * B<required_group>
At least one parameter from the group must be specified (OR logic).
die unless $id || $name; # Must provide one
Generated schema:
relationships:
- type: required_group
params: [id, name]
logic: or
description: Must specify either id or name
=item * B<conditional_requirement>
If one parameter is specified, another becomes required (IF-THEN logic).
die if $async && !$callback; # async requires callback
Generated schema:
relationships:
- type: conditional_requirement
if: async
then_required: callback
description: When async is specified, callback is required
=item * B<dependency>
One parameter depends on another being present.
die "Port requires host" if $port && !$host;
Generated schema:
relationships:
- type: dependency
param: port
requires: host
description: port requires host to be specified
=item * B<value_constraint>
Specific value requirements between parameters.
die if $ssl && $port != 443; # ssl requires port 443
Generated schema:
relationships:
- type: value_constraint
if: ssl
then: port
operator: ==
value: 443
description: When ssl is specified, port must equal 443
=item * B<value_conditional>
Parameter required when another has a specific value.
die if $mode eq 'secure' && !$key;
Generated schema:
relationships:
- type: value_conditional
if: mode
equals: secure
then_required: key
description: When mode equals 'secure', key is required
=back
=head2 Default Value Extraction
The extractor comprehensively extracts default values from both code and POD documentation:
=head3 Code Pattern Recognition
Extracts defaults from multiple Perl idioms:
=over 4
=item * Logical OR operator: C<$param = $param || 'default'>
=item * Defined-or operator: C<$param //= 'default'>
=item * Ternary operator: C<$param = defined $param ? $param : 'default'>
=item * Unless conditional: C<$param = 'default' unless defined $param>
=item * Chained defaults: C<$param = $param || $self->{_default} || 'fallback'>
=item * Multi-line patterns: C<$param = {} unless $param>
=back
=head3 POD Pattern Recognition
Extracts defaults from documentation:
=over 4
=item * Standard format: C<Default: 'value'>
=item * Alternative format: C<Defaults to: 'value'>
=item * Inline format: C<Optional, default: 'value'>
=item * Parameter lists: C<$param - type, default 'value'>
=back
=head3 Value Processing
Properly handles:
=over 4
=item * String literals with quotes and escape sequences
=item * Numeric values (integers and floats)
=item * Boolean values (true/false converted to 1/0)
=item * Empty data structures ([] and {})
=item * Special values (undef, __PACKAGE__)
=item * Complex expressions (preserved as-is when unevaluatable)
=item * Quote operators (q{}, qq{}, qw{})
=back
=head3 Type Inference
When a parameter has a default value but no explicit type annotation,
the type is automatically inferred from the default:
$options = {} # inferred as hashref
$items = [] # inferred as arrayref
$count = 42 # inferred as integer
$ratio = 3.14 # inferred as number
$enabled = 1 # inferred as boolean
=head2 Context-Aware Return Analysis
The extractor provides comprehensive analysis of method return behavior,
including context sensitivity, error handling conventions, and method chaining patterns.
=head3 List vs Scalar Context Detection
Automatically detects methods that return different values based on calling context:
sub get_items {
my $self = $_[0];
return wantarray ? @items : scalar(@items);
}
Detection captures:
=over 4
=item * C<_context_aware> flag - Method uses wantarray
=item * C<_list_context> - Type returned in list context (e.g., 'array')
=item * C<_scalar_context> - Type returned in scalar context (e.g., 'integer')
=back
Recognizes both ternary operator patterns and conditional return patterns.
=head3 Void Context Methods
Identifies methods that don't return meaningful values:
=over 4
=item * Setters (C<set_*> methods)
=item * Mutators (C<add_*, remove_*, delete_*, clear_*, reset_*, update_*>)
=item * Loggers (C<log, debug, warn, error, info>)
=item * Methods with only empty returns
=back
Example:
sub set_name {
my ($self, $name) = @_;
$self->{name} = $name;
return; # Void context
}
Sets C<_void_context> flag and C<type =E<gt> 'void'>.
=head3 Method Chaining Detection
Identifies chainable methods that return C<$self> for fluent interfaces:
sub set_width {
my ($self, $width) = @_;
$self->{width} = $width;
return $self; # Chainable
}
Detection provides:
=over 4
=item * C<_returns_self> - Returns invocant for chaining
=item * C<class> - The class name being returned
=back
Also detects chaining documentation in POD (keywords: "chainable", "fluent interface",
"returns self", "method chaining").
=head3 Error Return Conventions
Analyzes how methods signal errors:
B<Pattern Detection:>
=over 4
=item * C<undef_on_error> - Explicit C<return undef if/unless condition>
=item * C<implicit_undef> - Bare C<return if/unless condition>
=item * C<empty_list> - C<return ()> for list context errors
=item * C<zero_on_error> - Returns 0/false for boolean error indication
=item * C<exception_handling> - Uses eval blocks with error checking
=back
B<Example Analysis:>
sub fetch_user {
my ($self, $id) = @_;
return undef unless $id; # undef_on_error
return undef if $id < 0; # undef_on_error
return $self->{users}{$id};
}
Results in:
_error_return: 'undef'
_success_failure_pattern: 1
_error_handling: {
undef_on_error: ['$id', '$id < 0']
}
B<Success/Failure Pattern:>
Methods that return different types for success vs. failure are flagged with
C<_success_failure_pattern>. Common patterns:
=over 4
=item * Returns value on success, undef on failure
=item * Returns true on success, false on failure
=item * Returns data on success, empty list on failure
=back
=head3 Success Indicator Detection
Methods that always return true (typically for side effects):
sub update_status {
my ($self, $status) = @_;
$self->{status} = $status;
return 1; # Success indicator
}
Sets C<_success_indicator> flag when method consistently returns 1.
=head3 Schema Output
Enhanced return analysis adds these fields to method schemas:
output:
type: boolean # Inferred return type
_context_aware: 1 # Uses wantarray
_list_context:
type: array
_scalar_context:
type: integer
_returns_self: 1 # Returns $self
_void_context: 1 # No meaningful return
_success_indicator: 1 # Always returns true
_error_return: undef # How errors are signaled
_success_failure_pattern: 1 # Mixed return types
_error_handling: # Detailed error patterns
undef_on_error: [...]
exception_handling: 1
This comprehensive analysis enables:
=over 4
=item * Better test generation (testing both contexts, error paths)
=item * Documentation generation (clear error conventions)
=item * API design validation (consistent error handling)
=item * Contract specification (precise return behavior)
=back
=head2 Example
For a method like:
sub connect {
my ($self, $host, $port, $ssl, $file, $content) = @_;
die if $file && $content; # mutually exclusive
die unless $host || $file; # required group
die "Port requires host" if $port && !$host; # dependency
die if $ssl && $port != 443; # value constraint
# ... connection logic
}
The extractor generates:
relationships:
- type: mutually_exclusive
params: [file, content]
description: Cannot specify both file and content
- type: required_group
params: [host, file]
logic: or
description: Must specify either host or file
- type: dependency
param: port
requires: host
description: port requires host to be specified
- type: value_constraint
if: ssl
then: port
operator: ==
value: 443
description: When ssl is specified, port must equal 443
=head1 MODERN PERL FEATURES
This module adds support for:
=head2 Subroutine Signatures (Perl 5.20+)
sub connect($host, $port = 3306, %options) {
...
}
Extracts: required params, optional params with defaults, slurpy params
=head2 Type Constraints (Perl 5.36+)
sub calculate($x :Int, $y :Num) {
...
}
Recognizes: Int, Num, Str, Bool, ArrayRef, HashRef, custom classes
=head3 Subroutine Attributes
sub get_value :lvalue :Returns(Int) {
...
}
Detects: :lvalue, :method, :Returns(Type), custom attributes
=head2 Postfix Dereferencing (Perl 5.20+)
my @array = $arrayref->@*;
my %hash = $hashref->%*;
my @slice = $arrayref->@[1,3,5];
Tracks usage of modern dereferencing syntax
=head2 Field Declarations (Perl 5.38+)
field $host :param = 'localhost';
field $port :param(port_number) = 3306;
field $logger :param :isa(Log::Any);
Extracts fields and maps them to parameters
=head2 Modern Perl Features Support
The schema extractor supports modern Perl syntax introduced in versions 5.20, 5.36, and 5.38+.
=head3 Subroutine Signatures (Perl 5.20+)
Automatically extracts parameters from native Perl signatures:
use feature 'signatures';
sub connect($host, $port = 3306, $database = undef) {
...
}
Extracted schema includes:
=over 4
=item * Parameter positions
=item * Optional vs required parameters
=item * Default values from signature
=item * Slurpy parameters (@array, %hash)
=back
B<Example:>
# Signature with defaults
sub process($file, %options) { ... }
# Extracts:
# $file: position 0, required
# %options: position 1, optional, slurpy hash
=head3 Type Constraints in Signatures (Perl 5.36+)
Recognizes type constraints in signature parameters:
sub calculate($x :Int, $y :Num, $name :Str = "result") {
return $x + $y;
}
Supported constraint types:
=over 4
=item * C<:Int, :Integer> -> integer
=item * C<:Num, :Number> -> number
=item * C<:Str, :String> -> string
=item * C<:Bool, :Boolean> -> boolean
=item * C<:ArrayRef, :Array> -> arrayref
=item * C<:HashRef, :Hash> -> hashref
=item * C<:ClassName> -> object with isa constraint
=back
Type constraints are combined with defaults when both are present.
=head3 Subroutine Attributes
Extracts and documents subroutine attributes:
sub get_value :lvalue {
my $self = shift;
return $self->{value};
}
sub calculate :Returns(Int) :method {
my ($self, $x, $y) = @_;
return $x + $y;
}
Recognized attributes stored in C<_attributes> field:
=over 4
=item * C<:lvalue> - Method can be assigned to
=item * C<:method> - Explicitly marked as method
=item * C<:Returns(Type)> - Declares return type
=item * Custom attributes with values: C<:MyAttr(value)>
=back
=head3 Postfix Dereferencing (Perl 5.20+)
Detects usage of postfix dereferencing syntax:
use feature 'postderef';
sub process_array {
my ($self, $arrayref) = @_;
my @array = $arrayref->@*; # Array dereference
my @slice = $arrayref->@[1,3,5]; # Array slice
return @array;
}
sub process_hash {
my ($self, $hashref) = @_;
my %hash = $hashref->%*; # Hash dereference
return keys %hash;
}
Tracked features stored in C<_modern_features>:
=over 4
=item * C<array_deref> - Uses C<-E<gt>@*>
=item * C<hash_deref> - Uses C<-E<gt>%*>
=item * C<scalar_deref> - Uses C<-E<gt>$*>
=item * C<code_deref> - Uses C<-E<gt>&*>
=item * C<array_slice> - Uses C<-E<gt>@[...]>
=item * C<hash_slice> - Uses C<-E<gt>%{...}>
=back
=head3 Field Declarations (Perl 5.38+)
Extracts field declarations from class syntax and maps them to method parameters:
use feature 'class';
class DatabaseConnection {
field $host :param = 'localhost';
field $port :param = 3306;
field $username :param(user);
field $password :param;
field $logger :param :isa(Log::Any);
method connect() {
# Fields available as instance variables
}
}
Field attributes:
=over 4
=item * C<:param> - Field is a constructor parameter (uses field name)
=item * C<:param(name)> - Field maps to parameter with different name
=item * C<:isa(Class)> - Type constraint for the field
=item * Default values in field declarations
=back
Extracted schema includes both field information in C<_fields> and merged parameter
information in C<input>, allowing proper validation of class constructors.
=head3 Mixed Modern and Traditional Syntax
The extractor handles code that mixes modern and traditional syntax:
sub modern($x, $y = 5) {
# Modern signature with default
}
sub traditional {
my ($self, $x, $y) = @_;
$y //= 5; # Traditional default in code
# Both extract same parameter information
}
Priority order for parameter information:
=over 4
=item 1. Signature declarations (highest priority)
=item 2. Field declarations (for class methods)
=item 3. POD documentation
=item 4. Code analysis (lowest priority)
=back
This ensures that explicit declarations in signatures take precedence over
inferred information from code analysis.
=head3 Backwards Compatibility
All modern Perl feature detection is optional and automatic:
=over 4
=item * Traditional C<sub> declarations continue to work
=item * Code without modern features extracts parameters as before
=item * Modern features are additive - they enhance rather than replace existing extraction
=item * Schemas include C<_source> field indicating where parameter info came from
=back
=head2 _yamltest_hints
Each method schema returned by L</extract_all> now optionally includes a
C<_yamltest_hints> key, which provides guidance for automated test generation
based on the code analysis.
This is intended to help L<App::Test::Generator> create meaningful tests,
including boundary and invalid input cases, without manually specifying them.
The structure is a hashref with the following keys:
=over 4
=item * boundary_values
An arrayref of numeric values that represent boundaries detected from
comparisons in the code. These are derived from literals in statements
like C<$x < 0> or C<$y >= 255>. The generator can use these to create
boundary tests.
Example:
_yamltest_hints:
boundary_values: [0, 1, 100, 255]
=item * invalid_inputs
An arrayref of values that are likely to be rejected by the method,
based on checks like C<defined>, empty strings, or numeric validations.
Example:
_yamltest_hints:
invalid_inputs: [undef, '', -1]
=item * equivalence_classes
An arrayref intended to capture detected equivalence classes or patterns
among inputs. Currently this is empty by default, but future enhancements
may populate it based on detected input groupings.
Example:
_yamltest_hints:
equivalence_classes: []
=back
=head3 Usage
When calling C<extract_all>, each method schema will include
C<_yamltest_hints> if any hints were detected:
my $schemas = $extractor->extract_all;
my $hints = $schemas->{example_method}->{_yamltest_hints};
You can then feed these hints into automated test generators to produce
negative tests, boundary tests, and parameter-specific test cases.
=head3 Notes
=over 4
=item * Hints are inferred heuristically from code and validation statements.
=item * Not all inputs are guaranteed to be detected; the feature is additive
and will never remove information from the schema.
=item * Currently, equivalence classes are not populated, but the field exists
for future extension.
=item * Boundary and invalid input hints are deduplicated to avoid repeated
test values.
=back
=head3 Examples
Given a method like:
sub example {
my ($x) = @_;
die "negative" if $x < 0;
return unless defined($x);
return $x * 2;
}
After running:
my $extractor = App::Test::Generator::SchemaExtractor->new(
input_file => 'TestHints.pm',
output_dir => '/tmp',
quiet => 1,
);
my $schemas = $extractor->extract_all;
The schema for the method "example" will include:
$schemas->{example} = {
function => 'example',
_confidence => {
input => 'unknown',
output => 'unknown',
},
input => {
x => {
type => 'scalar',
optional => 0,
}
},
output => {
type => 'scalar',
},
_yamltest_hints => {
boundary_values => [0, 1],
invalid_inputs => [undef, -1],
equivalence_classes => [],
},
_notes => '...',
_analysis => {
input_confidence => 'low',
output_confidence => 'unknown',
confidence_factors => {
input => {...},
output => {...},
},
overall_confidence => 'low',
},
_fields => {},
_modern_features => {},
_attributes => {},
};
=head1 METHODS
=head2 new
Construct a new SchemaExtractor for a given Perl source file.
my $extractor = App::Test::Generator::SchemaExtractor->new(
input_file => 'lib/MyModule.pm', # Required
output_dir => 'schemas/', # Optional - only needed if writing schemas
verbose => 1, # Default: 0
include_private => 1, # Default: 0
max_parameters => 50, # Default: 20
confidence_threshold => 0.7, # Default: 0.5
strict_pod => 0|1|2, # Default: 0 (off)
);
=head3 Arguments
=over 4
=item * C<input_file>
Path to the Perl source file to analyse. Required. Must exist on disk.
=item * C<output_dir>
Directory to write generated schema YAML files. Optional - only
required if C<_write_schema> will be called. Callers passing
C<no_write =E<gt> 1> to C<extract_all> do not need to supply it.
=item * C<verbose>
Print progress messages to stdout during analysis. Optional, default 0.
=item * C<include_private>
Include methods whose names begin with C<_> in the analysis. Optional,
default 0. Methods named C<_new>, C<_init>, and C<_build> are always
included regardless of this setting.
=item * C<max_parameters>
Safety limit on the number of parameters analysed per method to prevent
runaway processing on pathological code. Optional, default 20.
=item * C<confidence_threshold>
Minimum confidence score (0.0-1.0) below which a schema is marked with
C<_low_confidence =E<gt> 1>. Optional, default 0.5.
=item * C<strict_pod>
Controls POD/code agreement validation. C<0> disables validation,
C<1> emits warnings, C<2> croaks on first disagreement. Also accepts
the strings C<off>, C<warn>, and C<fatal>. Optional, default 0.
=back
=head3 Returns
A blessed hashref. Croaks if C<input_file> is missing or does not
exist on disk.
=head3 Side effects
Reads and parses the input file using L<PPI> at construction time.
=head3 API specification
=head4 input
{
input_file => { type => SCALAR },
output_dir => { type => SCALAR, optional => 1 },
verbose => { type => SCALAR, optional => 1 },
include_private => { type => SCALAR, optional => 1 },
max_parameters => { type => SCALAR, optional => 1 },
confidence_threshold => { type => SCALAR, optional => 1 },
strict_pod => { type => SCALAR, optional => 1 },
}
=head4 output
{
type => OBJECT,
isa => 'App::Test::Generator::SchemaExtractor',
}
=cut | |||||
| 1273 | ||||||
| 1274 | sub new { | |||||
| 1275 | 425 | 2653213 | my $class = shift; | |||
| 1276 | ||||||
| 1277 | # Handle hash or hashref arguments | |||||
| 1278 | 425 | 787 | my $params = Params::Get::get_params('input_file', @_) || {}; | |||
| 1279 | ||||||
| 1280 | 422 | 5247 | croak(__PACKAGE__, ': input_file required') unless exists $params->{input_file}; | |||
| 1281 | ||||||
| 1282 | my $self = { | |||||
| 1283 | input_file => $params->{input_file}, | |||||
| 1284 | # output_dir is optional â only required if _write_schema will be called. | |||||
| 1285 | # Callers using extract_all(no_write => 1) do not need to supply it. | |||||
| 1286 | output_dir => $params->{output_dir}, | |||||
| 1287 | verbose => $params->{verbose} // 0, | |||||
| 1288 | include_private => $params->{include_private} // 0, # include _private methods | |||||
| 1289 | confidence_threshold => $params->{confidence_threshold} // $DEFAULT_CONFIDENCE_THRESH, | |||||
| 1290 | max_parameters => $params->{max_parameters} // $DEFAULT_MAX_PARAMETERS, # safety limit | |||||
| 1291 | 422 | 1739 | strict_pod => _validate_strictness_level($params->{strict_pod}), # Enable strict POD checking | |||
| 1292 | }; | |||||
| 1293 | ||||||
| 1294 | # Validate input file exists | |||||
| 1295 | 422 | 3215 | unless (-f $self->{input_file}) { | |||
| 1296 | 3 | 53 | croak(__PACKAGE__, ": Input file '$self->{input_file}' does not exist"); | |||
| 1297 | } | |||||
| 1298 | ||||||
| 1299 | 419 | 938 | return bless $self, $class; | |||
| 1300 | } | |||||
| 1301 | ||||||
| 1302 - 1375 | =head2 extract_all
Extract schemas for all qualifying methods in the module and return
them as a hashref.
my $schemas = $extractor->extract_all();
# Suppress writing .yml files to disk
my $schemas = $extractor->extract_all(no_write => 1);
=head3 Arguments
=over 4
=item * C<no_write>
When true, schema files are not written to C<output_dir>. The returned
hashref is still fully populated. Useful when the caller wants to
inspect or augment schemas before deciding whether to write them.
Optional, default 0.
=back
=head3 Returns
A hashref mapping method name strings to schema hashrefs. Each schema
contains at minimum the keys C<function>, C<module>, C<input>,
C<output>, and C<_analysis>. See L</Generated Schema Structure> for
the full structure.
=head3 Side effects
Parses the input file with L<PPI>. Writes one YAML file per method to
C<output_dir> unless C<no_write> is set. Creates C<output_dir> if it
does not exist and writing is enabled.
=head3 Notes
Private methods (names beginning with C<_>) are excluded unless
C<include_private =E<gt> 1> was passed to C<new>. Duplicate method
names are deduplicated with a warning logged to stdout in verbose mode.
POD/code agreement validation is applied if C<strict_pod> was set in
C<new>. At level 2 (fatal), the first disagreement causes an immediate
croak.
=head3 API specification
=head4 input
{
self => { type => OBJECT, isa => 'App::Test::Generator::SchemaExtractor' },
no_write => { type => SCALAR, optional => 1 },
}
=head4 output
{
type => HASHREF,
keys => {
'*' => {
type => HASHREF,
keys => {
function => { type => SCALAR },
module => { type => SCALAR },
input => { type => HASHREF },
output => { type => HASHREF },
_analysis => { type => HASHREF },
},
},
},
}
=cut | |||||
| 1376 | ||||||
| 1377 | sub extract_all { | |||||
| 1378 | 70 | 1609 | my $self = shift; | |||
| 1379 | 70 | 109 | my $params = Params::Get::get_params(undef, @_) || {}; | |||
| 1380 | ||||||
| 1381 | 70 | 804 | $self->_log("Parsing $self->{input_file}..."); | |||
| 1382 | 70 | 143 | $self->_log('Strict POD mode: ' . (qw(off warn fatal))[$self->{strict_pod}]); | |||
| 1383 | ||||||
| 1384 | 70 | 252 | my $document = PPI::Document->new($self->{input_file}) or die "Failed to parse $self->{input_file}: $!"; | |||
| 1385 | ||||||
| 1386 | # Store document for later use | |||||
| 1387 | 70 | 604938 | $self->{_document} = $document; | |||
| 1388 | ||||||
| 1389 | 70 | 227 | my $package_name = $self->_extract_package_name($document); | |||
| 1390 | 70 | 625 | $self->{_package_name} //= $package_name; | |||
| 1391 | 70 | 133 | $self->_log("Package: $package_name"); | |||
| 1392 | ||||||
| 1393 | 70 | 100 | my $methods = $self->_find_methods($document); | |||
| 1394 | 70 | 148 | $self->_log('Found ' . scalar(@$methods) . ' methods (pre-dedup)'); | |||
| 1395 | ||||||
| 1396 | 70 | 56 | my %schemas; | |||
| 1397 | 70 70 | 69 85 | foreach my $method (@{$methods}) { | |||
| 1398 | 266 | 450 | $self->_log("\nAnalyzing method: $method->{name}"); | |||
| 1399 | ||||||
| 1400 | 266 | 338 | my $schema = $self->_analyze_method($method); | |||
| 1401 | 266 | 398 | $schemas{$method->{name}} = $schema; | |||
| 1402 | 266 | 313 | $schema->{'module'} = $package_name; | |||
| 1403 | ||||||
| 1404 | # Write individual schema file | |||||
| 1405 | # Only write schema files if no_write is not set | |||||
| 1406 | 266 | 433 | $self->_write_schema($method->{name}, $schema) unless $params->{no_write}; | |||
| 1407 | } | |||||
| 1408 | ||||||
| 1409 | 70 | 292 | return \%schemas; | |||
| 1410 | } | |||||
| 1411 | ||||||
| 1412 | # -------------------------------------------------- | |||||
| 1413 | # _extract_package_name | |||||
| 1414 | # | |||||
| 1415 | # Purpose: Extract the Perl package name from a | |||||
| 1416 | # PPI document, or from the cached value | |||||
| 1417 | # stored at construction time. | |||||
| 1418 | # | |||||
| 1419 | # Entry: $document - a PPI::Document, or undef | |||||
| 1420 | # to use $self->{_document}. | |||||
| 1421 | # | |||||
| 1422 | # Exit: Returns the package namespace string, | |||||
| 1423 | # or an empty string if no package | |||||
| 1424 | # statement is found. | |||||
| 1425 | # | |||||
| 1426 | # Side effects: Stores the package name in | |||||
| 1427 | # $self->{_package_name} if not already | |||||
| 1428 | # set. | |||||
| 1429 | # | |||||
| 1430 | # Notes: Croaks if more than one package | |||||
| 1431 | # declaration is found â multi-package | |||||
| 1432 | # files are not supported. | |||||
| 1433 | # -------------------------------------------------- | |||||
| 1434 | sub _extract_package_name { | |||||
| 1435 | 84 | 3173 | my ($self, $document) = @_; | |||
| 1436 | ||||||
| 1437 | 84 | 104 | if(!defined($document)) { | |||
| 1438 | 12 | 12 | $document = $self->{_document}; | |||
| 1439 | } | |||||
| 1440 | 84 | 176 | my $pkgs = $document->find('PPI::Statement::Package') || []; | |||
| 1441 | 84 | 159942 | if(@$pkgs == 0) { | |||
| 1442 | 5 | 12 | my $package_stmt = $document->find_first('PPI::Statement::Package'); | |||
| 1443 | 5 | 3136 | return $package_stmt ? $package_stmt->namespace() : ''; | |||
| 1444 | } | |||||
| 1445 | 79 | 108 | croak('More than one package declaration found') if @$pkgs > 1; | |||
| 1446 | 79 | 238 | $self->{_package_name} //= $pkgs->[0]->namespace(); | |||
| 1447 | 79 | 892 | return $pkgs->[0]->namespace(); | |||
| 1448 | } | |||||
| 1449 | ||||||
| 1450 | # -------------------------------------------------- | |||||
| 1451 | # _find_methods | |||||
| 1452 | # | |||||
| 1453 | # Purpose: Locate all subroutine and method | |||||
| 1454 | # declarations in a PPI document, | |||||
| 1455 | # including Moose-style method modifiers | |||||
| 1456 | # and Perl 5.38 class/method syntax. | |||||
| 1457 | # | |||||
| 1458 | # Entry: $document - a PPI::Document. | |||||
| 1459 | # | |||||
| 1460 | # Exit: Returns an arrayref of method hashrefs, | |||||
| 1461 | # each containing: name, node, body, pod, | |||||
| 1462 | # type, and optionally modifier, class, | |||||
| 1463 | # and fields keys. | |||||
| 1464 | # Private methods (names beginning with | |||||
| 1465 | # _) are excluded unless include_private | |||||
| 1466 | # was set in new(), except for _new, | |||||
| 1467 | # _init, and _build which are always | |||||
| 1468 | # included. | |||||
| 1469 | # | |||||
| 1470 | # Side effects: Logs progress and warnings to stdout | |||||
| 1471 | # when verbose is set. | |||||
| 1472 | # | |||||
| 1473 | # Notes: Duplicate method names are silently | |||||
| 1474 | # deduplicated â the second occurrence | |||||
| 1475 | # is dropped with a verbose warning. | |||||
| 1476 | # Class/method detection is regex-based | |||||
| 1477 | # and may misbehave on complex code. | |||||
| 1478 | # -------------------------------------------------- | |||||
| 1479 | sub _find_methods { | |||||
| 1480 | 76 | 34459 | my ($self, $document) = @_; | |||
| 1481 | ||||||
| 1482 | 76 | 95 | my $subs = $document->find('PPI::Statement::Sub') || []; | |||
| 1483 | 76 | 137280 | my $sub_decls = $document->find('PPI::Statement') || []; | |||
| 1484 | ||||||
| 1485 | 76 | 136706 | my @methods; | |||
| 1486 | 76 | 93 | foreach my $sub (@$subs) { | |||
| 1487 | 284 | 11891 | my $name = $sub->name(); | |||
| 1488 | ||||||
| 1489 | 284 | 5985 | next unless defined $name; # Skip anonymous routines | |||
| 1490 | ||||||
| 1491 | # Skip private methods unless explicitly included, or they're special | |||||
| 1492 | 284 | 353 | if ($name =~ /^_/ && $name !~ /^_(new|init|build)/) { | |||
| 1493 | 9 | 15 | next unless $self->{include_private}; | |||
| 1494 | } | |||||
| 1495 | ||||||
| 1496 | # Get the POD before this sub | |||||
| 1497 | 278 | 302 | my $pod = $self->_extract_pod_before($sub); | |||
| 1498 | ||||||
| 1499 | 278 | 315 | push @methods, { | |||
| 1500 | name => $name, | |||||
| 1501 | node => $sub, | |||||
| 1502 | body => $sub->content(), | |||||
| 1503 | pod => $pod, | |||||
| 1504 | type => 'sub', | |||||
| 1505 | }; | |||||
| 1506 | } | |||||
| 1507 | ||||||
| 1508 | # Look for class { method } syntax (Perl 5.38+) | |||||
| 1509 | 76 | 4903 | my $content = $document->content(); | |||
| 1510 | 76 | 18113 | if ($content =~ /\bclass\b/) { | |||
| 1511 | 16 | 23 | $self->_log(' Detecting class/method syntax...'); | |||
| 1512 | 16 | 26 | $self->_extract_class_methods($content, \@methods); | |||
| 1513 | } | |||||
| 1514 | ||||||
| 1515 | # Process method modifiers (Moose) | |||||
| 1516 | 76 | 84 | foreach my $decl (@$sub_decls) { | |||
| 1517 | 1409 | 1120 | my $content = $decl->content; | |||
| 1518 | 1409 | 30708 | if ($content =~ /^\s*(before|after|around)\s+['"]?(\w+)['"]?\b/) { | |||
| 1519 | 0 | 0 | my ($modifier, $method_name) = ($1, $2); | |||
| 1520 | 0 | 0 | my $full_name = "${modifier}_$method_name"; | |||
| 1521 | ||||||
| 1522 | # Look for the actual sub definition that follows | |||||
| 1523 | 0 | 0 | my $next_sib = $decl->next_sibling; | |||
| 1524 | 0 | 0 | while ($next_sib && !$next_sib->isa('PPI::Statement::Sub')) { | |||
| 1525 | 0 | 0 | $next_sib = $next_sib->next_sibling; | |||
| 1526 | } | |||||
| 1527 | ||||||
| 1528 | 0 | 0 | if ($next_sib && $next_sib->isa('PPI::Statement::Sub')) { | |||
| 1529 | 0 | 0 | my $pod = $self->_extract_pod_before($decl); # POD might be before modifier | |||
| 1530 | 0 | 0 | push @methods, { | |||
| 1531 | name => $full_name, | |||||
| 1532 | node => $next_sib, | |||||
| 1533 | body => $next_sib->content, | |||||
| 1534 | pod => $pod, | |||||
| 1535 | type => 'modifier', | |||||
| 1536 | original_method => $method_name, | |||||
| 1537 | modifier => $modifier, | |||||
| 1538 | }; | |||||
| 1539 | 0 | 0 | $self->_log(" Found method modifier: $full_name"); | |||
| 1540 | } | |||||
| 1541 | } | |||||
| 1542 | } | |||||
| 1543 | ||||||
| 1544 | # Prevent silent duplicate method overwrites | |||||
| 1545 | 76 | 62 | my %seen; | |||
| 1546 | @methods = grep { | |||||
| 1547 | 76 279 | 88 203 | my $n = $_->{name}; | |||
| 1548 | 279 | 316 | if ($seen{$n}++) { | |||
| 1549 | 0 | 0 | $self->_log(" WARNING: duplicate method '$n' ignored"); | |||
| 1550 | 0 | 0 | 0; | |||
| 1551 | } else { | |||||
| 1552 | 279 | 258 | 1; | |||
| 1553 | } | |||||
| 1554 | } @methods; | |||||
| 1555 | ||||||
| 1556 | 76 | 189 | return \@methods; | |||
| 1557 | } | |||||
| 1558 | ||||||
| 1559 | # -------------------------------------------------- | |||||
| 1560 | # _extract_class_methods | |||||
| 1561 | # | |||||
| 1562 | # Purpose: Extract method declarations from | |||||
| 1563 | # Perl 5.38 class { method {} } syntax | |||||
| 1564 | # by regex-based scanning of the class | |||||
| 1565 | # body content. | |||||
| 1566 | # | |||||
| 1567 | # Entry: $content - full document source string. | |||||
| 1568 | # $methods - arrayref to push discovered | |||||
| 1569 | # method hashrefs onto | |||||
| 1570 | # (modified in place). | |||||
| 1571 | # | |||||
| 1572 | # Exit: Returns nothing. Appends to $methods. | |||||
| 1573 | # | |||||
| 1574 | # Side effects: Logs class and method discoveries | |||||
| 1575 | # to stdout when verbose is set. | |||||
| 1576 | # | |||||
| 1577 | # Notes: This is experimental â regex-based | |||||
| 1578 | # class body parsing may misbehave on | |||||
| 1579 | # complex or nested class declarations. | |||||
| 1580 | # Class body boundaries are tracked by | |||||
| 1581 | # simple brace counting, which will | |||||
| 1582 | # fail on unbalanced braces in strings | |||||
| 1583 | # or heredocs. | |||||
| 1584 | # -------------------------------------------------- | |||||
| 1585 | sub _extract_class_methods { | |||||
| 1586 | 17 | 25 | my ($self, $content, $methods) = @_; | |||
| 1587 | ||||||
| 1588 | # EXPERIMENTAL: regex-based parsing, may misbehave on complex code | |||||
| 1589 | ||||||
| 1590 | # Simple pattern: find "class Name {" blocks | |||||
| 1591 | # This won't handle all edge cases but will work for simple classes | |||||
| 1592 | 17 | 58 | while ($content =~ /class\s+(\w+)\s*\{/g) { | |||
| 1593 | 2 | 3 | my $class_name = $1; | |||
| 1594 | 2 | 2 | my $start_pos = pos($content); | |||
| 1595 | ||||||
| 1596 | # Find the matching closing brace (simple brace counting) | |||||
| 1597 | 2 | 2 | my $depth = 1; | |||
| 1598 | 2 | 2 | my $class_end = $start_pos; | |||
| 1599 | ||||||
| 1600 | 2 | 7 | while ($depth > 0 && $class_end < length($content)) { | |||
| 1601 | 356 | 230 | my $char = substr($content, $class_end, 1); | |||
| 1602 | 356 | 256 | $depth++ if $char eq '{'; | |||
| 1603 | 356 | 220 | $depth-- if $char eq '}'; | |||
| 1604 | 356 | 341 | $class_end++; | |||
| 1605 | } | |||||
| 1606 | ||||||
| 1607 | 2 | 2 | next if $depth != 0; # unbalanced braces, skip class | |||
| 1608 | ||||||
| 1609 | 2 | 4 | my $class_body = substr($content, $start_pos, $class_end - $start_pos - 1); | |||
| 1610 | ||||||
| 1611 | 2 | 5 | $self->_log(" Found class $class_name"); | |||
| 1612 | ||||||
| 1613 | # Extract field declarations from class | |||||
| 1614 | 2 | 3 | my $fields = $self->_extract_field_declarations($class_body); | |||
| 1615 | ||||||
| 1616 | # Find methods in the class body | |||||
| 1617 | 2 | 7 | while ($class_body =~ /method\s+(\w+)\s*(\([^)]*\))?\s*\{/g) { | |||
| 1618 | 2 | 5 | my ($method_name, $sig_with_parens) = ($1, $2 || '()'); | |||
| 1619 | ||||||
| 1620 | # Skip private unless configured | |||||
| 1621 | 2 | 4 | if ($method_name =~ /^_/ && $method_name !~ /^_(new|init|build)/) { | |||
| 1622 | 0 | 0 | next unless $self->{include_private}; | |||
| 1623 | } | |||||
| 1624 | ||||||
| 1625 | # Reconstruct as sub for analysis | |||||
| 1626 | 2 | 2 | my $signature = $sig_with_parens; | |||
| 1627 | 2 | 4 | $signature =~ s/^\(//; | |||
| 1628 | 2 | 3 | $signature =~ s/\)$//; | |||
| 1629 | ||||||
| 1630 | # Build a fake sub declaration | |||||
| 1631 | 2 | 2 | my $fake_sub = "sub $method_name($signature) { }"; | |||
| 1632 | ||||||
| 1633 | 2 | 7 | push @$methods, { | |||
| 1634 | name => $method_name, | |||||
| 1635 | node => undef, | |||||
| 1636 | body => $fake_sub, # Just the signature for now | |||||
| 1637 | is_stub => 1, | |||||
| 1638 | pod => '', | |||||
| 1639 | type => 'method', | |||||
| 1640 | class => $class_name, | |||||
| 1641 | fields => $fields, | |||||
| 1642 | }; | |||||
| 1643 | ||||||
| 1644 | 2 | 4 | $self->_log(" Found method $method_name in class $class_name"); | |||
| 1645 | } | |||||
| 1646 | } | |||||
| 1647 | } | |||||
| 1648 | ||||||
| 1649 | # -------------------------------------------------- | |||||
| 1650 | # _extract_pod_before | |||||
| 1651 | # | |||||
| 1652 | # Purpose: Collect the POD documentation that | |||||
| 1653 | # appears immediately before a | |||||
| 1654 | # subroutine in the PPI document, by | |||||
| 1655 | # walking backwards through siblings. | |||||
| 1656 | # | |||||
| 1657 | # Entry: $sub - a PPI node (typically a | |||||
| 1658 | # PPI::Statement::Sub). | |||||
| 1659 | # | |||||
| 1660 | # Exit: Returns a string containing all POD | |||||
| 1661 | # content found before the sub, with | |||||
| 1662 | # inline parameter comments converted | |||||
| 1663 | # to =item format. Returns an empty | |||||
| 1664 | # string if no POD is found. | |||||
| 1665 | # | |||||
| 1666 | # Side effects: None. | |||||
| 1667 | # | |||||
| 1668 | # Notes: Stops walking backwards on the first | |||||
| 1669 | # non-POD, non-whitespace, non-separator, | |||||
| 1670 | # non-include node encountered. | |||||
| 1671 | # Walking is capped at $POD_WALK_LIMIT | |||||
| 1672 | # steps to prevent runaway processing | |||||
| 1673 | # on pathological documents. | |||||
| 1674 | # -------------------------------------------------- | |||||
| 1675 | sub _extract_pod_before { | |||||
| 1676 | 280 | 3416 | my ($self, $sub) = @_; | |||
| 1677 | ||||||
| 1678 | 280 | 213 | my $pod = ''; | |||
| 1679 | 280 | 318 | my $current = $sub->previous_sibling(); | |||
| 1680 | 280 | 3185 | my $seen_code = 0; | |||
| 1681 | 280 | 179 | my $steps = 0; | |||
| 1682 | ||||||
| 1683 | # Walk backwards collecting POD | |||||
| 1684 | 280 | 501 | while($current && $steps++ < $POD_WALK_LIMIT) { | |||
| 1685 | 1063 | 10182 | if ($current->isa('PPI::Token::Pod')) { | |||
| 1686 | 95 | 84 | $pod = $current->content() . $pod; | |||
| 1687 | } elsif ($current->isa('PPI::Token::Comment')) { | |||||
| 1688 | # Include comments that might contain parameter info | |||||
| 1689 | 6 | 5 | my $comment = $current->content(); | |||
| 1690 | 6 | 15 | if ($comment =~ /#\s*(?:param|arg|input)\s+\$(\w+)\s*:\s*(.+)/i) { | |||
| 1691 | 0 | 0 | $pod .= "=item \$$1\n$2\n\n"; | |||
| 1692 | } | |||||
| 1693 | } elsif ($current->isa('PPI::Token::Whitespace') || | |||||
| 1694 | $current->isa('PPI::Token::Separator')) { | |||||
| 1695 | # Skip whitespace and separators | |||||
| 1696 | } elsif ($current->isa('PPI::Statement::Include')) { | |||||
| 1697 | # allow 'use strict', 'use warnings' between POD and sub | |||||
| 1698 | } else { | |||||
| 1699 | # Hit non-POD, non-whitespace - stop | |||||
| 1700 | 278 | 199 | last; | |||
| 1701 | } | |||||
| 1702 | 785 | 762 | $current = $current->previous_sibling(); | |||
| 1703 | } | |||||
| 1704 | ||||||
| 1705 | 280 | 243 | return $pod; | |||
| 1706 | } | |||||
| 1707 | ||||||
| 1708 | # -------------------------------------------------- | |||||
| 1709 | # _analyze_method | |||||
| 1710 | # | |||||
| 1711 | # Purpose: Perform full multi-source analysis of | |||||
| 1712 | # a single method and produce a complete | |||||
| 1713 | # schema hashref, combining POD analysis, | |||||
| 1714 | # code pattern detection, signature | |||||
| 1715 | # analysis, validator schema extraction, | |||||
| 1716 | # confidence scoring, relationship | |||||
| 1717 | # detection, and modern Perl feature | |||||
| 1718 | # extraction. | |||||
| 1719 | # | |||||
| 1720 | # Entry: $method - a method hashref as produced | |||||
| 1721 | # by _find_methods, containing | |||||
| 1722 | # at minimum: name, body, pod. | |||||
| 1723 | # | |||||
| 1724 | # Exit: Returns a schema hashref containing: | |||||
| 1725 | # function, input, output, _confidence, | |||||
| 1726 | # _analysis, _notes, and optionally: | |||||
| 1727 | # new, accessor, relationships, | |||||
| 1728 | # _yamltest_hints, _attributes, | |||||
| 1729 | # _modern_features, _fields, _model, | |||||
| 1730 | # _low_confidence. | |||||
| 1731 | # | |||||
| 1732 | # Side effects: Logs progress to stdout when verbose | |||||
| 1733 | # is set. May carp or croak if | |||||
| 1734 | # strict_pod is enabled and POD/code | |||||
| 1735 | # disagreements are found. | |||||
| 1736 | # | |||||
| 1737 | # Notes: This is the central analysis entry | |||||
| 1738 | # point â it orchestrates all other | |||||
| 1739 | # analysis helpers and merges their | |||||
| 1740 | # results. The non-invasive reasoning | |||||
| 1741 | # layer (Model::Method, Analyzer::*) | |||||
| 1742 | # runs after the main schema is built | |||||
| 1743 | # and attaches metadata only. | |||||
| 1744 | # -------------------------------------------------- | |||||
| 1745 | sub _analyze_method { | |||||
| 1746 | 266 | 260 | my ($self, $method) = @_; | |||
| 1747 | 266 | 257 | my $code = $method->{body}; | |||
| 1748 | 266 | 225 | my $pod = $method->{pod}; | |||
| 1749 | ||||||
| 1750 | # Extract modern features | |||||
| 1751 | 266 | 395 | my $attributes = $self->_extract_subroutine_attributes($code); | |||
| 1752 | 266 | 363 | my $postfix_derefs = $self->_analyze_postfix_dereferencing($code); | |||
| 1753 | 266 | 359 | my $fields = $self->_extract_field_declarations($code); | |||
| 1754 | ||||||
| 1755 | # If this method came from a class, use those field declarations | |||||
| 1756 | 266 1 | 329 2 | if ($method->{fields} && keys %{$method->{fields}}) { | |||
| 1757 | 1 | 1 | $fields = $method->{fields}; | |||
| 1758 | } | |||||
| 1759 | ||||||
| 1760 | my $schema = { | |||||
| 1761 | function => $method->{name}, | |||||
| 1762 | 266 | 818 | _confidence => { | |||
| 1763 | 'input' => {}, | |||||
| 1764 | 'output' => {} | |||||
| 1765 | }, | |||||
| 1766 | input => {}, | |||||
| 1767 | output => {}, | |||||
| 1768 | setup => undef, | |||||
| 1769 | transforms => {}, | |||||
| 1770 | }; | |||||
| 1771 | ||||||
| 1772 | # Analyze different sources | |||||
| 1773 | 266 | 416 | my $pod_params = $self->_analyze_pod($pod); | |||
| 1774 | 266 | 411 | my $code_params = $self->_analyze_code($code, $method); | |||
| 1775 | ||||||
| 1776 | # Validate POD/code agreement if strict mode is enabled | |||||
| 1777 | 266 | 406 | if ($self->{strict_pod}) { | |||
| 1778 | my @validation_errors = $self->_validate_pod_code_agreement( | |||||
| 1779 | $pod_params, | |||||
| 1780 | $code_params, | |||||
| 1781 | $method->{name}, | |||||
| 1782 | { | |||||
| 1783 | 6 | 20 | ignore_self => 1, | |||
| 1784 | allow_renames => 1, | |||||
| 1785 | } | |||||
| 1786 | ); | |||||
| 1787 | ||||||
| 1788 | 6 | 11 | if (@validation_errors) { | |||
| 1789 | 5 | 10 | my $error_msg = "POD/Code disagreement in method '$method->{name}':\n " . | |||
| 1790 | join("\n ", @validation_errors); | |||||
| 1791 | ||||||
| 1792 | # Add to schema for reference even if we croak | |||||
| 1793 | 5 | 8 | $schema->{_pod_validation_errors} = \@validation_errors; | |||
| 1794 | ||||||
| 1795 | # Either croak immediately or log based on configuration | |||||
| 1796 | 5 | 6 | if($self->{strict_pod} == 2) { # 2 = fatal errors | |||
| 1797 | 0 | 0 | croak("[POD STRICT] $error_msg"); | |||
| 1798 | } else { # 1 = warnings | |||||
| 1799 | 5 | 215 | carp("[POD STRICT] $error_msg"); | |||
| 1800 | # Continue with analysis, but mark as problematic | |||||
| 1801 | 5 | 2814 | $schema->{_pod_disagreement} = 1; | |||
| 1802 | } | |||||
| 1803 | } | |||||
| 1804 | 6 | 12 | $schema->{_strict_pod_level} = $self->{strict_pod}; | |||
| 1805 | } | |||||
| 1806 | ||||||
| 1807 | 266 | 349 | my $validator_params = $self->_extract_validator_schema($code); | |||
| 1808 | ||||||
| 1809 | 266 | 260 | if ($validator_params) { | |||
| 1810 | 3 | 3 | $schema->{input} = $validator_params->{input}; | |||
| 1811 | 3 | 3 | $schema->{input_style} = 'hash'; | |||
| 1812 | 3 | 6 | $schema->{_confidence}{input} = { 'factors' => [ 'Determined from validator' ], 'level' => 'high' }; | |||
| 1813 | $schema->{_analysis}{confidence_factors}{input} = [ | |||||
| 1814 | 3 | 6 | 'Input schema extracted from validator' | |||
| 1815 | ]; | |||||
| 1816 | } else { | |||||
| 1817 | # Merge field declarations into code_params before merging analyses | |||||
| 1818 | 263 | 350 | if (keys %$fields) { | |||
| 1819 | 1 | 1 | $self->_merge_field_declarations($code_params, $fields); | |||
| 1820 | } | |||||
| 1821 | ||||||
| 1822 | # Merge analyses | |||||
| 1823 | 263 | 375 | $schema->{input} = $self->_merge_parameter_analyses( | |||
| 1824 | $pod_params, | |||||
| 1825 | $code_params, | |||||
| 1826 | ); | |||||
| 1827 | } | |||||
| 1828 | ||||||
| 1829 | # ---------------------------------------- | |||||
| 1830 | # Legacy Output Analysis (unchanged) | |||||
| 1831 | # ---------------------------------------- | |||||
| 1832 | ||||||
| 1833 | $schema->{output} = $self->_analyze_output( | |||||
| 1834 | $method->{pod}, | |||||
| 1835 | $method->{body}, | |||||
| 1836 | $method->{name} | |||||
| 1837 | 266 | 499 | ); | |||
| 1838 | ||||||
| 1839 | ||||||
| 1840 | # Detect accessor methods | |||||
| 1841 | 266 | 451 | $self->_detect_accessor_methods($method, $schema); | |||
| 1842 | ||||||
| 1843 | # Detect if this is an instance method that needs object instantiation | |||||
| 1844 | # Constructors never require object instantiation | |||||
| 1845 | 266 | 384 | my $needs_object = $self->_needs_object_instantiation($method->{name}, $method->{body}, $method); | |||
| 1846 | 266 | 528 | if($method->{name} ne 'new' && $needs_object) { | |||
| 1847 | 116 | 129 | $schema->{new} = $needs_object; | |||
| 1848 | 116 | 181 | $self->_log(" NEW: Method requires object instantiation: $needs_object"); | |||
| 1849 | } | |||||
| 1850 | ||||||
| 1851 | # Calculate confidences | |||||
| 1852 | 266 | 269 | my $input_confidence = $schema->{_confidence}{'input'}; | |||
| 1853 | 266 | 268 | if(!ref($input_confidence)) { | |||
| 1854 | 0 | 0 | $input_confidence = $schema->{_confidence}{'input'} = $self->_calculate_input_confidence($schema->{input}); | |||
| 1855 | } | |||||
| 1856 | 266 | 440 | my $output_confidence = $schema->{_confidence}{'output'} = $self->_calculate_output_confidence($schema->{output}); | |||
| 1857 | ||||||
| 1858 | # Add metadata | |||||
| 1859 | 266 | 420 | $schema->{_notes} = $self->_generate_notes($schema->{input}); | |||
| 1860 | ||||||
| 1861 | # Add analytics | |||||
| 1862 | 266 | 574 | $schema->{_analysis} ||= {}; | |||
| 1863 | 266 | 305 | $schema->{_analysis}{input_confidence} = $input_confidence->{level}; | |||
| 1864 | 266 | 255 | $schema->{_analysis}{output_confidence} = $output_confidence->{level}; | |||
| 1865 | 266 | 508 | $schema->{_analysis}{confidence_factors} ||= {}; | |||
| 1866 | 266 | 506 | $schema->{_analysis}{confidence_factors}{input} ||= $input_confidence->{factors}; | |||
| 1867 | 266 | 536 | $schema->{_analysis}{confidence_factors}{output} ||= $output_confidence->{factors}; | |||
| 1868 | ||||||
| 1869 | 266 | 239 | foreach my $mode('input', 'output') { | |||
| 1870 | 532 | 553 | $self->_set_defaults($schema, $mode); | |||
| 1871 | } | |||||
| 1872 | ||||||
| 1873 | # Optionally store detailed per-parameter analysis | |||||
| 1874 | 266 | 269 | if ($input_confidence->{per_parameter}) { | |||
| 1875 | 0 | 0 | $schema->{_analysis}{per_parameter_scores} = $input_confidence->{per_parameter}; | |||
| 1876 | } | |||||
| 1877 | ||||||
| 1878 | # Calculate overall confidence (for backward compatibility) | |||||
| 1879 | 266 | 207 | my $input_level = $input_confidence->{level}; | |||
| 1880 | 266 | 213 | my $output_level = $output_confidence->{level}; | |||
| 1881 | ||||||
| 1882 | 266 | 552 | my %level_rank = ( | |||
| 1883 | none => 0, | |||||
| 1884 | very_low => 1, | |||||
| 1885 | low => 2, | |||||
| 1886 | medium => 3, | |||||
| 1887 | high => 4 | |||||
| 1888 | ); | |||||
| 1889 | ||||||
| 1890 | # Overall is the lower of input and output | |||||
| 1891 | 266 | 482 | $input_level //= 'none'; | |||
| 1892 | 266 | 218 | $output_level //= 'none'; | |||
| 1893 | 266 | 307 | my $overall = $level_rank{$input_level} < $level_rank{$output_level} ? $input_level : $output_level; | |||
| 1894 | ||||||
| 1895 | 266 | 248 | $schema->{_analysis}{overall_confidence} = $overall; | |||
| 1896 | ||||||
| 1897 | # Analyze parameter relationships | |||||
| 1898 | 266 | 325 | my $relationships = $self->_analyze_relationships($method); | |||
| 1899 | 266 266 | 314 312 | if ($relationships && @{$relationships}) { | |||
| 1900 | 7 | 6 | $schema->{relationships} = $relationships; | |||
| 1901 | 7 | 11 | $self->_log(" Found " . scalar(@$relationships) . " parameter relationships"); | |||
| 1902 | } | |||||
| 1903 | ||||||
| 1904 | # Store modern feature info in schema | |||||
| 1905 | 266 | 316 | $schema->{_attributes} = $attributes if keys %$attributes; | |||
| 1906 | 266 | 298 | $schema->{_modern_features}{postfix_dereferencing} = $postfix_derefs if keys %$postfix_derefs; | |||
| 1907 | 266 | 251 | $schema->{_fields} = $fields if keys %$fields; | |||
| 1908 | ||||||
| 1909 | # Store class info if this is a class method | |||||
| 1910 | 266 | 309 | if ($method->{class}) { | |||
| 1911 | 1 | 1 | $schema->{_class} = $method->{class}; | |||
| 1912 | } | |||||
| 1913 | ||||||
| 1914 | 266 | 374 | my $hints = $self->_extract_test_hints($method, $schema); | |||
| 1915 | 266 | 377 | $self->_extract_pod_examples($pod, $hints); | |||
| 1916 | ||||||
| 1917 | 266 | 254 | for my $k (qw(boundary_values invalid_inputs valid_inputs equivalence_classes)) { | |||
| 1918 | 1064 | 626 | my %seen; | |||
| 1919 | $hints->{$k} = [ | |||||
| 1920 | 42 | 80 | grep { !$seen{ defined $_ ? $_ : '__undef__' }++ } | |||
| 1921 | 1064 1064 | 611 1183 | @{ $hints->{$k} } | |||
| 1922 | ]; | |||||
| 1923 | } | |||||
| 1924 | ||||||
| 1925 | # -------------------------------------------------- | |||||
| 1926 | # YAML test hints: numeric boundaries | |||||
| 1927 | # -------------------------------------------------- | |||||
| 1928 | 266 | 324 | if ($self->_method_has_numeric_intent($schema)) { | |||
| 1929 | 137 | 309 | $schema->{_yamltest_hints} ||= {}; | |||
| 1930 | ||||||
| 1931 | # Do not override existing hints | |||||
| 1932 | 137 | 263 | $schema->{_yamltest_hints}{boundary_values} ||= []; | |||
| 1933 | ||||||
| 1934 | 0 | 0 | my %seen = map { (defined $_ ? $_ : '__undef__') => 1 } | |||
| 1935 | 137 137 | 108 189 | @{ $schema->{_yamltest_hints}{boundary_values} }; | |||
| 1936 | ||||||
| 1937 | 137 137 | 100 157 | foreach my $v (@{ $self->_numeric_boundary_values }) { | |||
| 1938 | 685 | 601 | push @{ $schema->{_yamltest_hints}{boundary_values} }, $v | |||
| 1939 | 685 | 833 | unless $seen{$v}++; | |||
| 1940 | ||||||
| 1941 | 685 | 496 | my $key = defined $v ? $v : '__undef__'; | |||
| 1942 | 685 0 | 731 0 | push @{ $schema->{_yamltest_hints}{boundary_values} }, $v unless $seen{$key}++; | |||
| 1943 | } | |||||
| 1944 | ||||||
| 1945 | 137 | 289 | $self->_log(' HINTS: Added numeric boundary values'); | |||
| 1946 | } | |||||
| 1947 | ||||||
| 1948 | 266 | 320 | if (keys %$hints) { | |||
| 1949 | 266 | 417 | $schema->{_yamltest_hints} ||= {}; | |||
| 1950 | 266 | 294 | foreach my $k (keys %$hints) { | |||
| 1951 | $schema->{_yamltest_hints}{$k} = $hints->{$k} | |||||
| 1952 | 1064 | 1122 | unless exists $schema->{_yamltest_hints}{$k}; | |||
| 1953 | } | |||||
| 1954 | } | |||||
| 1955 | ||||||
| 1956 | 266 | 460 | if(($level_rank{$overall} < $level_rank{$LEVEL_MEDIUM}) && | |||
| 1957 | ($level_rank{$overall} < ($self->{confidence_threshold} * 4))) { | |||||
| 1958 | 223 | 958 | $schema->{_low_confidence} = 1 | |||
| 1959 | } | |||||
| 1960 | ||||||
| 1961 | # ---------------------------------------- | |||||
| 1962 | # Non-invasive reasoning layer | |||||
| 1963 | # ---------------------------------------- | |||||
| 1964 | ||||||
| 1965 | my $method_model = App::Test::Generator::Model::Method->new( | |||||
| 1966 | name => $method->{name}, | |||||
| 1967 | source => $method->{body}, | |||||
| 1968 | 266 | 1009 | ); | |||
| 1969 | ||||||
| 1970 | 266 | 651 | my $return_analyzer = App::Test::Generator::Analyzer::Return->new(); | |||
| 1971 | 266 | 449 | $return_analyzer->analyze($method_model); | |||
| 1972 | ||||||
| 1973 | # Let model learn from finalized schema | |||||
| 1974 | 266 | 285 | if ($schema->{output}) { | |||
| 1975 | 266 | 400 | $method_model->absorb_legacy_output($schema->{output}); | |||
| 1976 | } | |||||
| 1977 | ||||||
| 1978 | 266 | 389 | $method_model->resolve_return_type(); | |||
| 1979 | 266 | 357 | $method_model->resolve_classification(); | |||
| 1980 | 266 | 355 | $method_model->resolve_confidence(); | |||
| 1981 | ||||||
| 1982 | # Attach only metadata | |||||
| 1983 | $schema->{_model} = { | |||||
| 1984 | 266 | 381 | classification => $method_model->classification, | |||
| 1985 | confidence => $method_model->confidence, | |||||
| 1986 | }; | |||||
| 1987 | ||||||
| 1988 | # ---------------------------------------- | |||||
| 1989 | # Return Meta Analysis (Non-invasive) | |||||
| 1990 | # ---------------------------------------- | |||||
| 1991 | ||||||
| 1992 | 266 | 633 | my $meta = App::Test::Generator::Analyzer::ReturnMeta->new(); | |||
| 1993 | 266 | 355 | my $analysis = $meta->analyze($schema); | |||
| 1994 | ||||||
| 1995 | 266 | 262 | $schema->{_analysis}{stability_score} = $analysis->{stability_score}; | |||
| 1996 | 266 | 294 | $schema->{_analysis}{consistency_score} = $analysis->{consistency_score}; | |||
| 1997 | 266 | 291 | $schema->{_analysis}{risk_flags} = $analysis->{risk_flags}; | |||
| 1998 | ||||||
| 1999 | # ---------------------------------------- | |||||
| 2000 | # Side Effect Analysis (Non-invasive) | |||||
| 2001 | # ---------------------------------------- | |||||
| 2002 | ||||||
| 2003 | 266 | 598 | my $se = App::Test::Generator::Analyzer::SideEffect->new(); | |||
| 2004 | ||||||
| 2005 | 266 | 330 | my $effects = $se->analyze($method); | |||
| 2006 | ||||||
| 2007 | 266 | 266 | $schema->{_analysis}{side_effects} = $effects; | |||
| 2008 | ||||||
| 2009 | # ---------------------------------------- | |||||
| 2010 | # Complexity Analysis (Non-invasive) | |||||
| 2011 | # ---------------------------------------- | |||||
| 2012 | ||||||
| 2013 | 266 | 544 | my $cx = App::Test::Generator::Analyzer::Complexity->new(); | |||
| 2014 | 266 | 341 | my $complexity = $cx->analyze($method); | |||
| 2015 | ||||||
| 2016 | 266 | 267 | $schema->{_analysis}{complexity} = $complexity; | |||
| 2017 | ||||||
| 2018 | 266 | 1573 | return $schema; | |||
| 2019 | } | |||||
| 2020 | ||||||
| 2021 | # -------------------------------------------------- | |||||
| 2022 | # _method_has_numeric_intent | |||||
| 2023 | # | |||||
| 2024 | # Purpose: Determine whether a method schema | |||||
| 2025 | # has numeric intent â either a numeric | |||||
| 2026 | # output type or at least one required | |||||
| 2027 | # numeric input parameter â to decide | |||||
| 2028 | # whether to add standard numeric | |||||
| 2029 | # boundary hint values. | |||||
| 2030 | # | |||||
| 2031 | # Entry: $schema - schema hashref as built by | |||||
| 2032 | # _analyze_method. | |||||
| 2033 | # | |||||
| 2034 | # Exit: Returns 1 if numeric intent is | |||||
| 2035 | # detected, 0 otherwise. | |||||
| 2036 | # | |||||
| 2037 | # Side effects: None. | |||||
| 2038 | # -------------------------------------------------- | |||||
| 2039 | sub _method_has_numeric_intent { | |||||
| 2040 | 270 | 285 | my ($self, $schema) = @_; | |||
| 2041 | ||||||
| 2042 | # Numeric output | |||||
| 2043 | 270 | 1061 | return 1 if ($schema->{output} && $schema->{output}{type} && $schema->{output}{type} =~ /^(number|integer)$/); | |||
| 2044 | ||||||
| 2045 | # Numeric inputs | |||||
| 2046 | 144 144 | 99 204 | foreach my $p (values %{ $schema->{input} || {} }) { | |||
| 2047 | 125 | 121 | next if $p->{optional}; | |||
| 2048 | 48 | 128 | return 1 if ($p->{type} && $p->{type} =~ /^(number|integer)$/); | |||
| 2049 | } | |||||
| 2050 | ||||||
| 2051 | 130 | 147 | return 0; | |||
| 2052 | } | |||||
| 2053 | ||||||
| 2054 | # -------------------------------------------------- | |||||
| 2055 | # _numeric_boundary_values | |||||
| 2056 | # | |||||
| 2057 | # Purpose: Return the standard set of numeric | |||||
| 2058 | # boundary values used as test hints | |||||
| 2059 | # for methods with numeric intent. | |||||
| 2060 | # | |||||
| 2061 | # Entry: None. | |||||
| 2062 | # | |||||
| 2063 | # Exit: Returns an arrayref of boundary | |||||
| 2064 | # values: [-1, 0, 1, 2, 100]. | |||||
| 2065 | # | |||||
| 2066 | # Side effects: None. | |||||
| 2067 | # -------------------------------------------------- | |||||
| 2068 | sub _numeric_boundary_values { | |||||
| 2069 | 138 | 275 | return [ -1, 0, 1, 2, 100 ]; | |||
| 2070 | } | |||||
| 2071 | ||||||
| 2072 | # -------------------------------------------------- | |||||
| 2073 | # _detect_accessor_methods | |||||
| 2074 | # | |||||
| 2075 | # Purpose: Detect whether a method is a getter, | |||||
| 2076 | # setter, or combined getter/setter | |||||
| 2077 | # accessor by analysing assignment and | |||||
| 2078 | # return patterns involving $self->{...}. | |||||
| 2079 | # | |||||
| 2080 | # Entry: $method - method hashref containing | |||||
| 2081 | # at minimum 'body' and | |||||
| 2082 | # optionally 'pod'. | |||||
| 2083 | # $schema - schema hashref (modified | |||||
| 2084 | # in place). | |||||
| 2085 | # | |||||
| 2086 | # Exit: Returns nothing. Modifies $schema in | |||||
| 2087 | # place, setting accessor, input, | |||||
| 2088 | # input_style, output, and _confidence | |||||
| 2089 | # keys as appropriate. | |||||
| 2090 | # | |||||
| 2091 | # Side effects: Croaks if a getter/setter has more | |||||
| 2092 | # than one argument, or if a setter | |||||
| 2093 | # returns non-self data. | |||||
| 2094 | # Logs detections to stdout when | |||||
| 2095 | # verbose is set. | |||||
| 2096 | # | |||||
| 2097 | # Notes: Four accessor patterns are detected | |||||
| 2098 | # in order: (1) combined getter/setter | |||||
| 2099 | # with shift, (2) combined getter/setter | |||||
| 2100 | # with validated input, (3) getter only, | |||||
| 2101 | # (4) setter that returns $self. Methods | |||||
| 2102 | # accessing multiple $self fields are | |||||
| 2103 | # skipped immediately. | |||||
| 2104 | # -------------------------------------------------- | |||||
| 2105 | sub _detect_accessor_methods { | |||||
| 2106 | 269 | 273 | my ($self, $method, $schema) = @_; | |||
| 2107 | ||||||
| 2108 | 269 | 239 | my $body = $method->{body}; | |||
| 2109 | ||||||
| 2110 | # Normalize whitespace for regex sanity | |||||
| 2111 | 269 | 219 | my $code = $body; | |||
| 2112 | 269 | 1263 | $code =~ s/\s+/ /g; | |||
| 2113 | ||||||
| 2114 | # If a method touches more than one $self->{...}, itâs not an accessor. | |||||
| 2115 | 269 | 210 | my %fields_seen; | |||
| 2116 | 269 | 443 | while ($code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}/g) { | |||
| 2117 | 67 | 112 | $fields_seen{$1}++; | |||
| 2118 | } | |||||
| 2119 | 269 | 281 | if (keys(%fields_seen) > 1) { | |||
| 2120 | 3 | 4 | $self->_log(" Skipping accessor detection: multiple fields accessed"); | |||
| 2121 | 3 | 5 | return; | |||
| 2122 | } | |||||
| 2123 | ||||||
| 2124 | # ------------------------------- | |||||
| 2125 | # Getter/Setter combo | |||||
| 2126 | # ------------------------------- | |||||
| 2127 | 266 | 1234 | if ( | |||
| 2128 | # Require get/set of the same property | |||||
| 2129 | $code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*shift\s*;/ && | |||||
| 2130 | $code =~ /return\s+\$self\s*->\s*\{\s*['"]?\Q$1\E['"]?\s*\}\s*;/ && | |||||
| 2131 | $code =~ /if\s*\(\s*\@_/ | |||||
| 2132 | ) { | |||||
| 2133 | 0 | 0 | my $property = $1; | |||
| 2134 | ||||||
| 2135 | 0 | 0 | if(!defined($property)) { | |||
| 2136 | 0 | 0 | if($code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*shift\s*;/) { | |||
| 2137 | 0 | 0 | $property = $1; | |||
| 2138 | } | |||||
| 2139 | } | |||||
| 2140 | ||||||
| 2141 | $schema->{accessor} = { | |||||
| 2142 | 0 | 0 | type => 'getset', | |||
| 2143 | property => $property, | |||||
| 2144 | }; | |||||
| 2145 | ||||||
| 2146 | 0 | 0 | $self->_log(" Detected getter/setter accessor for property: $property"); | |||
| 2147 | ||||||
| 2148 | 0 | 0 | $schema->{input} ||= { value => { type => 'string', optional => 1 } }; | |||
| 2149 | ||||||
| 2150 | 0 | 0 | $schema->{input_style} = 'hash'; | |||
| 2151 | ||||||
| 2152 | $schema->{_confidence}{input} = { | |||||
| 2153 | 0 | 0 | level => 'high', | |||
| 2154 | factors => ['Detected combined getter/setter accessor'], | |||||
| 2155 | }; | |||||
| 2156 | 0 | 0 | if (my $pod = $method->{pod}) { | |||
| 2157 | 0 | 0 | if ($pod =~ /\b(LWP::UserAgent(::\w+)*)\b/) { | |||
| 2158 | 0 | 0 | my $class = $1; | |||
| 2159 | $schema->{output} = { | |||||
| 2160 | 0 | 0 | type => 'object', | |||
| 2161 | isa => $class, | |||||
| 2162 | }; | |||||
| 2163 | 0 | 0 | $schema->{input}{$property} = { | |||
| 2164 | type => 'object', | |||||
| 2165 | isa => $class, | |||||
| 2166 | optional => 1, | |||||
| 2167 | }; | |||||
| 2168 | ||||||
| 2169 | $schema->{_confidence}{output} = { | |||||
| 2170 | 0 | 0 | level => 'high', | |||
| 2171 | factors => ['POD specifies UserAgent object'], | |||||
| 2172 | }; | |||||
| 2173 | } | |||||
| 2174 | } | |||||
| 2175 | } elsif($code =~ /if\s*\(\s*(?:\@_|[\$]\w+)/ && | |||||
| 2176 | $code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*(?:shift|\@_|\$_\[\d+\]|\$\w+)\b/x && | |||||
| 2177 | $code =~ /return\b/ | |||||
| 2178 | ) { | |||||
| 2179 | # ------------------------------- | |||||
| 2180 | # Getter/Setter (validated input) | |||||
| 2181 | # ------------------------------- | |||||
| 2182 | 4 | 4 | my $property = $1; | |||
| 2183 | ||||||
| 2184 | 4 | 4 | if(!defined($property)) { | |||
| 2185 | 4 | 7 | if($code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=/) { | |||
| 2186 | 4 | 4 | $property = $1; | |||
| 2187 | } | |||||
| 2188 | } | |||||
| 2189 | 4 | 10 | if ($code =~ /validate_strict/) { | |||
| 2190 | 2 2 | 1 3 | push @{ $schema->{_confidence}{input}{factors} }, 'Setter uses Params::Validate::Strict'; | |||
| 2191 | } else { | |||||
| 2192 | # --------------------------------------- | |||||
| 2193 | # Detect object input via blessed($arg) | |||||
| 2194 | # --------------------------------------- | |||||
| 2195 | 2 | 2 | if ($code =~ /blessed\s*\(\s*\$(\w+)\s*\)/) { | |||
| 2196 | 0 | 0 | my $param = $1; | |||
| 2197 | ||||||
| 2198 | 0 | 0 | $self->_log(" Detected object input via blessed(\$$param)"); | |||
| 2199 | ||||||
| 2200 | $schema->{input} = { | |||||
| 2201 | 0 | 0 | $param => { | |||
| 2202 | type => 'object', | |||||
| 2203 | optional => 1, | |||||
| 2204 | } | |||||
| 2205 | }; | |||||
| 2206 | ||||||
| 2207 | $schema->{_confidence}{input} = { | |||||
| 2208 | 0 | 0 | level => 'high', | |||
| 2209 | factors => ['Input validated by Scalar::Util::blessed'], | |||||
| 2210 | }; | |||||
| 2211 | } else { | |||||
| 2212 | # fallback ONLY if nothing known | |||||
| 2213 | $schema->{input} ||= { | |||||
| 2214 | 2 | 2 | value => { type => 'string', optional => 1 }, | |||
| 2215 | }; | |||||
| 2216 | } | |||||
| 2217 | }; | |||||
| 2218 | $schema->{accessor} = { | |||||
| 2219 | 4 | 10 | type => 'getset', | |||
| 2220 | property => $property, | |||||
| 2221 | }; | |||||
| 2222 | ||||||
| 2223 | 4 | 6 | $self->_log(" Detected getter/setter accessor for property: $property"); | |||
| 2224 | 4 | 7 | if (my $pod = $method->{pod}) { | |||
| 2225 | 1 | 1 | if ($pod =~ /\b(LWP::UserAgent(::\w+)*)\b/) { | |||
| 2226 | 0 | 0 | my $class = $1; | |||
| 2227 | $schema->{output} = { | |||||
| 2228 | 0 | 0 | type => 'object', | |||
| 2229 | isa => $class, | |||||
| 2230 | }; | |||||
| 2231 | 0 | 0 | $schema->{input}{$property} = { | |||
| 2232 | type => 'object', | |||||
| 2233 | isa => $class, | |||||
| 2234 | optional => 1, | |||||
| 2235 | }; | |||||
| 2236 | ||||||
| 2237 | $schema->{_confidence}{output} = { | |||||
| 2238 | 0 | 0 | level => 'high', | |||
| 2239 | factors => ['POD specifies UserAgent object'], | |||||
| 2240 | }; | |||||
| 2241 | } | |||||
| 2242 | } | |||||
| 2243 | 4 | 6 | if(ref($schema->{input}) eq 'HASH') { | |||
| 2244 | 4 4 | 3 5 | if(scalar keys(%{$schema->{input}}) > 1) { | |||
| 2245 | 0 | 0 | croak(__PACKAGE__, ': A getset accessor function can have at most one argument'); | |||
| 2246 | } | |||||
| 2247 | } | |||||
| 2248 | 4 | 6 | $schema->{input}->{$property}->{position} = 0; | |||
| 2249 | } elsif ($code =~ /return\s+\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*;/) { | |||||
| 2250 | # ------------------------------- | |||||
| 2251 | # Getter | |||||
| 2252 | # ------------------------------- | |||||
| 2253 | 18 | 57 | my $property = $1; | |||
| 2254 | ||||||
| 2255 | # Don't flag mutators like | |||||
| 2256 | # sub foo { | |||||
| 2257 | # my $self = shift; | |||||
| 2258 | # $self->{bar} = shift; | |||||
| 2259 | # return $self->{bar}; | |||||
| 2260 | # } | |||||
| 2261 | # Only exclude if the property is being set FROM EXTERNAL INPUT | |||||
| 2262 | 18 | 1076 | if($code !~ /\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}\s*=\s*(?:shift|\$\w+\s*=\s*shift|\@_|\$_\[\d+\])/) { | |||
| 2263 | 17 | 35 | my @returns = $code =~ /return\b/g; | |||
| 2264 | 17 | 305 | my @self_returns = $code =~ /return\s+\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}/g; | |||
| 2265 | # it's a getter | |||||
| 2266 | 17 | 28 | if (scalar(@returns) == scalar(@self_returns)) { | |||
| 2267 | # all returns are returning $self->{$property}, so it's a getter | |||||
| 2268 | $schema->{accessor} = { | |||||
| 2269 | 15 | 33 | type => 'getter', | |||
| 2270 | property => $property, | |||||
| 2271 | }; | |||||
| 2272 | ||||||
| 2273 | 15 | 27 | $self->_log(" Detected getter accessor for property: $property"); | |||
| 2274 | ||||||
| 2275 | $schema->{_confidence}{output} = { | |||||
| 2276 | 15 | 79 | level => 'high', | |||
| 2277 | factors => ['Detected getter method'], | |||||
| 2278 | }; | |||||
| 2279 | 15 | 29 | delete $schema->{input}; | |||
| 2280 | } | |||||
| 2281 | } | |||||
| 2282 | } elsif ( | |||||
| 2283 | $code =~ /return\s+\$self\b/ && | |||||
| 2284 | $code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*\$(\w+)\s*;/ | |||||
| 2285 | ) { | |||||
| 2286 | # ------------------------------- | |||||
| 2287 | # Setter | |||||
| 2288 | # ------------------------------- | |||||
| 2289 | 5 | 8 | my ($property, $param) = ($1, $2); | |||
| 2290 | ||||||
| 2291 | $schema->{accessor} = { | |||||
| 2292 | 5 | 12 | type => 'setter', | |||
| 2293 | property => $property, | |||||
| 2294 | param => $param, | |||||
| 2295 | }; | |||||
| 2296 | ||||||
| 2297 | 5 | 9 | $self->_log(" Detected setter accessor for property: $property"); | |||
| 2298 | ||||||
| 2299 | $schema->{input} = { | |||||
| 2300 | 5 | 11 | $param => { type => 'string' }, # safe default | |||
| 2301 | }; | |||||
| 2302 | 5 | 9 | $schema->{input_style} = 'hash'; | |||
| 2303 | ||||||
| 2304 | $schema->{_confidence}{input} = { | |||||
| 2305 | 5 | 9 | level => 'high', | |||
| 2306 | factors => ['Detected setter/accessor method'], | |||||
| 2307 | }; | |||||
| 2308 | 5 | 7 | if($schema->{output}{_returns_self}) { | |||
| 2309 | 4 | 6 | if($schema->{output}{type} ne 'object') { | |||
| 2310 | 0 | 0 | croak 'Setter can not return data other than $self'; | |||
| 2311 | } | |||||
| 2312 | 4 | 12 | if($schema->{output}{isa} ne $self->{_package_name}) { | |||
| 2313 | 0 | 0 | croak 'Setter can not return data other than $self'; | |||
| 2314 | } | |||||
| 2315 | 1 | 2 | } elsif(scalar(keys %{$schema->{output}}) != 0) { | |||
| 2316 | $self->_analysis_error( | |||||
| 2317 | method => $method->{name}, | |||||
| 2318 | 0 | 0 | message => "Setter cannot return data", | |||
| 2319 | ); | |||||
| 2320 | } | |||||
| 2321 | } | |||||
| 2322 | ||||||
| 2323 | 266 | 384 | if(exists($schema->{accessor})) { | |||
| 2324 | 24 | 114 | if($schema->{accessor}{type} && $schema->{accessor}{type} =~ /setter|getset/ && $schema->{input}) { | |||
| 2325 | 9 9 | 6 12 | for my $param (keys %{ $schema->{input} }) { | |||
| 2326 | 10 | 11 | my $in = $schema->{input}{$param}; | |||
| 2327 | ||||||
| 2328 | 10 | 18 | if ($in->{type} && ($in->{type} eq 'object')) { | |||
| 2329 | $schema->{output} = { | |||||
| 2330 | type => 'object', | |||||
| 2331 | 2 | 4 | ($in->{isa} ? (isa => $in->{isa}) : ()), | |||
| 2332 | }; | |||||
| 2333 | ||||||
| 2334 | $schema->{_confidence}{output} = { | |||||
| 2335 | 2 | 4 | level => 'high', | |||
| 2336 | factors => ['Output type propagated from setter input'], | |||||
| 2337 | }; | |||||
| 2338 | } | |||||
| 2339 | } | |||||
| 2340 | } | |||||
| 2341 | ||||||
| 2342 | 24 | 136 | if($schema->{accessor}{type} && $schema->{accessor}{property} && ($schema->{accessor}{type} =~ /getter|getset/) && | |||
| 2343 | ((!defined($schema->{output}{type})) || ($schema->{output}{type} eq 'string'))) { | |||||
| 2344 | 16 | 29 | if (my $pod = $method->{pod}) { | |||
| 2345 | # POD says "UserAgent object" | |||||
| 2346 | 8 | 27 | if ($pod =~ /\bUser[- ]?Agent\b.*\bobject\b/i) { | |||
| 2347 | 1 | 2 | $schema->{output}{type} = 'object'; | |||
| 2348 | 1 | 1 | $schema->{output}{isa} = 'LWP::UserAgent'; | |||
| 2349 | ||||||
| 2350 | 1 1 | 1 2 | push @{ $schema->{_confidence}{output}{factors} }, 'POD indicates UserAgent object'; | |||
| 2351 | ||||||
| 2352 | 1 | 2 | $schema->{_confidence}{output}{level} = 'high'; | |||
| 2353 | } | |||||
| 2354 | } | |||||
| 2355 | } | |||||
| 2356 | } | |||||
| 2357 | } | |||||
| 2358 | ||||||
| 2359 | # -------------------------------------------------- | |||||
| 2360 | # _analysis_error | |||||
| 2361 | # | |||||
| 2362 | # Purpose: Report a fatal analysis error with | |||||
| 2363 | # module, method, and file context, | |||||
| 2364 | # then croak. | |||||
| 2365 | # | |||||
| 2366 | # Entry: Named args: | |||||
| 2367 | # method - method name string. | |||||
| 2368 | # message - error description string. | |||||
| 2369 | # | |||||
| 2370 | # Exit: Does not return â always croaks. | |||||
| 2371 | # | |||||
| 2372 | # Side effects: None beyond the croak. | |||||
| 2373 | # -------------------------------------------------- | |||||
| 2374 | sub _analysis_error { | |||||
| 2375 | 0 | 0 | my ($self, %args) = @_; | |||
| 2376 | ||||||
| 2377 | 0 | 0 | my $method = $args{method} // 'UNKNOWN'; | |||
| 2378 | 0 | 0 | my $msg = $args{message} // 'Analysis error'; | |||
| 2379 | ||||||
| 2380 | 0 | 0 | my $module = $self->{_package_name} // 'UNKNOWN'; | |||
| 2381 | 0 | 0 | my $file = $self->{input_file} // 'UNKNOWN'; | |||
| 2382 | ||||||
| 2383 | 0 | 0 | croak join "\n", | |||
| 2384 | $msg, | |||||
| 2385 | " Module: $module", | |||||
| 2386 | " Method: $method", | |||||
| 2387 | " File: $file", | |||||
| 2388 | ''; | |||||
| 2389 | } | |||||
| 2390 | ||||||
| 2391 | # -------------------------------------------------- | |||||
| 2392 | # _extract_validator_schema | |||||
| 2393 | # | |||||
| 2394 | # Purpose: Try each supported validator extractor | |||||
| 2395 | # in priority order and return the first | |||||
| 2396 | # schema that yields a non-empty input | |||||
| 2397 | # spec. Used to detect explicit | |||||
| 2398 | # parameter validation declarations | |||||
| 2399 | # before falling back to heuristic | |||||
| 2400 | # code analysis. | |||||
| 2401 | # | |||||
| 2402 | # Entry: $code - method body source string. | |||||
| 2403 | # | |||||
| 2404 | # Exit: Returns a schema hashref on success, | |||||
| 2405 | # or undef if no supported validator | |||||
| 2406 | # call is detected. | |||||
| 2407 | # | |||||
| 2408 | # Side effects: None. | |||||
| 2409 | # | |||||
| 2410 | # Notes: Extractors tried in order: | |||||
| 2411 | # Params::Validate::Strict, | |||||
| 2412 | # Params::Validate, | |||||
| 2413 | # MooseX::Params::Validate, | |||||
| 2414 | # Type::Params. | |||||
| 2415 | # -------------------------------------------------- | |||||
| 2416 | sub _extract_validator_schema { | |||||
| 2417 | 271 | 284 | my ($self, $code) = @_; | |||
| 2418 | ||||||
| 2419 | 271 | 276 | for my $extractor ('_extract_pvs_schema', '_extract_pv_schema', '_extract_moosex_params_schema', '_extract_type_params_schema') { | |||
| 2420 | 1072 | 1440 | my $res = $self->$extractor($code); | |||
| 2421 | 1072 5 | 3150 12 | return $res if ($res && ref($res) eq 'HASH' && keys %{ $res->{input} || {} }); | |||
| 2422 | } | |||||
| 2423 | ||||||
| 2424 | 266 | 267 | return; | |||
| 2425 | } | |||||
| 2426 | ||||||
| 2427 | # -------------------------------------------------- | |||||
| 2428 | # _parse_schema_hash | |||||
| 2429 | # | |||||
| 2430 | # Purpose: Parse a PPI block node representing | |||||
| 2431 | # a validator schema hash literal and | |||||
| 2432 | # return a normalised schema structure | |||||
| 2433 | # suitable for use as input spec. | |||||
| 2434 | # | |||||
| 2435 | # Entry: $hash - a PPI node with a children() | |||||
| 2436 | # method, typically a | |||||
| 2437 | # PPI::Structure::Block from | |||||
| 2438 | # a validate_strict call. | |||||
| 2439 | # | |||||
| 2440 | # Exit: Returns a hashref with keys: | |||||
| 2441 | # input - hashref of param specs | |||||
| 2442 | # input_style - 'hash' | |||||
| 2443 | # _confidence - confidence hashref | |||||
| 2444 | # or undef if parsing fails. | |||||
| 2445 | # | |||||
| 2446 | # Side effects: None. | |||||
| 2447 | # -------------------------------------------------- | |||||
| 2448 | sub _parse_schema_hash { | |||||
| 2449 | 1 | 769 | my ($self, $hash) = @_; | |||
| 2450 | ||||||
| 2451 | 1 | 1 | my %result; | |||
| 2452 | ||||||
| 2453 | 1 | 3 | for my $child ($hash->children) { | |||
| 2454 | # skip whitespace and operators | |||||
| 2455 | 1 | 8 | if ($child->isa('PPI::Statement') || $child->isa('PPI::Statement::Expression')) { | |||
| 2456 | 0 | 0 | my ($key, $val); | |||
| 2457 | ||||||
| 2458 | my @tokens = grep { | |||||
| 2459 | 0 0 | 0 0 | !$_->isa('PPI::Token::Whitespace') && | |||
| 2460 | !$_->isa('PPI::Token::Operator') | |||||
| 2461 | } $child->children; | |||||
| 2462 | ||||||
| 2463 | 0 | 0 | for (my $i = 0; $i < @tokens - 1; $i++) { | |||
| 2464 | 0 | 0 | if(($tokens[$i]->isa('PPI::Token::Word') || $tokens[$i]->isa('PPI::Token::Quote')) && | |||
| 2465 | $tokens[$i+1]->isa('PPI::Structure::Constructor')) { | |||||
| 2466 | 0 | 0 | $key = $tokens[$i]->content; | |||
| 2467 | 0 | 0 | $key =~ s/^['"]|['"]$//g; | |||
| 2468 | 0 | 0 | $val = $tokens[$i+1]; | |||
| 2469 | 0 | 0 | last; | |||
| 2470 | } | |||||
| 2471 | } | |||||
| 2472 | ||||||
| 2473 | 0 | 0 | next unless $key && $val; | |||
| 2474 | ||||||
| 2475 | 0 | 0 | my %param; | |||
| 2476 | 0 | 0 | for my $inner ($val->children) { | |||
| 2477 | 0 | 0 | next unless $inner->isa('PPI::Statement') || $inner->isa('PPI::Statement::Expression'); | |||
| 2478 | ||||||
| 2479 | my ($k, undef, $v) = grep { | |||||
| 2480 | 0 0 | 0 0 | !$_->isa('PPI::Token::Whitespace') && | |||
| 2481 | !$_->isa('PPI::Token::Operator') | |||||
| 2482 | } $inner->children; | |||||
| 2483 | ||||||
| 2484 | 0 | 0 | next unless $k && $v; | |||
| 2485 | ||||||
| 2486 | 0 | 0 | my $keyname = $k->content; | |||
| 2487 | 0 | 0 | my $value = $v->can('content') ? $v->content : undef; | |||
| 2488 | 0 | 0 | $value =~ s/^['"]|['"]$//g if defined $value; | |||
| 2489 | ||||||
| 2490 | 0 | 0 | if ($keyname eq 'type') { | |||
| 2491 | 0 | 0 | $param{type} = lc($value); | |||
| 2492 | } elsif ($keyname eq 'optional') { | |||||
| 2493 | 0 | 0 | $param{optional} = $value ? 1 : 0; | |||
| 2494 | } elsif ($keyname =~ /^(min|max)$/ && looks_like_number($value)) { | |||||
| 2495 | 0 | 0 | $param{$keyname} = 0 + $value; | |||
| 2496 | } elsif ($keyname eq 'matches') { | |||||
| 2497 | 0 | 0 | $param{matches} = qr/$value/; | |||
| 2498 | } | |||||
| 2499 | } | |||||
| 2500 | ||||||
| 2501 | 0 | 0 | $param{type} //= 'string'; | |||
| 2502 | 0 | 0 | $param{optional} //= 0; | |||
| 2503 | ||||||
| 2504 | 0 | 0 | $result{$key} = \%param; | |||
| 2505 | } | |||||
| 2506 | } | |||||
| 2507 | ||||||
| 2508 | return { | |||||
| 2509 | 1 | 4 | input => \%result, | |||
| 2510 | input_style => 'hash', | |||||
| 2511 | _confidence => { | |||||
| 2512 | input => { | |||||
| 2513 | level => 'high', | |||||
| 2514 | factors => ['Input schema extracted from validator'], | |||||
| 2515 | }, | |||||
| 2516 | }, | |||||
| 2517 | }; | |||||
| 2518 | } | |||||
| 2519 | ||||||
| 2520 | # -------------------------------------------------- | |||||
| 2521 | # _ppi | |||||
| 2522 | # | |||||
| 2523 | # Purpose: Return a PPI::Document for a code | |||||
| 2524 | # string, using a per-instance cache | |||||
| 2525 | # to avoid re-parsing the same string | |||||
| 2526 | # multiple times during a single | |||||
| 2527 | # analysis pass. | |||||
| 2528 | # | |||||
| 2529 | # Entry: $code - either a string of Perl source | |||||
| 2530 | # code, or an object that | |||||
| 2531 | # already has a find() method | |||||
| 2532 | # (returned as-is). | |||||
| 2533 | # | |||||
| 2534 | # Exit: Returns a PPI::Document, or the | |||||
| 2535 | # original object if it already | |||||
| 2536 | # supports find(). | |||||
| 2537 | # | |||||
| 2538 | # Side effects: Populates $self->{_ppi_cache}. | |||||
| 2539 | # -------------------------------------------------- | |||||
| 2540 | sub _ppi { | |||||
| 2541 | 24 | 20 | my ($self, $code) = @_; | |||
| 2542 | ||||||
| 2543 | 24 | 33 | return $code if ref($code) && $code->can('find'); | |||
| 2544 | ||||||
| 2545 | 24 | 52 | $self->{_ppi_cache} ||= {}; | |||
| 2546 | 24 | 84 | return $self->{_ppi_cache}{$code} //= PPI::Document->new(\$code); | |||
| 2547 | } | |||||
| 2548 | ||||||
| 2549 | # -------------------------------------------------- | |||||
| 2550 | # _extract_pvs_schema | |||||
| 2551 | # | |||||
| 2552 | # Purpose: Detect and extract a parameter schema | |||||
| 2553 | # from a Params::Validate::Strict | |||||
| 2554 | # validate_strict() call in the method | |||||
| 2555 | # body. | |||||
| 2556 | # | |||||
| 2557 | # Entry: $code - method body source string. | |||||
| 2558 | # | |||||
| 2559 | # Exit: Returns a schema hashref with input, | |||||
| 2560 | # style, and source keys on success, | |||||
| 2561 | # or undef if no validate_strict call | |||||
| 2562 | # is found or parsing fails. | |||||
| 2563 | # | |||||
| 2564 | # Side effects: None. | |||||
| 2565 | # -------------------------------------------------- | |||||
| 2566 | sub _extract_pvs_schema { | |||||
| 2567 | 279 | 292 | my ($self, $code) = @_; | |||
| 2568 | ||||||
| 2569 | 279 | 443 | return unless $code =~ /\bvalidate_strict\s*\(/; | |||
| 2570 | ||||||
| 2571 | 9 | 12 | my $doc = $self->_ppi($code) or return; | |||
| 2572 | ||||||
| 2573 | my $calls = $doc->find(sub { | |||||
| 2574 | 954 | 4410 | $_[1]->isa('PPI::Token::Word') && ($_[1]->content eq 'validate_strict' || $_[1]->content eq 'Params::Validate::Strict::validate_strict') | |||
| 2575 | 9 | 44362 | }) or return; | |||
| 2576 | ||||||
| 2577 | 9 | 59 | for my $call (@$calls) { | |||
| 2578 | 9 | 17 | my $list = $call->parent(); | |||
| 2579 | 9 | 164 | while ($list && !$list->isa('PPI::Structure::List')) { | |||
| 2580 | 40 | 215 | $list = $list->parent(); | |||
| 2581 | } | |||||
| 2582 | 9 | 20 | if(!defined($list)) { | |||
| 2583 | 9 | 16 | my $next = $call->next_sibling(); | |||
| 2584 | 9 | 196 | next unless defined $next; | |||
| 2585 | 9 | 12 | if($next->content() =~ /schema\s*=>\s*(\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\})/s) { | |||
| 2586 | 7 | 915 | my $schema_text = $1; | |||
| 2587 | 7 | 19 | my $compartment = Safe->new(); | |||
| 2588 | 7 | 2880 | $compartment->permit_only(qw(:base_core :base_mem :base_orig)); | |||
| 2589 | ||||||
| 2590 | 7 | 26 | my $schema_str = "my \$schema = $schema_text"; | |||
| 2591 | 7 | 12 | my $schema = $compartment->reval($schema_str); | |||
| 2592 | 7 7 | 2044 12 | if(scalar keys %{$schema}) { | |||
| 2593 | return { | |||||
| 2594 | 7 | 20 | input => $schema, | |||
| 2595 | style => 'hash', | |||||
| 2596 | source => 'validator' | |||||
| 2597 | } | |||||
| 2598 | } | |||||
| 2599 | } | |||||
| 2600 | } | |||||
| 2601 | 2 | 100 | next unless $list; | |||
| 2602 | ||||||
| 2603 | 0 0 | 0 0 | my ($schema_block) = grep { $_->isa('PPI::Structure::Block') } $list->children; | |||
| 2604 | ||||||
| 2605 | 0 | 0 | next unless $schema_block; | |||
| 2606 | ||||||
| 2607 | 0 | 0 | my $schema = $self->_extract_schema_hash_from_block($schema_block); | |||
| 2608 | 0 | 0 | return $self->_normalize_validator_schema($schema) if $schema; | |||
| 2609 | } | |||||
| 2610 | ||||||
| 2611 | 2 | 3 | return; | |||
| 2612 | } | |||||
| 2613 | ||||||
| 2614 | # -------------------------------------------------- | |||||
| 2615 | # _extract_pv_schema | |||||
| 2616 | # | |||||
| 2617 | # Purpose: Detect and extract a parameter schema | |||||
| 2618 | # from a Params::Validate validate() | |||||
| 2619 | # call in the method body. | |||||
| 2620 | # | |||||
| 2621 | # Entry: $code - method body source string. | |||||
| 2622 | # | |||||
| 2623 | # Exit: Returns a schema hashref with input, | |||||
| 2624 | # style, and source keys on success, | |||||
| 2625 | # or undef if no validate() call is | |||||
| 2626 | # found or parsing fails. | |||||
| 2627 | # | |||||
| 2628 | # Side effects: None. | |||||
| 2629 | # -------------------------------------------------- | |||||
| 2630 | sub _extract_pv_schema { | |||||
| 2631 | 277 | 275 | my ($self, $code) = @_; | |||
| 2632 | ||||||
| 2633 | 277 | 381 | return unless $code =~ /\bvalidate\s*\(/; | |||
| 2634 | ||||||
| 2635 | 7 | 9 | my $doc = $self->_ppi($code) or return; | |||
| 2636 | ||||||
| 2637 | my $calls = $doc->find(sub { | |||||
| 2638 | 426 | 2040 | $_[1]->isa('PPI::Token::Word') && ($_[1]->content eq 'validate' || $_[1]->content eq 'Params::Validate::validate') | |||
| 2639 | 7 | 18907 | }) or return; | |||
| 2640 | ||||||
| 2641 | 7 | 47 | for my $call (@$calls) { | |||
| 2642 | 7 | 12 | my $list = $call->parent; | |||
| 2643 | 7 | 118 | while ($list && !$list->isa('PPI::Structure::List')) { | |||
| 2644 | 26 | 130 | $list = $list->parent; | |||
| 2645 | } | |||||
| 2646 | 7 | 20 | if(!defined($list)) { | |||
| 2647 | 7 | 15 | my $next = $call->next_sibling(); | |||
| 2648 | 7 | 169 | my ($arglist, $schema_text) = $self->_parse_pv_call($next); | |||
| 2649 | ||||||
| 2650 | 7 | 9 | if($schema_text) { | |||
| 2651 | 7 | 15 | my $compartment = Safe->new(); | |||
| 2652 | 7 | 2807 | $compartment->permit_only(qw(:base_core :base_mem :base_orig)); | |||
| 2653 | ||||||
| 2654 | 7 | 25 | my $schema_str = "my \$schema = $schema_text"; | |||
| 2655 | 7 | 10 | my $schema = $compartment->reval($schema_str); | |||
| 2656 | ||||||
| 2657 | 7 7 | 2186 9 | if(scalar keys %{$schema}) { | |||
| 2658 | 6 6 | 7 6 | foreach my $arg(keys %{$schema}) { | |||
| 2659 | 9 | 8 | my $field = $schema->{$arg}; | |||
| 2660 | 9 | 10 | if(my $type = $field->{'type'}) { | |||
| 2661 | 9 | 10 | if($type eq 'ARRAYREF') { | |||
| 2662 | 0 | 0 | $field->{'type'} = 'arrayref'; | |||
| 2663 | } elsif($type eq 'SCALAR') { | |||||
| 2664 | 9 | 8 | $field->{'type'} = 'string'; | |||
| 2665 | } | |||||
| 2666 | } | |||||
| 2667 | 9 | 8 | delete $field->{'callbacks'}; | |||
| 2668 | } | |||||
| 2669 | ||||||
| 2670 | return { | |||||
| 2671 | 6 | 13 | input => $schema, | |||
| 2672 | style => 'hash', | |||||
| 2673 | source => 'validator' | |||||
| 2674 | } | |||||
| 2675 | } | |||||
| 2676 | } | |||||
| 2677 | } | |||||
| 2678 | 1 | 43 | next unless $list; | |||
| 2679 | ||||||
| 2680 | 0 0 | 0 0 | my ($schema_block) = grep { $_->isa('PPI::Structure::Block') } $list->children; | |||
| 2681 | ||||||
| 2682 | 0 | 0 | next unless $schema_block; | |||
| 2683 | ||||||
| 2684 | 0 | 0 | my $schema = $self->_extract_schema_hash_from_block($schema_block); | |||
| 2685 | 0 | 0 | return $self->_normalize_validator_schema($schema) if $schema; | |||
| 2686 | } | |||||
| 2687 | ||||||
| 2688 | 1 | 1 | return; | |||
| 2689 | } | |||||
| 2690 | ||||||
| 2691 | # -------------------------------------------------- | |||||
| 2692 | # _parse_pv_call | |||||
| 2693 | # | |||||
| 2694 | # Purpose: Split a Params::Validate call argument | |||||
| 2695 | # string into its two components: the | |||||
| 2696 | # first argument (typically \@_) and | |||||
| 2697 | # the schema hash string. | |||||
| 2698 | # | |||||
| 2699 | # Entry: $string - the raw argument string | |||||
| 2700 | # from the validate() call, | |||||
| 2701 | # including outer parentheses. | |||||
| 2702 | # | |||||
| 2703 | # Exit: Returns a two-element list: | |||||
| 2704 | # ($first_arg, $hash_str) | |||||
| 2705 | # or an empty list if no comma is found | |||||
| 2706 | # at brace depth zero (malformed call). | |||||
| 2707 | # | |||||
| 2708 | # Side effects: None. | |||||
| 2709 | # -------------------------------------------------- | |||||
| 2710 | sub _parse_pv_call { | |||||
| 2711 | 19 | 21 | my ($self, $string) = @_; | |||
| 2712 | ||||||
| 2713 | # Remove outer parentheses and whitespace | |||||
| 2714 | 19 | 28 | $string =~ s/^\s*\(\s*//; | |||
| 2715 | 19 | 1869 | $string =~ s/\s*\)\s*$//; | |||
| 2716 | ||||||
| 2717 | # Find the first comma at brace-depth 0 | |||||
| 2718 | 19 | 15 | my $depth = 0; | |||
| 2719 | 19 | 15 | my $comma_pos; | |||
| 2720 | ||||||
| 2721 | 19 | 23 | for my $i (0 .. length($string) - 1) { | |||
| 2722 | 114 | 78 | my $char = substr($string, $i, 1); | |||
| 2723 | ||||||
| 2724 | 114 | 147 | if ($char eq '{') { | |||
| 2725 | 2 | 1 | $depth++; | |||
| 2726 | } elsif ($char eq '}') { | |||||
| 2727 | 2 | 1 | $depth--; | |||
| 2728 | 2 | 3 | return if $depth < 0; # Broken source code | |||
| 2729 | } elsif ($char eq ',' && $depth == 0) { | |||||
| 2730 | 18 | 11 | $comma_pos = $i; | |||
| 2731 | 18 | 15 | last; | |||
| 2732 | } | |||||
| 2733 | } | |||||
| 2734 | ||||||
| 2735 | 19 | 19 | return unless defined $comma_pos; | |||
| 2736 | ||||||
| 2737 | 18 | 15 | my $first_arg = substr($string, 0, $comma_pos); | |||
| 2738 | 18 | 16 | my $hash_str = substr($string, $comma_pos + 1); | |||
| 2739 | ||||||
| 2740 | # Trim whitespace | |||||
| 2741 | 18 | 32 | $first_arg =~ s/^\s+|\s+$//g; | |||
| 2742 | 18 | 115 | $hash_str =~ s/^\s+|\s+$//g; | |||
| 2743 | ||||||
| 2744 | 18 | 31 | return ($first_arg, $hash_str); | |||
| 2745 | } | |||||
| 2746 | ||||||
| 2747 | # -------------------------------------------------- | |||||
| 2748 | # _extract_moosex_params_schema | |||||
| 2749 | # | |||||
| 2750 | # Purpose: Detect and extract a parameter schema | |||||
| 2751 | # from a MooseX::Params::Validate | |||||
| 2752 | # validated_hash() call in the method | |||||
| 2753 | # body. | |||||
| 2754 | # | |||||
| 2755 | # Entry: $code - method body source string. | |||||
| 2756 | # | |||||
| 2757 | # Exit: Returns a schema hashref with input, | |||||
| 2758 | # style, and source keys on success, | |||||
| 2759 | # or undef if no validated_hash() call | |||||
| 2760 | # is found or parsing fails. | |||||
| 2761 | # | |||||
| 2762 | # Side effects: None. | |||||
| 2763 | # -------------------------------------------------- | |||||
| 2764 | sub _extract_moosex_params_schema | |||||
| 2765 | { | |||||
| 2766 | 277 | 281 | my ($self, $code) = @_; | |||
| 2767 | ||||||
| 2768 | 277 | 374 | return unless $code =~ /\bvalidated_hash\s*\(/; | |||
| 2769 | ||||||
| 2770 | 8 | 9 | my $doc = $self->_ppi($code) or return; | |||
| 2771 | ||||||
| 2772 | my $calls = $doc->find(sub { | |||||
| 2773 | 530 | 2466 | $_[1]->isa('PPI::Token::Word') && ($_[1]->content eq 'validated_hash') | |||
| 2774 | 8 | 24025 | }) or return; | |||
| 2775 | ||||||
| 2776 | 8 | 52 | for my $call (@$calls) { | |||
| 2777 | 8 | 16 | my $list = $call->parent(); | |||
| 2778 | 8 | 205 | while ($list && !$list->isa('PPI::Structure::List')) { | |||
| 2779 | 32 | 211 | $list = $list->parent; | |||
| 2780 | } | |||||
| 2781 | 8 | 17 | if(!defined($list)) { | |||
| 2782 | 8 | 12 | my $next = $call->next_sibling(); | |||
| 2783 | 8 | 257 | my ($arglist, $schema_text) = $self->_parse_pv_call($next); | |||
| 2784 | ||||||
| 2785 | 8 | 9 | if($schema_text) { | |||
| 2786 | 8 | 16 | my $compartment = Safe->new(); | |||
| 2787 | 8 | 3116 | $compartment->permit_only(qw(:base_core :base_mem :base_orig)); | |||
| 2788 | ||||||
| 2789 | 8 | 26 | my $schema_str = "my \$schema = { $schema_text }"; | |||
| 2790 | 8 | 14 | $schema_str =~ s/ArrayRef\[(.+?)\]/arrayref, element_type => $1/g; | |||
| 2791 | 8 | 9 | my $schema = $compartment->reval($schema_str); | |||
| 2792 | ||||||
| 2793 | 8 8 | 2205 10 | if(scalar keys %{$schema}) { | |||
| 2794 | 8 8 | 5 11 | foreach my $arg(keys %{$schema}) { | |||
| 2795 | 11 | 10 | my $field = $schema->{$arg}; | |||
| 2796 | 11 | 10 | if(my $isa = delete $field->{'isa'}) { | |||
| 2797 | 11 | 11 | $field->{'type'} = $isa; | |||
| 2798 | } | |||||
| 2799 | 11 | 12 | if(exists($field->{'required'})) { | |||
| 2800 | 10 | 7 | my $required = delete $field->{'required'}; | |||
| 2801 | 10 | 11 | $field->{'optional'} = $required ? 0 : 1; | |||
| 2802 | } else { | |||||
| 2803 | 1 | 2 | $field->{'optional'} = 1; | |||
| 2804 | } | |||||
| 2805 | 11 | 12 | if(ref($field->{'default'}) eq 'CODE') { | |||
| 2806 | 1 | 13 | delete $field->{'default'}; # TODO | |||
| 2807 | } | |||||
| 2808 | } | |||||
| 2809 | ||||||
| 2810 | 8 8 | 8 6 | foreach my $arg(keys %{$schema}) { | |||
| 2811 | 11 | 6 | my $field = $schema->{$arg}; | |||
| 2812 | 11 | 11 | if(my $type = $field->{'type'}) { | |||
| 2813 | 11 | 12 | if($type eq 'ARRAYREF') { | |||
| 2814 | 0 | 0 | $field->{'type'} = 'arrayref'; | |||
| 2815 | } elsif($type eq 'SCALAR') { | |||||
| 2816 | 0 | 0 | $field->{'type'} = 'string'; | |||
| 2817 | } | |||||
| 2818 | } | |||||
| 2819 | 11 | 9 | delete $field->{'callbacks'}; | |||
| 2820 | } | |||||
| 2821 | ||||||
| 2822 | return { | |||||
| 2823 | 8 | 20 | input => $schema, | |||
| 2824 | style => 'hash', | |||||
| 2825 | source => 'validator' | |||||
| 2826 | } | |||||
| 2827 | } | |||||
| 2828 | } | |||||
| 2829 | } | |||||
| 2830 | 0 | 0 | next unless $list; | |||
| 2831 | ||||||
| 2832 | 0 0 | 0 0 | my ($schema_block) = grep { $_->isa('PPI::Structure::Block') } $list->children; | |||
| 2833 | ||||||
| 2834 | 0 | 0 | next unless $schema_block; | |||
| 2835 | ||||||
| 2836 | 0 | 0 | my $schema = $self->_extract_schema_hash_from_block($schema_block); | |||
| 2837 | 0 | 0 | return $self->_normalize_validator_schema($schema) if $schema; | |||
| 2838 | } | |||||
| 2839 | ||||||
| 2840 | 0 | 0 | return; | |||
| 2841 | } | |||||
| 2842 | ||||||
| 2843 | # -------------------------------------------------- | |||||
| 2844 | # _extract_schema_hash_from_block | |||||
| 2845 | # | |||||
| 2846 | # Purpose: Extract a parameter schema hashref from | |||||
| 2847 | # a PPI::Structure::Block node representing | |||||
| 2848 | # the schema argument to a validator call | |||||
| 2849 | # such as validate_strict({ ... }). | |||||
| 2850 | # | |||||
| 2851 | # Entry: $block - a PPI::Structure::Block node. | |||||
| 2852 | # | |||||
| 2853 | # Exit: Returns a hashref of parameter name to | |||||
| 2854 | # spec hashref, or undef if parsing fails. | |||||
| 2855 | # | |||||
| 2856 | # Side effects: None. | |||||
| 2857 | # | |||||
| 2858 | # Notes: Delegates to _parse_schema_hash which | |||||
| 2859 | # expects a PPI node with a children() | |||||
| 2860 | # method. This method exists to provide | |||||
| 2861 | # a clear semantic name at the call site. | |||||
| 2862 | # -------------------------------------------------- | |||||
| 2863 | sub _extract_schema_hash_from_block { | |||||
| 2864 | 2 | 6 | my ($self, $block) = @_; | |||
| 2865 | ||||||
| 2866 | 2 | 8 | return unless $block && $block->can('children'); | |||
| 2867 | ||||||
| 2868 | 0 | 0 | my $result = $self->_parse_schema_hash($block); | |||
| 2869 | ||||||
| 2870 | 0 | 0 | return unless $result && ref($result) eq 'HASH' && $result->{input}; | |||
| 2871 | ||||||
| 2872 | 0 | 0 | return $result->{input}; | |||
| 2873 | } | |||||
| 2874 | ||||||
| 2875 | # -------------------------------------------------- | |||||
| 2876 | # _normalize_validator_schema | |||||
| 2877 | # | |||||
| 2878 | # Purpose: Normalise a raw validator schema | |||||
| 2879 | # hashref (as extracted from PPI) into | |||||
| 2880 | # the standard input spec format used | |||||
| 2881 | # throughout the extractor. | |||||
| 2882 | # | |||||
| 2883 | # Entry: $schema - hashref of parameter name | |||||
| 2884 | # to raw spec hashref, as | |||||
| 2885 | # produced by | |||||
| 2886 | # _extract_schema_hash_from_block. | |||||
| 2887 | # | |||||
| 2888 | # Exit: Returns a hashref with keys: | |||||
| 2889 | # input_style - 'hash' | |||||
| 2890 | # input - normalised param specs | |||||
| 2891 | # Each param spec gains an explicit | |||||
| 2892 | # optional key and _source / _type_confidence | |||||
| 2893 | # metadata. | |||||
| 2894 | # | |||||
| 2895 | # Side effects: None. | |||||
| 2896 | # -------------------------------------------------- | |||||
| 2897 | sub _normalize_validator_schema { | |||||
| 2898 | 5 | 18 | my ($self, $schema) = @_; | |||
| 2899 | ||||||
| 2900 | 5 | 5 | my %input; | |||
| 2901 | ||||||
| 2902 | 5 | 6 | for my $name (keys %$schema) { | |||
| 2903 | 8 | 8 | my $spec = $schema->{$name}; | |||
| 2904 | ||||||
| 2905 | $input{$name} = { | |||||
| 2906 | %$spec, | |||||
| 2907 | 8 | 17 | optional => exists $spec->{optional} ? $spec->{optional} : 0, | |||
| 2908 | _source => 'validator', | |||||
| 2909 | _type_confidence => 'high', | |||||
| 2910 | }; | |||||
| 2911 | } | |||||
| 2912 | ||||||
| 2913 | return { | |||||
| 2914 | 5 | 9 | input_style => 'hash', | |||
| 2915 | input => \%input, | |||||
| 2916 | }; | |||||
| 2917 | } | |||||
| 2918 | ||||||
| 2919 | # -------------------------------------------------- | |||||
| 2920 | # _extract_type_params_schema | |||||
| 2921 | # | |||||
| 2922 | # Purpose: Detect and extract a parameter schema | |||||
| 2923 | # from a Type::Params signature_for() | |||||
| 2924 | # declaration for the current method, | |||||
| 2925 | # located in the module-level document. | |||||
| 2926 | # | |||||
| 2927 | # Entry: $code - method body source string | |||||
| 2928 | # (used to extract the function | |||||
| 2929 | # name for lookup). | |||||
| 2930 | # | |||||
| 2931 | # Exit: Returns a schema hashref on success, | |||||
| 2932 | # or undef if no signature_for | |||||
| 2933 | # declaration is found or compilation | |||||
| 2934 | # fails. | |||||
| 2935 | # | |||||
| 2936 | # Side effects: May fork a child process to compile | |||||
| 2937 | # the signature in isolation. | |||||
| 2938 | # -------------------------------------------------- | |||||
| 2939 | sub _extract_type_params_schema { | |||||
| 2940 | 266 | 207 | my ($self, $code) = @_; | |||
| 2941 | ||||||
| 2942 | 266 | 371 | my $function = $self->_extract_function_name($code) or return; | |||
| 2943 | ||||||
| 2944 | 265 | 592 | my $doc = $self->{_document} or return; | |||
| 2945 | 263 | 331 | my $stmt = $self->_find_signature_statement($doc, $function) or return; | |||
| 2946 | ||||||
| 2947 | 0 | 0 | my $signature_expr = $self->_extract_signature_expression($stmt, $function) or return; | |||
| 2948 | ||||||
| 2949 | 0 | 0 | my $meta = $self->_compile_signature_isolated($function, $signature_expr) or return; | |||
| 2950 | ||||||
| 2951 | 0 | 0 | return $self->_build_schema_from_meta($meta); | |||
| 2952 | } | |||||
| 2953 | ||||||
| 2954 | # -------------------------------------------------- | |||||
| 2955 | # _extract_function_name | |||||
| 2956 | # | |||||
| 2957 | # Purpose: Extract the subroutine name from the | |||||
| 2958 | # start of a method body string, used | |||||
| 2959 | # to look up its Type::Params signature. | |||||
| 2960 | # | |||||
| 2961 | # Entry: $code - method body source string. | |||||
| 2962 | # | |||||
| 2963 | # Exit: Returns the subroutine name string, | |||||
| 2964 | # or undef if no 'sub name' declaration | |||||
| 2965 | # is found. | |||||
| 2966 | # | |||||
| 2967 | # Side effects: None. | |||||
| 2968 | # -------------------------------------------------- | |||||
| 2969 | sub _extract_function_name { | |||||
| 2970 | 266 | 273 | my ($self, $code) = @_; | |||
| 2971 | 266 | 923 | return $1 if $code =~ /^\s*sub\s+([a-zA-Z0-9_]+)/; | |||
| 2972 | 1 | 2 | return; | |||
| 2973 | } | |||||
| 2974 | ||||||
| 2975 | # -------------------------------------------------- | |||||
| 2976 | # _find_signature_statement | |||||
| 2977 | # | |||||
| 2978 | # Purpose: Search a PPI document for a | |||||
| 2979 | # signature_for statement that | |||||
| 2980 | # corresponds to a named function. | |||||
| 2981 | # | |||||
| 2982 | # Entry: $doc - PPI::Document to search. | |||||
| 2983 | # $function - function name string. | |||||
| 2984 | # | |||||
| 2985 | # Exit: Returns the matching PPI::Statement | |||||
| 2986 | # node, or undef if none is found. | |||||
| 2987 | # | |||||
| 2988 | # Side effects: None. | |||||
| 2989 | # -------------------------------------------------- | |||||
| 2990 | sub _find_signature_statement { | |||||
| 2991 | 265 | 4253 | my ($self, $doc, $function) = @_; | |||
| 2992 | ||||||
| 2993 | my $statements = $doc->find( | |||||
| 2994 | sub { | |||||
| 2995 | 211384 | 1327628 | $_[1]->isa('PPI::Statement') && $_[1]->content =~ /^\s*signature_for\b/ | |||
| 2996 | } | |||||
| 2997 | 265 | 730 | ) or return; | |||
| 2998 | ||||||
| 2999 | 1 | 6 | foreach my $stmt (@$statements) { | |||
| 3000 | 1 | 2 | my $content = $stmt->content; | |||
| 3001 | 1 | 58 | if ($content =~ /^\s*signature_for\s+\Q$function\E\b/) { | |||
| 3002 | 1 | 3 | return $stmt; | |||
| 3003 | } | |||||
| 3004 | } | |||||
| 3005 | ||||||
| 3006 | 0 | 0 | return; | |||
| 3007 | } | |||||
| 3008 | ||||||
| 3009 | # -------------------------------------------------- | |||||
| 3010 | # _extract_signature_expression | |||||
| 3011 | # | |||||
| 3012 | # Purpose: Extract the Type::Params signature | |||||
| 3013 | # expression (everything after =>) from | |||||
| 3014 | # a signature_for statement node. | |||||
| 3015 | # | |||||
| 3016 | # Entry: $stmt - PPI::Statement node. | |||||
| 3017 | # $function - function name string, | |||||
| 3018 | # used in the match pattern. | |||||
| 3019 | # | |||||
| 3020 | # Exit: Returns the signature expression | |||||
| 3021 | # string, or undef if the pattern | |||||
| 3022 | # does not match. | |||||
| 3023 | # | |||||
| 3024 | # Side effects: None. | |||||
| 3025 | # -------------------------------------------------- | |||||
| 3026 | sub _extract_signature_expression { | |||||
| 3027 | 2 | 2166 | my ($self, $stmt, $function) = @_; | |||
| 3028 | ||||||
| 3029 | 2 | 4 | my $content = $stmt->content; | |||
| 3030 | ||||||
| 3031 | 2 | 80 | if ($content =~ /^\s*signature_for\s+\Q$function\E\s*=>\s*(.+?);?\s*$/s) { | |||
| 3032 | 1 | 3 | return $1; | |||
| 3033 | } | |||||
| 3034 | ||||||
| 3035 | 1 | 2 | return; | |||
| 3036 | } | |||||
| 3037 | ||||||
| 3038 | # -------------------------------------------------- | |||||
| 3039 | # _compile_signature_isolated | |||||
| 3040 | # | |||||
| 3041 | # Purpose: Compile and evaluate a Type::Params | |||||
| 3042 | # signature expression in an isolated | |||||
| 3043 | # environment to extract parameter | |||||
| 3044 | # metadata without polluting the | |||||
| 3045 | # current process. | |||||
| 3046 | # | |||||
| 3047 | # Entry: $function - function name string. | |||||
| 3048 | # $signature_expr - Type::Params | |||||
| 3049 | # signature expression | |||||
| 3050 | # string. | |||||
| 3051 | # | |||||
| 3052 | # Exit: Returns a decoded JSON hashref | |||||
| 3053 | # containing parameters and returns | |||||
| 3054 | # metadata on success. | |||||
| 3055 | # Croaks on unsafe expressions, timeout, | |||||
| 3056 | # or compile errors. | |||||
| 3057 | # | |||||
| 3058 | # Side effects: May fork a child process with a | |||||
| 3059 | # memory limit applied via | |||||
| 3060 | # BSD::Resource if available. | |||||
| 3061 | # Memory limiting is best-effort and | |||||
| 3062 | # silently skipped on platforms where | |||||
| 3063 | # BSD::Resource is unavailable. | |||||
| 3064 | # -------------------------------------------------- | |||||
| 3065 | sub _compile_signature_isolated { | |||||
| 3066 | 0 | 0 | my ($self, $function, $signature_expr) = @_; | |||
| 3067 | ||||||
| 3068 | # Remove comments | |||||
| 3069 | 0 | 0 | $signature_expr =~ s/#.*$//mg; | |||
| 3070 | ||||||
| 3071 | # Reject obviously dangerous constructs | |||||
| 3072 | 0 | 0 | if ($signature_expr =~ /\b(?:system|exec|open|fork|require|do|eval|qx)\b/) { | |||
| 3073 | 0 | 0 | die 'Unsafe signature expression'; | |||
| 3074 | } | |||||
| 3075 | ||||||
| 3076 | 0 | 0 | if ($signature_expr =~ /[`{};]/) { | |||
| 3077 | 0 | 0 | die "Unsafe signature expression"; | |||
| 3078 | } | |||||
| 3079 | ||||||
| 3080 | 0 | 0 | my $payload = <<'PERL'; | |||
| 3081 | use strict; | |||||
| 3082 | use warnings; | |||||
| 3083 | use Type::Params -sigs; | |||||
| 3084 | use Types::Common -types; | |||||
| 3085 | use JSON::MaybeXS; | |||||
| 3086 | ||||||
| 3087 | # Stub sub so Perl can parse it | |||||
| 3088 | sub FUNCTION_NAME {} | |||||
| 3089 | ||||||
| 3090 | # Create the Type::Params signature object | |||||
| 3091 | my $sig = signature_for FUNCTION_NAME => SIGNATURE_EXPR; | |||||
| 3092 | ||||||
| 3093 | # Extract parameters | |||||
| 3094 | my @sig_params = @{ $sig->parameters || [] }; | |||||
| 3095 | my $pos = 0; | |||||
| 3096 | my @params; | |||||
| 3097 | ||||||
| 3098 | # if ($sig->method) { | |||||
| 3099 | # The $self value | |||||
| 3100 | # push @params, { | |||||
| 3101 | # name => 'arg0', | |||||
| 3102 | # optional => 0, | |||||
| 3103 | # position => $pos++, | |||||
| 3104 | # }; | |||||
| 3105 | # } | |||||
| 3106 | ||||||
| 3107 | for my $p (@sig_params) { | |||||
| 3108 | push @params, { | |||||
| 3109 | name => "arg$pos", | |||||
| 3110 | optional => $p->optional ? 1 : 0, | |||||
| 3111 | position => $pos, | |||||
| 3112 | type => $p->type->name | |||||
| 3113 | }; | |||||
| 3114 | $pos++; | |||||
| 3115 | } | |||||
| 3116 | ||||||
| 3117 | # Extract return type | |||||
| 3118 | my $returns; | |||||
| 3119 | if (my $r = $sig->returns_scalar) { | |||||
| 3120 | $returns = { | |||||
| 3121 | context => 'scalar', | |||||
| 3122 | type => $r ? $r->name : 'unknown', | |||||
| 3123 | }; | |||||
| 3124 | } elsif ($r = $sig->returns_list) { | |||||
| 3125 | $returns = { | |||||
| 3126 | context => 'list', | |||||
| 3127 | type => $r ? $r->name : 'unknown', | |||||
| 3128 | }; | |||||
| 3129 | } | |||||
| 3130 | ||||||
| 3131 | print encode_json({ | |||||
| 3132 | parameters => \@params, | |||||
| 3133 | returns => $returns, | |||||
| 3134 | }); | |||||
| 3135 | PERL | |||||
| 3136 | ||||||
| 3137 | # Substitute function name and signature expression | |||||
| 3138 | 0 | 0 | $payload =~ s/FUNCTION_NAME/$function/g; | |||
| 3139 | 0 | 0 | $payload =~ s/SIGNATURE_EXPR/$signature_expr/; | |||
| 3140 | ||||||
| 3141 | 0 | 0 | my $compartment = Safe->new(); | |||
| 3142 | 0 | 0 | $compartment->permit_only(qw(:base_core :base_mem :base_orig :load)); | |||
| 3143 | 0 | 0 | if(my $sig = $compartment->reval($payload)) { | |||
| 3144 | 0 | 0 | return $sig; | |||
| 3145 | } | |||||
| 3146 | ||||||
| 3147 | # Run in an isolated Perl process | |||||
| 3148 | 0 | 0 | my ($wtr, $rdr, $err) = (undef, undef, gensym); | |||
| 3149 | 0 | 0 | local %ENV; | |||
| 3150 | ||||||
| 3151 | # Apply memory limit if BSD::Resource is available. | |||||
| 3152 | # This module is Unix-only and not available on Windows, | |||||
| 3153 | # so we guard the call and skip silently if not present. | |||||
| 3154 | 0 | 0 | eval { | |||
| 3155 | 0 | 0 | require BSD::Resource; | |||
| 3156 | 0 | 0 | BSD::Resource::setrlimit( | |||
| 3157 | BSD::Resource::RLIMIT_AS(), | |||||
| 3158 | $MEMORY_LIMIT_BYTES, | |||||
| 3159 | $MEMORY_LIMIT_BYTES | |||||
| 3160 | ); | |||||
| 3161 | }; | |||||
| 3162 | # Ignore failure â resource limiting is best-effort only | |||||
| 3163 | ||||||
| 3164 | 0 | 0 | my $pid = open3($wtr, $rdr, $err, $^X, '-T'); | |||
| 3165 | ||||||
| 3166 | 0 | 0 | print $wtr $payload; | |||
| 3167 | 0 | 0 | close $wtr; | |||
| 3168 | ||||||
| 3169 | 0 0 | 0 0 | local $SIG{ALRM} = sub { croak 'Signature compile timeout' }; | |||
| 3170 | 0 0 | 0 0 | eval { alarm($SIGNATURE_TIMEOUT_SECS) }; # no-op on Windows | |||
| 3171 | ||||||
| 3172 | 0 0 0 | 0 0 0 | my $stdout = do { local $/; <$rdr> }; | |||
| 3173 | 0 0 0 | 0 0 0 | my $stderr = do { local $/; <$err> }; | |||
| 3174 | ||||||
| 3175 | 0 0 | 0 0 | eval { alarm 0 }; | |||
| 3176 | ||||||
| 3177 | 0 | 0 | waitpid($pid, 0); | |||
| 3178 | ||||||
| 3179 | 0 | 0 | if ($stderr && length $stderr) { | |||
| 3180 | 0 | 0 | croak "Error compiling signature:\n$stderr"; | |||
| 3181 | } | |||||
| 3182 | ||||||
| 3183 | 0 | 0 | return decode_json($stdout); | |||
| 3184 | } | |||||
| 3185 | ||||||
| 3186 | # -------------------------------------------------- | |||||
| 3187 | # _build_schema_from_meta | |||||
| 3188 | # | |||||
| 3189 | # Purpose: Convert the parameter and return type | |||||
| 3190 | # metadata produced by | |||||
| 3191 | # _compile_signature_isolated into a | |||||
| 3192 | # standard schema hashref. | |||||
| 3193 | # | |||||
| 3194 | # Entry: $meta - hashref with 'parameters' | |||||
| 3195 | # arrayref and optional | |||||
| 3196 | # 'returns' hashref, as decoded | |||||
| 3197 | # from the isolated compile | |||||
| 3198 | # JSON output. | |||||
| 3199 | # | |||||
| 3200 | # Exit: Returns a schema hashref with input, | |||||
| 3201 | # output, style, source, _notes, and | |||||
| 3202 | # _confidence keys. | |||||
| 3203 | # | |||||
| 3204 | # Side effects: None. | |||||
| 3205 | # | |||||
| 3206 | # Notes: Unknown Type::Params type names are | |||||
| 3207 | # mapped to 'string' with a note added | |||||
| 3208 | # and confidence downgraded to 'medium'. | |||||
| 3209 | # -------------------------------------------------- | |||||
| 3210 | sub _build_schema_from_meta { | |||||
| 3211 | 8 | 35 | my ($self, $meta) = @_; | |||
| 3212 | ||||||
| 3213 | 8 | 18 | my %type_map = ( | |||
| 3214 | Num => 'number', | |||||
| 3215 | Int => 'integer', | |||||
| 3216 | Str => 'string', | |||||
| 3217 | Bool => 'boolean', | |||||
| 3218 | Object => 'object', | |||||
| 3219 | ArrayRef => 'array', | |||||
| 3220 | HashRef => 'object', | |||||
| 3221 | ); | |||||
| 3222 | ||||||
| 3223 | 8 | 4 | my $input; | |||
| 3224 | 8 | 6 | my $position = 0; | |||
| 3225 | 8 | 7 | my $confidence = 'high'; | |||
| 3226 | 8 | 6 | my @notes = ('Type::Params detected'); | |||
| 3227 | ||||||
| 3228 | 8 8 | 7 13 | foreach my $p (@{ $meta->{parameters} || [] }) { | |||
| 3229 | 8 | 12 | my $type = $type_map{ $p->{type} } // 'string'; | |||
| 3230 | ||||||
| 3231 | 8 | 8 | if (!exists $type_map{$p->{type}}) { | |||
| 3232 | 1 | 2 | push @notes, "Unknown type $p->{type}, defaulting to string"; | |||
| 3233 | 1 | 0 | $confidence = 'medium'; | |||
| 3234 | } | |||||
| 3235 | ||||||
| 3236 | $input->{"arg$position"} = { | |||||
| 3237 | type => $type, | |||||
| 3238 | position => $position, | |||||
| 3239 | 8 | 18 | optional => $p->{optional} ? 1 : 0, | |||
| 3240 | }; | |||||
| 3241 | ||||||
| 3242 | 8 | 8 | $position++; | |||
| 3243 | } | |||||
| 3244 | ||||||
| 3245 | 8 | 6 | my $output; | |||
| 3246 | ||||||
| 3247 | 8 | 11 | if (my $ret = $meta->{returns}) { | |||
| 3248 | 2 | 2 | my $type = $type_map{ $ret->{type} } // 'string'; | |||
| 3249 | ||||||
| 3250 | 2 | 3 | if (!exists $type_map{$ret->{type}}) { | |||
| 3251 | 0 | 0 | push @notes, "Unknown return type $ret->{type}, defaulting to string"; | |||
| 3252 | 0 | 0 | $confidence = 'medium'; | |||
| 3253 | } | |||||
| 3254 | ||||||
| 3255 | $output = { | |||||
| 3256 | 2 | 4 | type => $type, | |||
| 3257 | "_$ret->{context}_context" => { type => $type }, | |||||
| 3258 | }; | |||||
| 3259 | } | |||||
| 3260 | ||||||
| 3261 | return { | |||||
| 3262 | 8 | 24 | input => $input, | |||
| 3263 | output => $output, | |||||
| 3264 | style => 'hash', | |||||
| 3265 | source => 'validator', | |||||
| 3266 | _notes => \@notes, | |||||
| 3267 | _confidence => { | |||||
| 3268 | input => $confidence, | |||||
| 3269 | }, | |||||
| 3270 | }; | |||||
| 3271 | } | |||||
| 3272 | ||||||
| 3273 | # -------------------------------------------------- | |||||
| 3274 | # _analyze_pod | |||||
| 3275 | # | |||||
| 3276 | # Purpose: Parse POD documentation for a method | |||||
| 3277 | # and extract parameter names, types, | |||||
| 3278 | # constraints, and optionality from | |||||
| 3279 | # multiple POD patterns. | |||||
| 3280 | # | |||||
| 3281 | # Entry: $pod - string of POD content as | |||||
| 3282 | # returned by _extract_pod_before. | |||||
| 3283 | # May be undef or empty. | |||||
| 3284 | # | |||||
| 3285 | # Exit: Returns a hashref of parameter name | |||||
| 3286 | # to parameter spec hashref. Returns an | |||||
| 3287 | # empty hashref if no POD is provided | |||||
| 3288 | # or no parameters are found. | |||||
| 3289 | # | |||||
| 3290 | # Side effects: Carps when a semantic type is | |||||
| 3291 | # detected, advising the caller to | |||||
| 3292 | # set config->properties. | |||||
| 3293 | # Logs progress to stdout when | |||||
| 3294 | # verbose is set. | |||||
| 3295 | # | |||||
| 3296 | # Notes: Three pattern strategies are tried | |||||
| 3297 | # in order: (1) named Parameters section, | |||||
| 3298 | # (2) inline $name - type format, | |||||
| 3299 | # (3) =over/=item list. Parameters found | |||||
| 3300 | # earlier take precedence over later | |||||
| 3301 | # discoveries. Default values from POD | |||||
| 3302 | # are merged in last. | |||||
| 3303 | # -------------------------------------------------- | |||||
| 3304 | sub _analyze_pod { | |||||
| 3305 | 273 | 269 | my ($self, $pod) = @_; | |||
| 3306 | ||||||
| 3307 | 273 | 306 | return {} unless $pod; | |||
| 3308 | ||||||
| 3309 | 97 | 61 | my %params; | |||
| 3310 | 97 | 80 | my $position_counter = 0; | |||
| 3311 | ||||||
| 3312 | # Check for positional arguments in method signature | |||||
| 3313 | # Pattern: =head2 method_name($arg1, $arg2, $arg3) | |||||
| 3314 | 97 | 205 | if ($pod =~ /=head2\s+\w+\s*\(([^)]+)\)/s) { | |||
| 3315 | 32 | 36 | my $sig = $1; | |||
| 3316 | # Extract parameter names in order | |||||
| 3317 | 32 | 65 | my @sig_params = $sig =~ /\$(\w+)/g; | |||
| 3318 | ||||||
| 3319 | # Skip $self or $class | |||||
| 3320 | 32 | 72 | shift @sig_params if @sig_params && $sig_params[0] =~ /^(self|class)$/i; | |||
| 3321 | ||||||
| 3322 | # Assign positions | |||||
| 3323 | 32 | 29 | foreach my $param (@sig_params) { | |||
| 3324 | 44 | 107 | $params{$param}{position} //= $position_counter; | |||
| 3325 | 44 | 68 | $self->_log(" POD: $param has position $params{$param}{position}"); | |||
| 3326 | 44 | 46 | $position_counter++; | |||
| 3327 | } | |||||
| 3328 | } | |||||
| 3329 | ||||||
| 3330 | 97 | 144 | $self->_log(" POD: Found $position_counter unnamed parameters to add to the position list"); | |||
| 3331 | ||||||
| 3332 | # Pattern 1: Parse line-by-line in Parameters section | |||||
| 3333 | # First, extract the Parameters section | |||||
| 3334 | 97 | 66 | my $param_section; | |||
| 3335 | 97 | 685 | if($pod =~ /(?:Parameters?|Arguments?|Inputs?):?\s*\n((?:\s*\$.*\n)+)/si) { | |||
| 3336 | 29 | 31 | $param_section = $1; | |||
| 3337 | } elsif ($pod =~ /^=head\d+\s+(?:Parameters?|Arguments?|Inputs?)\b.*?\n(.*?)(?=^=head|\Z)/msi) { | |||||
| 3338 | 17 | 19 | $param_section = $1; | |||
| 3339 | } | |||||
| 3340 | 97 | 92 | if($param_section) { | |||
| 3341 | 46 | 39 | my $param_order = 0; | |||
| 3342 | ||||||
| 3343 | 46 | 96 | $self->_log(" POD: Scan for named parameters in '$param_section'"); | |||
| 3344 | # Now parse each line that starts with $varname | |||||
| 3345 | 46 | 93 | foreach my $line (split /\n/, $param_section) { | |||
| 3346 | 218 | 196 | if ($line =~ /C<\$(\w+)>\s*\((Required|Mandatory)\)/i) { | |||
| 3347 | 0 | 0 | $params{$1}{optional} = 0; | |||
| 3348 | 0 | 0 | $self->_log(" POD: $1 marked required from item header"); | |||
| 3349 | } | |||||
| 3350 | ||||||
| 3351 | # Match: $name - type (constraints), description | |||||
| 3352 | # or: $name - type, description | |||||
| 3353 | # or: $name - type | |||||
| 3354 | 218 | 357 | if(($line =~ /^\s*\$(\w+)\s*-\s*(\w+)(?:\s*\(([^)]+)\))?\s*,?\s*(.*)$/i) || | |||
| 3355 | ($line =~ /^\s*C<\$(\w+)>\s*-\s*(\w+)(?:\s*\(([^)]+)\))?\s*,?\s*(.*)$/i)) { | |||||
| 3356 | 42 | 118 | my ($name, $type, $constraint, $desc) = ($1, lc($2), $3, $4); | |||
| 3357 | ||||||
| 3358 | # Clean up | |||||
| 3359 | 42 | 98 | $desc =~ s/^\s+|\s+$//g if $desc; | |||
| 3360 | ||||||
| 3361 | # Skip common non-parameters | |||||
| 3362 | 42 | 61 | next if $name =~ /^(self|class|return|returns?)$/i; | |||
| 3363 | ||||||
| 3364 | 41 | 66 | $params{$name} ||= { _source => 'pod' }; | |||
| 3365 | ||||||
| 3366 | # If we haven't already assigned a position from the signature, use order in Parameters section | |||||
| 3367 | 41 | 43 | unless (exists $params{$name}{position}) { | |||
| 3368 | 16 | 15 | $params{$name}{position} = $param_order++; | |||
| 3369 | 16 | 21 | $self->_log(" POD: $name has position $params{$name}{position} (from Parameters order)"); | |||
| 3370 | } | |||||
| 3371 | ||||||
| 3372 | # Normalize type names | |||||
| 3373 | 41 | 43 | $type = 'integer' if $type eq 'int'; | |||
| 3374 | 41 | 65 | $type = 'number' if $type eq 'num' || $type eq 'float'; | |||
| 3375 | 41 | 54 | $type = 'boolean' if $type eq 'bool'; | |||
| 3376 | 41 | 70 | $type = 'arrayref' if $type eq 'array'; | |||
| 3377 | 41 | 32 | $type = 'hashref' if $type eq 'hash'; | |||
| 3378 | ||||||
| 3379 | 41 | 42 | $params{$name}{type} = $type; | |||
| 3380 | ||||||
| 3381 | # Parse constraints | |||||
| 3382 | 41 | 36 | if($constraint) { | |||
| 3383 | 7 | 10 | $self->_parse_constraints($params{$name}, $constraint); | |||
| 3384 | } | |||||
| 3385 | ||||||
| 3386 | # Check for optional/required in description OR constraint | |||||
| 3387 | 41 | 64 | my $full_text = ($constraint || '') . ' ' . ($desc || ''); | |||
| 3388 | 41 | 79 | if ($full_text =~ /optional/i) { | |||
| 3389 | 6 | 5 | $params{$name}{optional} = 1; | |||
| 3390 | 6 | 6 | $self->_log(" POD: $name marked as optional"); | |||
| 3391 | } elsif ($full_text =~ /required|mandatory/i) { | |||||
| 3392 | 2 | 4 | $params{$name}{optional} = 0; | |||
| 3393 | 2 | 3 | $self->_log(" POD: $name marked as required"); | |||
| 3394 | } | |||||
| 3395 | ||||||
| 3396 | # Detect semantic types: | |||||
| 3397 | 41 | 69 | if ($desc =~ /\b(email|url|uri|path|filename)\b/i) { | |||
| 3398 | # TODO: ensure properties is set to 1 in $config | |||||
| 3399 | 2 | 54 | carp('Manually set config->properties to 1 in ', $self->{'input_file'}); | |||
| 3400 | 2 | 1279 | $params{$name}{semantic} = lc($1); | |||
| 3401 | } | |||||
| 3402 | ||||||
| 3403 | # Look for regex patterns | |||||
| 3404 | 41 | 60 | if ($desc && $desc =~ m{matches?\s+(/[^/]+/|qr/.+?/)}i) { | |||
| 3405 | 1 | 1 | $params{$name}{matches} = $1; | |||
| 3406 | } | |||||
| 3407 | ||||||
| 3408 | 41 | 81 | $self->_log(" POD: Found parameter '$name' in parameters section, type=$type" . | |||
| 3409 | ($constraint ? " ($constraint)" : '') . | |||||
| 3410 | ($desc ? " - $desc" : '')); | |||||
| 3411 | } | |||||
| 3412 | } | |||||
| 3413 | } | |||||
| 3414 | ||||||
| 3415 | # Pattern 2: Also try the inline format in case Parameters: section wasn't found | |||||
| 3416 | 97 | 225 | while ($pod =~ /\$(\w+)\s*-\s*(string|integer|int|number|num|float|boolean|bool|arrayref|array|hashref|hash|object)(?:\s*\(([^)]+)\))?\s*,?\s*(.*)$/gim) { | |||
| 3417 | 28 | 40 | my ($name, $type, $constraint, $desc) = ($1, lc($2), $3, $4); | |||
| 3418 | ||||||
| 3419 | # Only process if we haven't already found this param in the Parameters section | |||||
| 3420 | 28 | 48 | next if exists $params{$name}; | |||
| 3421 | ||||||
| 3422 | # Clean up description - remove leading/trailing whitespace | |||||
| 3423 | 2 | 7 | $desc =~ s/^\s+|\s+$//g if $desc; | |||
| 3424 | ||||||
| 3425 | # Skip common words that aren't parameters | |||||
| 3426 | 2 | 4 | next if $name =~ /^(self|class|return|returns?)$/i; | |||
| 3427 | ||||||
| 3428 | 1 | 3 | $params{$name} ||= { _source => 'pod' }; | |||
| 3429 | ||||||
| 3430 | # Normalize type names | |||||
| 3431 | 1 | 1 | $type = 'integer' if $type eq 'int'; | |||
| 3432 | 1 | 2 | $type = 'number' if $type eq 'num' || $type eq 'float'; | |||
| 3433 | 1 | 2 | $type = 'boolean' if $type eq 'bool'; | |||
| 3434 | 1 | 1 | $type = 'arrayref' if $type eq 'array'; | |||
| 3435 | 1 | 1 | $type = 'hashref' if $type eq 'hash'; | |||
| 3436 | ||||||
| 3437 | 1 | 1 | $params{$name}{type} = $type; | |||
| 3438 | ||||||
| 3439 | # Parse constraints | |||||
| 3440 | 1 | 2 | if ($constraint) { | |||
| 3441 | 0 | 0 | $self->_parse_constraints($params{$name}, $constraint); | |||
| 3442 | } | |||||
| 3443 | ||||||
| 3444 | # Check for optional/required in description | |||||
| 3445 | 1 | 1 | if ($desc) { | |||
| 3446 | 1 | 3 | if ($desc =~ /optional/i) { | |||
| 3447 | 0 | 0 | $params{$name}{optional} = 1; | |||
| 3448 | } elsif ($desc =~ /required|mandatory/i) { | |||||
| 3449 | 0 | 0 | $params{$name}{optional} = 0; | |||
| 3450 | } | |||||
| 3451 | ||||||
| 3452 | # Look for regex patterns in description | |||||
| 3453 | 1 | 1 | if ($desc =~ m{matches?\s+(/[^/]+/|qr/.+?/)}i) { | |||
| 3454 | 0 | 0 | $params{$name}{matches} = $1; | |||
| 3455 | } | |||||
| 3456 | } | |||||
| 3457 | ||||||
| 3458 | 1 | 4 | $self->_log(" POD: Found parameter '$name' in the inline documentation, type=$type" . | |||
| 3459 | ($constraint ? " ($constraint)" : '')); | |||||
| 3460 | } | |||||
| 3461 | ||||||
| 3462 | # Pattern 3: Parse =over /=item list (supports bullets and C<>) | |||||
| 3463 | 97 | 411 | while ($pod =~ /=item\s+(?:\*\s*)?(?:C<)?\$(\w+)\b(?:>)?\s*(?:-.*)?\n?(.*?)(?==item|\=back|\=head)/sig) { | |||
| 3464 | 7 | 6 | my $name = $1; | |||
| 3465 | 7 | 8 | my $desc = $2; | |||
| 3466 | ||||||
| 3467 | # Never allow empty or undefined parameter names | |||||
| 3468 | 7 | 16 | next unless defined $name && length $name; | |||
| 3469 | ||||||
| 3470 | 7 | 5 | $desc =~ s/^\s+|\s+$//g; | |||
| 3471 | ||||||
| 3472 | # Skip common non-parameters | |||||
| 3473 | 7 | 8 | next if $name =~ /^(self|class|return|returns?)$/i; | |||
| 3474 | ||||||
| 3475 | 7 | 32 | $params{$name} ||= { _source => 'pod' }; | |||
| 3476 | ||||||
| 3477 | # Explicit typed form only: | |||||
| 3478 | # $param - type (constraints) | |||||
| 3479 | 7 | 7 | if ($desc =~ /^\s*(string|integer|int|number|num|float|boolean|bool|array|arrayref|hash|hashref)\b(?:\s*\(([^)]+)\))?/i) { | |||
| 3480 | 0 | 0 | my $type = lc($1); | |||
| 3481 | 0 | 0 | my $constraint = $2; | |||
| 3482 | ||||||
| 3483 | # Normalize type names | |||||
| 3484 | 0 | 0 | $type = 'integer' if $type eq 'int'; | |||
| 3485 | 0 | 0 | $type = 'number' if $type eq 'num' || $type eq 'float'; | |||
| 3486 | 0 | 0 | $type = 'boolean' if $type eq 'bool'; | |||
| 3487 | 0 | 0 | $type = 'arrayref' if $type eq 'array'; | |||
| 3488 | 0 | 0 | $type = 'hashref' if $type eq 'hash'; | |||
| 3489 | ||||||
| 3490 | 0 | 0 | $params{$name}{type} = $type; | |||
| 3491 | ||||||
| 3492 | 0 | 0 | if ($constraint) { | |||
| 3493 | 0 | 0 | $self->_parse_constraints($params{$name}, $constraint); | |||
| 3494 | } | |||||
| 3495 | ||||||
| 3496 | 0 | 0 | $self->_log(" POD: Explicit type '$type' for $name"); | |||
| 3497 | } else { | |||||
| 3498 | # Heuristic inference from description text | |||||
| 3499 | 7 | 20 | if ($desc =~ /\bstring\b/i) { | |||
| 3500 | 0 | 0 | $params{$name}{type} = 'string'; | |||
| 3501 | } elsif ($desc =~ /\b(int|integer)\b/i) { | |||||
| 3502 | 0 | 0 | $params{$name}{type} = 'integer'; | |||
| 3503 | } elsif ($desc =~ /\b(num|number|float)\b/i) { | |||||
| 3504 | 0 | 0 | $params{$name}{type} = 'number'; | |||
| 3505 | } elsif ($desc =~ /\b(bool|boolean)\b/i) { | |||||
| 3506 | 0 | 0 | $params{$name}{type} = 'boolean'; | |||
| 3507 | } | |||||
| 3508 | } | |||||
| 3509 | ||||||
| 3510 | # Check for optional/required in description | |||||
| 3511 | 7 | 13 | if ($desc =~ /optional/i) { | |||
| 3512 | 0 | 0 | $params{$name}{optional} = 1; | |||
| 3513 | } elsif ($desc =~ /required|mandatory/i) { | |||||
| 3514 | 0 | 0 | $params{$name}{optional} = 0; | |||
| 3515 | } | |||||
| 3516 | ||||||
| 3517 | # Look for regex patterns | |||||
| 3518 | 7 | 9 | if ($desc =~ m{matches?\s+(/[^/]+/|qr/.+?/)}i) { | |||
| 3519 | 0 | 0 | $params{$name}{matches} = $1; | |||
| 3520 | } | |||||
| 3521 | ||||||
| 3522 | 7 | 12 | $self->_log(" POD: Found parameter '$name' from =item list"); | |||
| 3523 | } | |||||
| 3524 | ||||||
| 3525 | # Extract default values from POD | |||||
| 3526 | 97 | 135 | my $pod_defaults = $self->_extract_defaults_from_pod($pod); | |||
| 3527 | 97 | 144 | foreach my $param (keys %$pod_defaults) { | |||
| 3528 | 10 | 10 | if (exists $params{$param}) { | |||
| 3529 | 10 | 11 | $params{$param}{_default} = $pod_defaults->{$param}; | |||
| 3530 | 10 | 8 | $params{$param}{optional} = 1 unless defined $params{$param}{optional}; | |||
| 3531 | $self->_log(sprintf(" POD: %s has default value: %s", | |||||
| 3532 | $param, | |||||
| 3533 | 10 | 16 | defined($pod_defaults->{$param}) ? $pod_defaults->{$param} : 'undef' | |||
| 3534 | )); | |||||
| 3535 | } | |||||
| 3536 | } | |||||
| 3537 | ||||||
| 3538 | # Default undocumented optionality: documented params are REQUIRED unless stated otherwise | |||||
| 3539 | 97 | 128 | for my $name (keys %params) { | |||
| 3540 | 68 | 78 | next if $name =~ /^(self|class)$/i; | |||
| 3541 | ||||||
| 3542 | # TODO: if optionality was never explicitly set, assume required. | |||||
| 3543 | # Currently disabled as it breaks some schemas â revisit in a future pass. | |||||
| 3544 | # if (!exists $params{$name}{optional}) { | |||||
| 3545 | # $params{$name}{optional} = 0; | |||||
| 3546 | # $self->_log(" POD: $name assumed required (no optional/default specified)"); | |||||
| 3547 | # } | |||||
| 3548 | } | |||||
| 3549 | ||||||
| 3550 | 97 | 123 | return \%params; | |||
| 3551 | } | |||||
| 3552 | ||||||
| 3553 | # -------------------------------------------------- | |||||
| 3554 | # _analyze_output | |||||
| 3555 | # | |||||
| 3556 | # Purpose: Orchestrate analysis of a method's | |||||
| 3557 | # return value by combining POD return | |||||
| 3558 | # section parsing, code return statement | |||||
| 3559 | # analysis, boolean detection, context | |||||
| 3560 | # detection, void detection, chaining | |||||
| 3561 | # detection, and error convention | |||||
| 3562 | # detection. | |||||
| 3563 | # | |||||
| 3564 | # Entry: $pod - POD string for the method. | |||||
| 3565 | # $code - method body source string. | |||||
| 3566 | # $method_name - name of the method being | |||||
| 3567 | # analysed, used for | |||||
| 3568 | # boolean heuristics. | |||||
| 3569 | # | |||||
| 3570 | # Exit: Returns a hashref describing the | |||||
| 3571 | # output type and behaviour, or an empty | |||||
| 3572 | # hashref if nothing could be determined. | |||||
| 3573 | # Keys include: type, value, isa, and | |||||
| 3574 | # various _* metadata keys. | |||||
| 3575 | # | |||||
| 3576 | # Side effects: Logs progress to stdout when | |||||
| 3577 | # verbose is set. | |||||
| 3578 | # -------------------------------------------------- | |||||
| 3579 | sub _analyze_output { | |||||
| 3580 | 266 | 355 | my ($self, $pod, $code, $method_name) = @_; | |||
| 3581 | ||||||
| 3582 | 266 | 191 | my %output; | |||
| 3583 | ||||||
| 3584 | 266 | 439 | $self->_analyze_output_from_pod(\%output, $pod); | |||
| 3585 | 266 | 432 | $self->_analyze_output_from_code(\%output, $code, $method_name); | |||
| 3586 | 266 | 485 | $self->_enhance_boolean_detection(\%output, $pod, $code, $method_name); | |||
| 3587 | 266 | 452 | $self->_detect_list_context(\%output, $code); | |||
| 3588 | 266 | 417 | $self->_detect_void_context(\%output, $code, $method_name); | |||
| 3589 | 266 | 437 | $self->_detect_chaining_pattern(\%output, $code); | |||
| 3590 | 266 | 431 | $self->_detect_error_conventions(\%output, $code); | |||
| 3591 | ||||||
| 3592 | 266 | 565 | $self->_validate_output(\%output) if keys %output; | |||
| 3593 | ||||||
| 3594 | # Don't return empty output | |||||
| 3595 | 266 | 419 | return (keys %output) ? \%output : {}; | |||
| 3596 | } | |||||
| 3597 | ||||||
| 3598 | # -------------------------------------------------- | |||||
| 3599 | # _analyze_output_from_pod | |||||
| 3600 | # | |||||
| 3601 | # Purpose: Parse the POD documentation for a | |||||
| 3602 | # method's return value and populate | |||||
| 3603 | # an output hashref with type, value, | |||||
| 3604 | # and behaviour information. | |||||
| 3605 | # | |||||
| 3606 | # Entry: $output - hashref to populate | |||||
| 3607 | # (modified in place). | |||||
| 3608 | # $pod - POD string for the method. | |||||
| 3609 | # | |||||
| 3610 | # Exit: Returns nothing. Modifies $output | |||||
| 3611 | # in place. | |||||
| 3612 | # | |||||
| 3613 | # Side effects: Logs detections to stdout when | |||||
| 3614 | # verbose is set. | |||||
| 3615 | # | |||||
| 3616 | # Notes: Two patterns are tried: (1) a | |||||
| 3617 | # 'Returns:' section of up to 3 lines, | |||||
| 3618 | # and (2) an inline 'returns X' phrase. | |||||
| 3619 | # The section pattern takes precedence. | |||||
| 3620 | # -------------------------------------------------- | |||||
| 3621 | sub _analyze_output_from_pod { | |||||
| 3622 | 271 | 300 | my ($self, $output, $pod) = @_; | |||
| 3623 | 271 2981 | 305 2892 | my %VALID_OUTPUT_TYPES = map { $_ => 1 } | |||
| 3624 | qw(string integer number float boolean arrayref hashref object coderef void undef); | |||||
| 3625 | ||||||
| 3626 | 271 | 540 | if ($pod) { | |||
| 3627 | # Pattern 1: Returns: section | |||||
| 3628 | # Up to 3 lines | |||||
| 3629 | 97 | 151 | if ($pod =~ /Returns?:\s+([^\n]+(?:\n[^\n]+){0,2})/si) { | |||
| 3630 | 8 | 10 | my $returns_desc = $1; | |||
| 3631 | 8 | 20 | $returns_desc =~ s/^\s+|\s+$//g; | |||
| 3632 | ||||||
| 3633 | 8 | 15 | $self->_log(" OUTPUT: Found Returns section: $returns_desc"); | |||
| 3634 | ||||||
| 3635 | # Try to infer type from description | |||||
| 3636 | 8 | 64 | if ($returns_desc =~ /\b(string|text)\b/i) { | |||
| 3637 | 1 | 1 | $output->{type} = 'string'; | |||
| 3638 | } elsif ($returns_desc =~ /\b(integer|int|count)\b/i) { | |||||
| 3639 | 1 | 1 | $output->{type} = 'integer'; | |||
| 3640 | } elsif ($returns_desc =~ /\b(float|decimal|number)\b/i) { | |||||
| 3641 | 0 | 0 | $output->{type} = 'number'; | |||
| 3642 | } elsif ($returns_desc =~ /\b(boolean|true|false)\b/i) { | |||||
| 3643 | 1 | 2 | $output->{type} = 'boolean'; | |||
| 3644 | } elsif ($returns_desc =~ /\b(array|list)\b/i) { | |||||
| 3645 | 0 | 0 | $output->{type} = 'arrayref'; | |||
| 3646 | } elsif ($returns_desc =~ /\b(hash|hashref|dictionary)\b/i) { | |||||
| 3647 | 0 | 0 | $output->{type} = 'hashref'; | |||
| 3648 | } elsif ($returns_desc =~ /\b(object|instance)\b/i) { | |||||
| 3649 | 2 | 2 | $output->{type} = 'object'; | |||
| 3650 | } elsif ($returns_desc =~ /\bundef\b/i) { | |||||
| 3651 | 0 | 0 | $output->{type} = 'undef'; | |||
| 3652 | } | |||||
| 3653 | ||||||
| 3654 | # Look for specific values | |||||
| 3655 | 8 | 18 | if ($returns_desc =~ /\b1\s+(?:on\s+success|if\s+successful)\b/i) { | |||
| 3656 | 1 | 2 | $output->{value} = 1; | |||
| 3657 | 1 | 2 | if(defined($output->{'type'}) && ($output->{type} eq 'scalar')) { | |||
| 3658 | 0 | 0 | $output->{type} = 'boolean'; | |||
| 3659 | } else { | |||||
| 3660 | 1 | 3 | $output->{type} ||= 'boolean'; | |||
| 3661 | } | |||||
| 3662 | 1 | 1 | $self->_log(" OUTPUT: Returns 1 on success"); | |||
| 3663 | } elsif ($returns_desc =~ /\b0\s+(?:on\s+failure|if\s+fail)\b/i) { | |||||
| 3664 | 0 | 0 | $output->{alt_value} = 0; | |||
| 3665 | } elsif ($returns_desc =~ /dies\s+on\s+(?:error|failure)/i) { | |||||
| 3666 | 0 | 0 | $output->{_STATUS} = 'LIVES'; | |||
| 3667 | 0 | 0 | $self->_log(' OUTPUT: Should not die on success'); | |||
| 3668 | } | |||||
| 3669 | 8 | 13 | if ($returns_desc =~ /\b(true|false)\b/i) { | |||
| 3670 | 1 | 2 | $output->{type} ||= 'boolean'; | |||
| 3671 | } | |||||
| 3672 | 8 | 10 | if ($returns_desc =~ /\bundef\b/i) { | |||
| 3673 | 0 | 0 | $output->{optional} = 1; | |||
| 3674 | } | |||||
| 3675 | } | |||||
| 3676 | ||||||
| 3677 | # Pattern 2: Inline "returns X" | |||||
| 3678 | 97 | 338 | if((!$output->{type}) && ($pod =~ /returns?\s+(?:an?\s+)?(\w+)/i)) { | |||
| 3679 | 28 | 43 | my $type = lc($1); | |||
| 3680 | ||||||
| 3681 | 28 | 54 | $type = 'boolean' if $type =~ /^(true|false|bool)$/; | |||
| 3682 | # Skip if it's just a number (like "returns 1") | |||||
| 3683 | 28 | 36 | $type = 'integer' if $type eq 'int'; | |||
| 3684 | 28 | 70 | $type = 'number' if $type =~ /^(num|float)$/; | |||
| 3685 | 28 | 27 | $type = 'arrayref' if $type eq 'array'; | |||
| 3686 | 28 | 29 | $type = 'hashref' if $type eq 'hash'; | |||
| 3687 | ||||||
| 3688 | 28 | 41 | if($type =~ /^\d+$/) { | |||
| 3689 | 0 | 0 | if($type eq '1' || $type eq '0') { | |||
| 3690 | # Try hard to guess if the result is a boolean | |||||
| 3691 | 0 | 0 | if($pod =~ /1 on success.+0 (on|if) /i) { | |||
| 3692 | 0 | 0 | $type = 'boolean'; | |||
| 3693 | } elsif($pod =~ /return 0 .+ 1 on success/) { | |||||
| 3694 | 0 | 0 | $type = 'boolean'; | |||
| 3695 | } else { | |||||
| 3696 | 0 | 0 | $type = 'integer'; | |||
| 3697 | } | |||||
| 3698 | } else { | |||||
| 3699 | 0 | 0 | $type = 'integer'; | |||
| 3700 | } | |||||
| 3701 | } | |||||
| 3702 | ||||||
| 3703 | 28 | 34 | $type = 'arrayref' if !$type && $pod =~ /returns?\s+.+\slist\b/i; | |||
| 3704 | # $output->{type} = $type if $type && $type !~ /^\d+$/; | |||||
| 3705 | 28 | 34 | if ($VALID_OUTPUT_TYPES{$type}) { | |||
| 3706 | 8 | 10 | $output->{type} = $type; | |||
| 3707 | 8 | 16 | $self->_log(" OUTPUT: Inferred type from POD: $type"); | |||
| 3708 | } else { | |||||
| 3709 | 20 | 32 | $self->_log(" OUTPUT: POD return type '$type' is not a valid type, ignoring"); | |||
| 3710 | } | |||||
| 3711 | } | |||||
| 3712 | } | |||||
| 3713 | } | |||||
| 3714 | ||||||
| 3715 | # -------------------------------------------------- | |||||
| 3716 | # _extract_defaults_from_pod | |||||
| 3717 | # | |||||
| 3718 | # Purpose: Extract default values for parameters | |||||
| 3719 | # from POD documentation using multiple | |||||
| 3720 | # pattern strategies. | |||||
| 3721 | # | |||||
| 3722 | # Entry: $pod - POD string for the method. | |||||
| 3723 | # May be undef or empty. | |||||
| 3724 | # | |||||
| 3725 | # Exit: Returns a hashref of parameter name | |||||
| 3726 | # to cleaned default value. Returns an | |||||
| 3727 | # empty hashref if no POD is provided | |||||
| 3728 | # or no defaults are found. | |||||
| 3729 | # | |||||
| 3730 | # Side effects: None. | |||||
| 3731 | # | |||||
| 3732 | # Notes: Three strategies are tried: (1) lines | |||||
| 3733 | # containing 'Default:' or 'Defaults to:', | |||||
| 3734 | # (2) lines containing 'Optional, default', | |||||
| 3735 | # (3) inline $name - type, default value | |||||
| 3736 | # format. Parameter names are inferred | |||||
| 3737 | # by scanning backwards from the default | |||||
| 3738 | # phrase to the nearest $variable. | |||||
| 3739 | # -------------------------------------------------- | |||||
| 3740 | sub _extract_defaults_from_pod { | |||||
| 3741 | 99 | 550 | my ($self, $pod) = @_; | |||
| 3742 | ||||||
| 3743 | 99 | 92 | return {} unless $pod; | |||
| 3744 | ||||||
| 3745 | 99 | 80 | my %defaults; | |||
| 3746 | ||||||
| 3747 | # Pattern 1: Default: 'value' or Defaults to: 'value' | |||||
| 3748 | 99 | 195 | while ($pod =~ /(?:Default(?:s? to)?|default(?:s? to)?)[:]\s*([^\n\r]+)/gi) { | |||
| 3749 | 18 | 13 | my $default_text = $1; | |||
| 3750 | 18 | 16 | my $match_pos = pos($pod); | |||
| 3751 | 18 | 23 | $default_text =~ s/^\s+|\s+$//g; | |||
| 3752 | ||||||
| 3753 | # Look backwards in the POD to find the parameter name | |||||
| 3754 | 18 | 13 | my $context = substr($pod, 0, $match_pos); | |||
| 3755 | 18 | 31 | my @param_matches = ($context =~ /\$(\w+)/g); | |||
| 3756 | 18 | 18 | my $param = $param_matches[-1] if @param_matches; # Last parameter before default | |||
| 3757 | ||||||
| 3758 | 18 | 15 | if ($param) { | |||
| 3759 | # Always clean the default value - let _clean_default_value handle everything | |||||
| 3760 | 18 | 15 | if ($default_text =~ /(\w+)\s*=\s*(.+)$/) { | |||
| 3761 | # Has explicit param = value format in the default text | |||||
| 3762 | 0 | 0 | my ($p, $value) = ($1, $2); | |||
| 3763 | 0 | 0 | $defaults{$p} = $self->_clean_default_value($value); | |||
| 3764 | } else { | |||||
| 3765 | # Just a value, associate with the found param | |||||
| 3766 | 18 | 30 | $defaults{$param} = $self->_clean_default_value($default_text, 0); # NOT from code | |||
| 3767 | } | |||||
| 3768 | } | |||||
| 3769 | } | |||||
| 3770 | ||||||
| 3771 | # Pattern 2: Optional, default 'value' | |||||
| 3772 | 99 | 162 | while ($pod =~ /Optional(?:,)?\s+(?:default|value)\s*[:=]?\s*([^\n\r,;]+)/gi) { | |||
| 3773 | 5 | 3 | my $default_text = $1; | |||
| 3774 | 5 | 4 | my $match_pos = pos($pod); | |||
| 3775 | 5 | 6 | $default_text =~ s/^\s+|\s+$//g; | |||
| 3776 | ||||||
| 3777 | # Look backwards for parameter name | |||||
| 3778 | 5 | 4 | my $context = substr($pod, 0, $match_pos); | |||
| 3779 | 5 | 9 | my @param_matches = ($context =~ /\$(\w+)/g); | |||
| 3780 | 5 | 6 | if (@param_matches) { | |||
| 3781 | 5 | 3 | my $param = $param_matches[-1]; # Last parameter before the default | |||
| 3782 | 5 | 5 | $defaults{$param} = $self->_clean_default_value($default_text, 0); | |||
| 3783 | } | |||||
| 3784 | } | |||||
| 3785 | ||||||
| 3786 | # Pattern 3: In parameter descriptions: $param - type, default 'value' | |||||
| 3787 | 99 | 240 | while ($pod =~ /\$(\w+)\s*-\s*\w+(?:\([^)]*\))?[,\s]+default\s+['"]?([^'",\n]+)['"]?/gi) { | |||
| 3788 | 0 | 0 | my ($param, $value) = ($1, $2); | |||
| 3789 | 0 | 0 | $defaults{$param} = $self->_clean_default_value($value, 0); | |||
| 3790 | } | |||||
| 3791 | ||||||
| 3792 | 99 | 96 | return \%defaults; | |||
| 3793 | } | |||||
| 3794 | ||||||
| 3795 | # -------------------------------------------------- | |||||
| 3796 | # _analyze_output_from_code | |||||
| 3797 | # | |||||
| 3798 | # Purpose: Analyse return statements in a method | |||||
| 3799 | # body to infer the output type by | |||||
| 3800 | # counting and classifying each return | |||||
| 3801 | # expression. | |||||
| 3802 | # | |||||
| 3803 | # Entry: $output - hashref to populate | |||||
| 3804 | # (modified in place). | |||||
| 3805 | # $code - method body source string. | |||||
| 3806 | # $method_name - method name string. | |||||
| 3807 | # | |||||
| 3808 | # Exit: Returns nothing. Modifies $output | |||||
| 3809 | # in place. | |||||
| 3810 | # | |||||
| 3811 | # Side effects: Logs detections to stdout when | |||||
| 3812 | # verbose is set. | |||||
| 3813 | # -------------------------------------------------- | |||||
| 3814 | sub _analyze_output_from_code | |||||
| 3815 | { | |||||
| 3816 | 270 | 311 | my ($self, $output, $code, $method_name) = @_; | |||
| 3817 | ||||||
| 3818 | 270 | 271 | if ($code) { | |||
| 3819 | # Early boolean detection - check for consistent 1/0 returns | |||||
| 3820 | 270 | 832 | my @all_returns = $code =~ /return\s+([^;]+);/g; | |||
| 3821 | 270 | 283 | if (@all_returns) { | |||
| 3822 | 252 | 188 | my $boolean_count = 0; | |||
| 3823 | 252 | 213 | my $total_count = scalar(@all_returns); | |||
| 3824 | ||||||
| 3825 | 252 | 257 | foreach my $ret (@all_returns) { | |||
| 3826 | 277 | 617 | $ret =~ s/^\s+|\s+$//g; | |||
| 3827 | # Match 0 or 1, even with conditions | |||||
| 3828 | 277 | 428 | $boolean_count++ if ($ret =~ /^(?:0|1)(?:\s|$)/); | |||
| 3829 | } | |||||
| 3830 | ||||||
| 3831 | # If most returns are 0 or 1, strongly suggest boolean | |||||
| 3832 | 252 | 339 | if ($boolean_count >= 2 && $boolean_count >= $total_count * 0.8) { | |||
| 3833 | 3 | 5 | unless ($output->{type}) { | |||
| 3834 | 3 | 3 | $output->{type} = 'boolean'; | |||
| 3835 | 3 | 6 | $self->_log(" OUTPUT: Early detection - $boolean_count/$total_count returns are 0/1, setting boolean"); | |||
| 3836 | } | |||||
| 3837 | } | |||||
| 3838 | } | |||||
| 3839 | ||||||
| 3840 | 270 | 206 | my @return_statements; | |||
| 3841 | ||||||
| 3842 | 270 | 1252 | if ($code =~ /return\s+bless\s*\{[^}]*\}\s*,\s*['"]?(\w+)['"]?/s) { | |||
| 3843 | # Detect blessed refs | |||||
| 3844 | 2 | 3 | $output->{type} = 'object'; | |||
| 3845 | 2 | 3 | if($method_name eq 'new') { | |||
| 3846 | # If we found the new() method, the object we're returning should be a sensible one | |||||
| 3847 | 0 | 0 | if($self->{_document} && (my $package_stmt = $self->{_document}->find_first('PPI::Statement::Package'))) { | |||
| 3848 | 0 | 0 | $output->{isa} = $package_stmt->namespace(); | |||
| 3849 | 0 | 0 | $self->{_package_name} //= $output->{isa}; | |||
| 3850 | } | |||||
| 3851 | } else { | |||||
| 3852 | 2 | 2 | $output->{isa} = $1; | |||
| 3853 | } | |||||
| 3854 | 2 | 3 | $self->_log(" OUTPUT: Bless found, inferring type from code is $output->{isa}"); | |||
| 3855 | } elsif ($code =~ /return\s+bless/s) { | |||||
| 3856 | 13 | 19 | $output->{type} = 'object'; | |||
| 3857 | 13 | 19 | if($method_name eq 'new') { | |||
| 3858 | 12 | 16 | $output->{isa} = $self->_extract_package_name(); | |||
| 3859 | 12 | 158 | $self->_log(" OUTPUT: Bless found, inferring type from code is $output->{isa}"); | |||
| 3860 | } else { | |||||
| 3861 | 1 | 1 | $self->_log(' OUTPUT: Bless found, inferring type from code is object'); | |||
| 3862 | } | |||||
| 3863 | } elsif ($code =~ /return\s*\(\s*[^)]+\s*,\s*[^)]+\s*\)\s*;/) { | |||||
| 3864 | # Detect array context returns - must end with semicolon to be actual return | |||||
| 3865 | 1 | 2 | $output->{type} = 'array'; # Not arrayref - actual array | |||
| 3866 | 1 | 1 | $self->_log(' OUTPUT: Found array contect return'); | |||
| 3867 | } elsif ($code =~ /return\s+bless[^,]+,\s*__PACKAGE__/) { | |||||
| 3868 | # Detect: bless {}, __PACKAGE__ | |||||
| 3869 | 0 | 0 | $output->{type} = 'object'; | |||
| 3870 | # Get package name from the extractor's stored document | |||||
| 3871 | 0 | 0 | if ($self->{_document}) { | |||
| 3872 | 0 | 0 | my $pkg = $self->{_document}->find_first('PPI::Statement::Package'); | |||
| 3873 | 0 | 0 | $output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN'; | |||
| 3874 | 0 | 0 | $self->_log(' OUTPUT: Object blessed into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN')); | |||
| 3875 | 0 | 0 | $self->{_package_name} //= $output->{isa}; | |||
| 3876 | } | |||||
| 3877 | } elsif ($code =~ /return\s*\(([^)]+)\)/) { | |||||
| 3878 | 0 | 0 | my $content = $1; | |||
| 3879 | 0 | 0 | if ($content =~ /,/) { # Has comma = multiple values | |||
| 3880 | 0 | 0 | $output->{type} = 'array'; | |||
| 3881 | } | |||||
| 3882 | } elsif ($code =~ /return\s+\$self\s*;/ && $code =~ /\$self\s*->\s*\{[^}]+\}\s*=/) { | |||||
| 3883 | # Returns $self for chaining | |||||
| 3884 | 6 | 6 | $output->{type} = 'object'; | |||
| 3885 | 6 | 9 | if ($self->{_document}) { | |||
| 3886 | 6 | 10 | my $pkg = $self->{_document}->find_first('PPI::Statement::Package'); | |||
| 3887 | 6 | 516 | $output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN'; | |||
| 3888 | 6 | 82 | $self->_log(' OUTPUT: Object chained into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN')); | |||
| 3889 | 6 | 9 | $self->{_package_name} //= $output->{isa}; | |||
| 3890 | } | |||||
| 3891 | } | |||||
| 3892 | ||||||
| 3893 | # Find all return statements | |||||
| 3894 | 270 | 602 | while ($code =~ /return\s+([^;]+);/g) { | |||
| 3895 | 277 | 356 | my $return_expr = $1; | |||
| 3896 | 277 | 388 | push @return_statements, $return_expr; | |||
| 3897 | } | |||||
| 3898 | ||||||
| 3899 | 270 | 269 | if (@return_statements) { | |||
| 3900 | 252 | 476 | $self->_log(' OUTPUT: Found ' . scalar(@return_statements) . ' return statement(s)'); | |||
| 3901 | ||||||
| 3902 | # Analyze return patterns | |||||
| 3903 | 252 | 195 | my %return_types; | |||
| 3904 | ||||||
| 3905 | 252 | 286 | if($output->{'type'}) { | |||
| 3906 | 33 | 49 | $return_types{$output->{'type'}} += 3; # Add weighting to what's already been found | |||
| 3907 | } | |||||
| 3908 | 252 | 179 | my $min; | |||
| 3909 | 252 | 212 | foreach my $ret (@return_statements) { | |||
| 3910 | 277 | 494 | $ret =~ s/^\s+|\s+$//g; | |||
| 3911 | ||||||
| 3912 | # Literal values | |||||
| 3913 | 277 | 1485 | if ($ret eq '1' || $ret eq '0') { | |||
| 3914 | 23 | 30 | $return_types{boolean}++; | |||
| 3915 | } elsif ($ret =~ /^['"]/) { | |||||
| 3916 | 16 | 19 | $return_types{string}++; | |||
| 3917 | } elsif ($ret =~ /^-?\d+$/) { | |||||
| 3918 | 103 | 156 | $return_types{integer}++; | |||
| 3919 | } elsif ($ret =~ /^-?\d+\.\d+$/) { | |||||
| 3920 | 0 | 0 | $return_types{number}++; | |||
| 3921 | } elsif ($ret eq 'undef') { | |||||
| 3922 | 1 | 2 | $return_types{undef}++; | |||
| 3923 | } elsif ($ret =~ /^\[/) { | |||||
| 3924 | # Data structures | |||||
| 3925 | 0 | 0 | $return_types{arrayref}++; | |||
| 3926 | } elsif ($ret =~ /^\{/) { | |||||
| 3927 | 1 | 1 | $return_types{hashref}++; | |||
| 3928 | } elsif ($ret =~ m{ | |||||
| 3929 | # Numeric expressions (heuristic, medium confidence) | |||||
| 3930 | # Don't match -> | |||||
| 3931 | (?: | |||||
| 3932 | \+ | -\b | \* | / | % | |||||
| 3933 | | \+\+ | -- | |||||
| 3934 | ) | |||||
| 3935 | }x) { | |||||
| 3936 | 36 | 54 | $return_types{number} += 2; | |||
| 3937 | } elsif ($ret =~ /\|\|\s*\d+\b/) { | |||||
| 3938 | # Logical-or fallback with numeric literal (e.g. $x || 200) | |||||
| 3939 | 0 | 0 | $return_types{integer} += 2; | |||
| 3940 | 0 | 0 | $self->_log(" OUTPUT: Numeric fallback expression detected"); | |||
| 3941 | } elsif($ret =~ /^length[\s\(]/) { | |||||
| 3942 | 0 | 0 | $return_types{integer}++; | |||
| 3943 | 0 | 0 | $min = 0; | |||
| 3944 | } elsif($ret =~ /^pos[\s\(]/) { | |||||
| 3945 | 0 | 0 | $return_types{integer}++; | |||
| 3946 | 0 | 0 | $min = 0; | |||
| 3947 | } elsif($ret =~ /^index[\s\(]/) { | |||||
| 3948 | 0 | 0 | $return_types{integer}++; | |||
| 3949 | 0 | 0 | $min = -1; | |||
| 3950 | } elsif($ret =~ /^rindex[\s\(]/) { | |||||
| 3951 | 0 | 0 | $return_types{integer}++; | |||
| 3952 | 0 | 0 | $min = -1; | |||
| 3953 | } elsif($ret =~ /^ord[\s\(]/) { | |||||
| 3954 | 0 | 0 | $return_types{integer}++; | |||
| 3955 | } elsif ($ret =~ /=/ && $ret =~ /\$\w+/) { | |||||
| 3956 | # Assignment returning a value (e.g. $self->{status} = $status) | |||||
| 3957 | # If assignment involves a numeric literal or variable, assume numeric intent | |||||
| 3958 | 1 | 3 | if ($ret =~ /\b\d+\b/) { | |||
| 3959 | 0 | 0 | $return_types{integer} += 2; | |||
| 3960 | 0 | 0 | $self->_log(" OUTPUT: Assignment with numeric value detected"); | |||
| 3961 | } else { | |||||
| 3962 | 1 | 1 | $return_types{scalar}++; | |||
| 3963 | } | |||||
| 3964 | } | |||||
| 3965 | # Variables/expressions | |||||
| 3966 | elsif ($ret =~ /\$\w+/) { | |||||
| 3967 | 86 | 237 | if ($ret =~ /\\\@/) { | |||
| 3968 | 0 | 0 | $return_types{arrayref}++; | |||
| 3969 | } elsif ($ret =~ /\\\%/) { | |||||
| 3970 | 0 | 0 | $return_types{hashref}++; | |||
| 3971 | } elsif ($ret =~ /bless/) { | |||||
| 3972 | 1 | 2 | $return_types{object} += 2; # Heigher weight | |||
| 3973 | } elsif ($ret =~ /^\{[^}]*\}$/) { | |||||
| 3974 | 0 | 0 | $return_types{hashref}++; | |||
| 3975 | } elsif ($ret =~ /^\[[^\]]*\]$/) { | |||||
| 3976 | 0 | 0 | $return_types{arrayref}++; | |||
| 3977 | } else { | |||||
| 3978 | 85 | 108 | $return_types{scalar}++; | |||
| 3979 | } | |||||
| 3980 | } | |||||
| 3981 | } | |||||
| 3982 | ||||||
| 3983 | # Determine most common return type | |||||
| 3984 | 252 | 303 | if (keys %return_types) { | |||
| 3985 | 248 36 | 382 64 | my ($most_common) = sort { $return_types{$b} <=> $return_types{$a} } keys %return_types; | |||
| 3986 | # Prefer integer over scalar if numeric returns dominate | |||||
| 3987 | 248 | 417 | if ($return_types{integer} && (!$return_types{string})) { | |||
| 3988 | 101 | 152 | if (!$output->{type} || $output->{type} eq 'scalar') { | |||
| 3989 | 101 | 129 | $output->{type} = 'integer'; | |||
| 3990 | 101 | 139 | $self->_log(" OUTPUT: Numeric returns dominate, forcing integer"); | |||
| 3991 | 101 | 242 | $output->{_type_confidence} ||= 'low'; | |||
| 3992 | 101 | 120 | if(defined($min)) { | |||
| 3993 | 0 | 0 | $output->{min} = $min; | |||
| 3994 | } | |||||
| 3995 | } | |||||
| 3996 | } | |||||
| 3997 | 248 | 302 | unless ($output->{type}) { | |||
| 3998 | 114 | 110 | $output->{type} = $most_common; | |||
| 3999 | ||||||
| 4000 | # Assign confidence for inferred numeric expressions | |||||
| 4001 | 114 | 121 | if ($most_common eq 'number') { | |||
| 4002 | 24 | 60 | $output->{_type_confidence} ||= 'medium'; | |||
| 4003 | 24 | 23 | if(defined($min)) { | |||
| 4004 | 0 | 0 | $output->{min} = $min; | |||
| 4005 | } | |||||
| 4006 | } | |||||
| 4007 | ||||||
| 4008 | 114 | 150 | $self->_log(" OUTPUT: Inferred type from code: $most_common"); | |||
| 4009 | } | |||||
| 4010 | } | |||||
| 4011 | ||||||
| 4012 | # Check for consistent single value returns | |||||
| 4013 | 252 | 677 | if (@return_statements == 1 && $return_statements[0] eq '1') { | |||
| 4014 | 18 | 20 | $output->{value} = 1; | |||
| 4015 | 18 | 39 | $output->{type} = 'boolean' if !$output->{type} || $output->{type} eq 'scalar'; | |||
| 4016 | 18 | 38 | $self->_log(" OUTPUT: Type already set to '$output->{type}', overriding with boolean") if($output->{'type'}); | |||
| 4017 | } | |||||
| 4018 | } else { | |||||
| 4019 | # No explicit return - might return nothing or implicit undef | |||||
| 4020 | 18 | 20 | $self->_log(" OUTPUT: No explicit return statement found"); | |||
| 4021 | } | |||||
| 4022 | } | |||||
| 4023 | } | |||||
| 4024 | ||||||
| 4025 | # -------------------------------------------------- | |||||
| 4026 | # _enhance_boolean_detection | |||||
| 4027 | # | |||||
| 4028 | # Purpose: Apply additional boolean-specific | |||||
| 4029 | # detection heuristics using a weighted | |||||
| 4030 | # scoring system, to override weak | |||||
| 4031 | # type assignments when there is strong | |||||
| 4032 | # evidence of a boolean return. | |||||
| 4033 | # | |||||
| 4034 | # Entry: $output - output hashref | |||||
| 4035 | # (modified in place). | |||||
| 4036 | # $pod - POD string. | |||||
| 4037 | # $code - method body source string. | |||||
| 4038 | # $method_name - method name string. | |||||
| 4039 | # | |||||
| 4040 | # Exit: Returns nothing. Modifies $output | |||||
| 4041 | # in place, setting type to 'boolean' | |||||
| 4042 | # if the score reaches | |||||
| 4043 | # $BOOLEAN_SCORE_THRESHOLD. | |||||
| 4044 | # | |||||
| 4045 | # Side effects: Logs scoring details to stdout when | |||||
| 4046 | # verbose is set. | |||||
| 4047 | # | |||||
| 4048 | # Notes: Only fires when output type is | |||||
| 4049 | # not yet set or is 'unknown'. Does not | |||||
| 4050 | # override explicitly set types. | |||||
| 4051 | # -------------------------------------------------- | |||||
| 4052 | sub _enhance_boolean_detection { | |||||
| 4053 | 269 | 354 | my ($self, $output, $pod, $code, $method_name) = @_; | |||
| 4054 | ||||||
| 4055 | 269 | 222 | my $boolean_score = 0; # Track evidence for boolean return | |||
| 4056 | ||||||
| 4057 | 269 | 565 | return unless !$output->{type} || $output->{type} eq 'unknown'; | |||
| 4058 | ||||||
| 4059 | # Look for stronger boolean indicators | |||||
| 4060 | 24 | 35 | if ($pod && !$output->{type}) { | |||
| 4061 | # Common boolean return patterns in POD | |||||
| 4062 | 3 | 6 | if ($pod =~ /returns?\s+(true|false|true|false|1|0)\s+(?:on|for|upon)\s+(success|failure|error|valid|invalid)/i) { | |||
| 4063 | 0 | 0 | $boolean_score += 30; | |||
| 4064 | 0 | 0 | $self->_log(' OUTPUT: Strong boolean indicator in POD (+30)'); | |||
| 4065 | } | |||||
| 4066 | ||||||
| 4067 | # Check for method names that suggest boolean returns | |||||
| 4068 | 3 | 9 | if ($pod =~ /(?:method|sub)\s+(\w+)/) { | |||
| 4069 | 0 | 0 | my $inferred_method_name = $1; | |||
| 4070 | 0 | 0 | if ($inferred_method_name =~ /^(is_|has_|can_|should_|contains_|exists_)/) { | |||
| 4071 | 0 | 0 | $boolean_score += 20; | |||
| 4072 | 0 | 0 | $self->_log(" OUTPUT: Inferred method name '$inferred_method_name' suggests boolean return (+20)"); | |||
| 4073 | } | |||||
| 4074 | } | |||||
| 4075 | } | |||||
| 4076 | ||||||
| 4077 | # Analyze code for boolean patterns | |||||
| 4078 | 24 | 28 | if ($code) { | |||
| 4079 | # Count boolean return idioms | |||||
| 4080 | 24 | 38 | my $true_returns = () = $code =~ /return\s+1\s*;/g; | |||
| 4081 | 24 | 28 | my $false_returns = () = $code =~ /return\s+0\s*;/g; | |||
| 4082 | ||||||
| 4083 | 24 | 44 | if ($true_returns + $false_returns >= 2) { | |||
| 4084 | 0 | 0 | $boolean_score += 40; | |||
| 4085 | 0 | 0 | $self->_log(' OUTPUT: Multiple 1/0 returns suggest boolean (+40)'); | |||
| 4086 | } elsif ($true_returns + $false_returns == 1) { | |||||
| 4087 | 2 | 2 | $boolean_score += 10; | |||
| 4088 | 2 | 2 | $self->_log(' OUTPUT: Single 1/0 return (+10)'); | |||
| 4089 | } | |||||
| 4090 | ||||||
| 4091 | # Ternary operators that return booleans | |||||
| 4092 | 24 | 34 | if ($code =~ /return\s+(?:\w+\s*[!=]=\s*\w+|\w+\s*>\s*\w+|\w+\s*<\s*\w+)\s*\?\s*(?:1|0)\s*:\s*(?:1|0)/) { | |||
| 4093 | 0 | 0 | $boolean_score += 25; | |||
| 4094 | 0 | 0 | $self->_log(' OUTPUT: Ternary with 1/0 suggests boolean (+25)'); | |||
| 4095 | } | |||||
| 4096 | ||||||
| 4097 | # Check for common boolean method patterns | |||||
| 4098 | 24 | 48 | if ($code =~ /return\s+[!\$\@\%]/) { | |||
| 4099 | # Returns negation or existence check | |||||
| 4100 | 0 | 0 | $boolean_score += 15; | |||
| 4101 | 0 | 0 | $self->_log(' OUTPUT: Returns negation/existence check (+15)'); | |||
| 4102 | } | |||||
| 4103 | } | |||||
| 4104 | ||||||
| 4105 | # Check method name for boolean indicators | |||||
| 4106 | 24 | 36 | if ($method_name) { | |||
| 4107 | 24 | 35 | if ($method_name =~ /^(is_|has_|can_|should_|contains_|exists_|check_|verify_|validate_)/) { | |||
| 4108 | 2 | 2 | $boolean_score += 25; | |||
| 4109 | 2 | 3 | $self->_log(" OUTPUT: Method name '$method_name' suggests boolean return (+25)"); | |||
| 4110 | } | |||||
| 4111 | 24 | 31 | if ($method_name =~ /_ok$/) { | |||
| 4112 | 0 | 0 | $boolean_score += 30; | |||
| 4113 | 0 | 0 | $self->_log(" OUTPUT: Method name '$method_name' ends with '_ok' (+30)"); | |||
| 4114 | } | |||||
| 4115 | } | |||||
| 4116 | ||||||
| 4117 | # Apply boolean type if we have strong evidence | |||||
| 4118 | # Override weak type assignments (like 'array' from false positive) | |||||
| 4119 | 24 | 43 | if($boolean_score >= $BOOLEAN_SCORE_THRESHOLD) { | |||
| 4120 | 2 | 6 | if (!$output->{type} || $output->{type} eq 'scalar' || $output->{type} eq 'array' || $output->{type} eq 'undef') { | |||
| 4121 | 2 | 4 | my $old_type = $output->{type} || 'none'; | |||
| 4122 | 2 | 3 | $output->{type} = 'boolean'; | |||
| 4123 | 2 | 3 | $self->_log(" OUTPUT: Boolean score $boolean_score >= $BOOLEAN_SCORE_THRESHOLD, setting type to boolean (was: $old_type)"); | |||
| 4124 | } | |||||
| 4125 | } | |||||
| 4126 | } | |||||
| 4127 | ||||||
| 4128 | # -------------------------------------------------- | |||||
| 4129 | # _detect_list_context | |||||
| 4130 | # | |||||
| 4131 | # Purpose: Detect methods that return different | |||||
| 4132 | # values depending on calling context | |||||
| 4133 | # via wantarray, and methods that | |||||
| 4134 | # return explicit lists. | |||||
| 4135 | # | |||||
| 4136 | # Entry: $output - output hashref (modified | |||||
| 4137 | # in place). | |||||
| 4138 | # $code - method body source string. | |||||
| 4139 | # | |||||
| 4140 | # Exit: Returns nothing. Modifies $output | |||||
| 4141 | # in place, setting _context_aware, | |||||
| 4142 | # _list_context, _scalar_context, | |||||
| 4143 | # _list_return, and/or type keys. | |||||
| 4144 | # | |||||
| 4145 | # Side effects: Logs detections to stdout when | |||||
| 4146 | # verbose is set. | |||||
| 4147 | # -------------------------------------------------- | |||||
| 4148 | sub _detect_list_context { | |||||
| 4149 | 268 | 268 | my ($self, $output, $code) = @_; | |||
| 4150 | 268 | 259 | return unless $code; | |||
| 4151 | ||||||
| 4152 | # Check for wantarray usage | |||||
| 4153 | 268 | 374 | if ($code =~ /wantarray/) { | |||
| 4154 | 5 | 6 | $output->{_context_aware} = 1; | |||
| 4155 | 5 | 6 | $self->_log(' OUTPUT: Method uses wantarray - context sensitive'); | |||
| 4156 | ||||||
| 4157 | # Debug: show what we're matching against | |||||
| 4158 | 5 | 10 | if ($code =~ /(wantarray[^;]+;)/s) { | |||
| 4159 | 4 | 9 | $self->_log(" DEBUG wantarray line: $1"); | |||
| 4160 | } | |||||
| 4161 | ||||||
| 4162 | 5 | 19 | if ($code =~ /wantarray\s*\?\s*\(([^)]+)\)\s*:\s*([^;]+)/s) { | |||
| 4163 | # Pattern 1: wantarray ? (list, items) : scalar_value (with parens) | |||||
| 4164 | 1 | 2 | my ($list_return, $scalar_return) = ($1, $2); | |||
| 4165 | 1 | 2 | $self->_log(" DEBUG list (with parens): [$list_return], scalar: [$scalar_return]"); | |||
| 4166 | ||||||
| 4167 | 1 | 1 | $output->{_list_context} = $self->_infer_type_from_expression($list_return); | |||
| 4168 | 1 | 1 | $output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return); | |||
| 4169 | 1 | 2 | $self->_log(' OUTPUT: Detected context-dependent returns (parenthesized)'); | |||
| 4170 | } elsif ($code =~ /wantarray\s*\?\s*([^:]+?)\s*:\s*([^;]+)/s) { | |||||
| 4171 | # Pattern 2: wantarray ? @array : scalar (no parens around list) | |||||
| 4172 | 3 | 6 | my ($list_return, $scalar_return) = ($1, $2); | |||
| 4173 | # Clean up | |||||
| 4174 | 3 | 4 | $list_return =~ s/^\s+|\s+$//g; | |||
| 4175 | 3 | 6 | $scalar_return =~ s/^\s+|\s+$//g; | |||
| 4176 | ||||||
| 4177 | 3 | 4 | $self->_log(" DEBUG list (no parens): [$list_return], scalar: [$scalar_return]"); | |||
| 4178 | ||||||
| 4179 | 3 | 4 | $output->{_list_context} = $self->_infer_type_from_expression($list_return); | |||
| 4180 | 3 | 3 | $output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return); | |||
| 4181 | 3 | 3 | $self->_log(' OUTPUT: Detected context-dependent returns (non-parenthesized)'); | |||
| 4182 | } elsif ($code =~ /return[^;]*unless\s+wantarray.*?return\s*\(([^)]+)\)/s) { | |||||
| 4183 | # Pattern 3: return unless wantarray; return (list); | |||||
| 4184 | 1 | 1 | $output->{_list_context} = { type => 'array' }; | |||
| 4185 | 1 | 2 | $self->_log(' OUTPUT: Detected list context return after wantarray check'); | |||
| 4186 | } | |||||
| 4187 | } | |||||
| 4188 | ||||||
| 4189 | # Detect explicit list returns (multiple values in parentheses) | |||||
| 4190 | # Avoid false positives from function calls | |||||
| 4191 | 268 | 421 | if ($code =~ /return\s*\(\s*([^)]+)\s*\)\s*;/) { | |||
| 4192 | 2 | 2 | my $content = $1; | |||
| 4193 | # Count commas outside of nested structures | |||||
| 4194 | 2 | 2 | my $comma_count = 0; | |||
| 4195 | 2 | 2 | my $depth = 0; | |||
| 4196 | 2 | 5 | for my $char (split //, $content) { | |||
| 4197 | 52 | 65 | $depth++ if $char eq '(' || $char eq '[' || $char eq '{'; | |||
| 4198 | 52 | 64 | $depth-- if $char eq ')' || $char eq ']' || $char eq '}'; | |||
| 4199 | 52 | 47 | $comma_count++ if $char eq ',' && $depth == 0; | |||
| 4200 | } | |||||
| 4201 | ||||||
| 4202 | 2 | 8 | if ($comma_count > 0 && $content !~ /\b(?:bless|new)\b/) { | |||
| 4203 | # Multiple values returned | |||||
| 4204 | 2 | 4 | unless ($output->{type} && $output->{type} eq 'boolean') { | |||
| 4205 | 2 | 2 | $output->{type} = 'array'; | |||
| 4206 | 2 | 3 | $output->{_list_return} = $comma_count + 1; | |||
| 4207 | 2 | 4 | $self->_log(' OUTPUT: Returns list of ' . ($comma_count + 1) . ' values'); | |||
| 4208 | } | |||||
| 4209 | } | |||||
| 4210 | } | |||||
| 4211 | } | |||||
| 4212 | ||||||
| 4213 | # -------------------------------------------------- | |||||
| 4214 | # _detect_void_context | |||||
| 4215 | # | |||||
| 4216 | # Purpose: Detect methods that return nothing | |||||
| 4217 | # meaningful (void context), methods | |||||
| 4218 | # that always return 1 as a success | |||||
| 4219 | # indicator, and methods whose name | |||||
| 4220 | # suggests void context (setters, | |||||
| 4221 | # mutators, loggers). | |||||
| 4222 | # | |||||
| 4223 | # Entry: $output - output hashref | |||||
| 4224 | # (modified in place). | |||||
| 4225 | # $code - method body source string. | |||||
| 4226 | # $method_name - method name string. | |||||
| 4227 | # | |||||
| 4228 | # Exit: Returns nothing. Modifies $output | |||||
| 4229 | # in place, setting _void_context, | |||||
| 4230 | # _success_indicator, and/or type. | |||||
| 4231 | # | |||||
| 4232 | # Side effects: Logs detections to stdout when | |||||
| 4233 | # verbose is set. | |||||
| 4234 | # -------------------------------------------------- | |||||
| 4235 | sub _detect_void_context { | |||||
| 4236 | 269 | 269 | my ($self, $output, $code, $method_name) = @_; | |||
| 4237 | 269 | 276 | return unless $code; | |||
| 4238 | ||||||
| 4239 | 269 | 353 | $self->_log(" DEBUG _detect_void_context called for $method_name"); | |||
| 4240 | ||||||
| 4241 | # Methods that typically don't return meaningful values | |||||
| 4242 | 269 | 1033 | my $void_patterns = { | |||
| 4243 | 'setter' => qr/^set_\w+$/, | |||||
| 4244 | 'mutator' => qr/^(?:add|remove|delete|clear|reset|update)_/, | |||||
| 4245 | 'logger' => qr/^(?:log|debug|warn|error|info)$/, | |||||
| 4246 | 'printer' => qr/^(?:print|say|dump)_/, | |||||
| 4247 | }; | |||||
| 4248 | ||||||
| 4249 | # Check if method name suggests void context | |||||
| 4250 | 269 | 340 | foreach my $type (keys %$void_patterns) { | |||
| 4251 | 1063 | 1770 | if ($method_name =~ $void_patterns->{$type}) { | |||
| 4252 | 9 | 8 | $output->{_void_context_hint} = $type; | |||
| 4253 | 9 | 13 | $self->_log(" OUTPUT: Method name suggests $type (typically void context)"); | |||
| 4254 | 9 | 8 | last; | |||
| 4255 | } | |||||
| 4256 | } | |||||
| 4257 | ||||||
| 4258 | # Analyze return statements | |||||
| 4259 | 269 | 598 | my @returns = $code =~ /return\s*([^;]*);/g; | |||
| 4260 | ||||||
| 4261 | 269 | 484 | $self->_log(' DEBUG Found ' . scalar(@returns) . ' return statements'); | |||
| 4262 | ||||||
| 4263 | # Count different return patterns | |||||
| 4264 | 269 | 239 | my $no_value_returns = 0; | |||
| 4265 | 269 | 184 | my $true_returns = 0; | |||
| 4266 | 269 | 185 | my $self_returns = 0; | |||
| 4267 | ||||||
| 4268 | 269 | 263 | foreach my $ret (@returns) { | |||
| 4269 | 280 | 472 | $ret =~ s/^\s+|\s+$//g; | |||
| 4270 | 280 | 375 | $self->_log(" DEBUG return value: [$ret]"); | |||
| 4271 | 280 | 297 | $no_value_returns++ if $ret eq ''; | |||
| 4272 | 280 | 301 | $no_value_returns++ if($ret =~ /^(if|unless)\s/); | |||
| 4273 | 280 | 260 | $true_returns++ if $ret eq '1'; | |||
| 4274 | 280 | 282 | $self_returns++ if $ret eq '$self'; | |||
| 4275 | 280 | 301 | if ($ret =~ /\?\s*1\s*:\s*0\b/) { | |||
| 4276 | # Strong boolean signal: ternary returning 1/0 | |||||
| 4277 | 8 | 4 | $true_returns++; | |||
| 4278 | # $self->_log(" OUTPUT: Ternary 1:0 return detected, treating as boolean (+40)"); | |||||
| 4279 | 8 | 8 | $self->_log(' OUTPUT: Ternary 1:0 return detected, treating as boolean'); | |||
| 4280 | } | |||||
| 4281 | } | |||||
| 4282 | ||||||
| 4283 | 269 | 241 | my $total_returns = scalar(@returns); | |||
| 4284 | ||||||
| 4285 | 269 | 432 | $self->_log(" DEBUG no_value=$no_value_returns, true=$true_returns, self=$self_returns, total=$total_returns"); | |||
| 4286 | ||||||
| 4287 | # Void context indicators | |||||
| 4288 | 269 | 914 | if ($no_value_returns > 0 && $no_value_returns == $total_returns) { | |||
| 4289 | 6 | 7 | $output->{_void_context} = 1; | |||
| 4290 | 6 | 5 | $output->{type} = 'void'; # This should override any previous type | |||
| 4291 | 6 | 6 | $self->_log(' OUTPUT: All returns are empty - void context method'); | |||
| 4292 | } elsif ($true_returns > 0 && $true_returns == $total_returns && $total_returns >= 1) { | |||||
| 4293 | # Methods that always return true (success indicator) | |||||
| 4294 | 27 | 26 | $output->{_success_indicator} = 1; | |||
| 4295 | # Don't override type if already set to boolean | |||||
| 4296 | 27 | 58 | unless ($output->{type} && $output->{type} eq 'boolean') { | |||
| 4297 | 2 | 2 | $output->{type} = 'boolean'; | |||
| 4298 | } | |||||
| 4299 | 27 | 29 | $self->_log(' OUTPUT: Always returns 1 - success indicator pattern'); | |||
| 4300 | } | |||||
| 4301 | } | |||||
| 4302 | ||||||
| 4303 | # -------------------------------------------------- | |||||
| 4304 | # _detect_chaining_pattern | |||||
| 4305 | # | |||||
| 4306 | # Purpose: Detect methods that return $self for | |||||
| 4307 | # fluent interface chaining, by counting | |||||
| 4308 | # the proportion of return statements | |||||
| 4309 | # that return $self. | |||||
| 4310 | # | |||||
| 4311 | # Entry: $output - output hashref (modified | |||||
| 4312 | # in place). | |||||
| 4313 | # $code - method body source string. | |||||
| 4314 | # | |||||
| 4315 | # Exit: Returns nothing. Modifies $output | |||||
| 4316 | # in place, setting type to 'object', | |||||
| 4317 | # _returns_self to 1, and isa to the | |||||
| 4318 | # current package name when the | |||||
| 4319 | # proportion of $self returns is >= 0.8. | |||||
| 4320 | # | |||||
| 4321 | # Side effects: Logs detection to stdout when | |||||
| 4322 | # verbose is set. | |||||
| 4323 | # -------------------------------------------------- | |||||
| 4324 | sub _detect_chaining_pattern { | |||||
| 4325 | 268 | 266 | my ($self, $output, $code) = @_; | |||
| 4326 | 268 | 224 | return unless $code; | |||
| 4327 | ||||||
| 4328 | # Count returns of $self | |||||
| 4329 | 268 | 208 | my $self_returns = 0; | |||
| 4330 | 268 | 170 | my $total_returns = 0; | |||
| 4331 | ||||||
| 4332 | 268 | 517 | while ($code =~ /return\s+([^;]+);/g) { | |||
| 4333 | 274 | 258 | my $ret = $1; | |||
| 4334 | 274 | 425 | $ret =~ s/^\s+|\s+$//g; | |||
| 4335 | 274 | 179 | $total_returns++; | |||
| 4336 | 274 | 367 | $self_returns++ if $ret eq '$self'; | |||
| 4337 | } | |||||
| 4338 | ||||||
| 4339 | # If most/all returns are $self, it's a chaining method | |||||
| 4340 | 268 | 365 | if ($self_returns > 0 && $total_returns > 0) { | |||
| 4341 | 8 | 9 | my $ratio = $self_returns / $total_returns; | |||
| 4342 | ||||||
| 4343 | 8 | 11 | if ($ratio >= 0.8) { | |||
| 4344 | 6 | 6 | $output->{type} = 'object'; | |||
| 4345 | 6 | 6 | $output->{_returns_self} = 1; | |||
| 4346 | ||||||
| 4347 | # Get the class name | |||||
| 4348 | 6 | 9 | if ($self->{_document}) { | |||
| 4349 | 5 | 6 | my $pkg = $self->{_document}->find_first('PPI::Statement::Package'); | |||
| 4350 | 5 | 358 | $output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN'; | |||
| 4351 | 5 | 58 | $self->{_package_name} //= $output->{isa}; | |||
| 4352 | } | |||||
| 4353 | ||||||
| 4354 | 6 | 11 | $self->_log(" OUTPUT: Chainable method - returns \$self ($self_returns/$total_returns returns)"); | |||
| 4355 | } | |||||
| 4356 | } | |||||
| 4357 | } | |||||
| 4358 | ||||||
| 4359 | # -------------------------------------------------- | |||||
| 4360 | # _detect_error_conventions | |||||
| 4361 | # | |||||
| 4362 | # Purpose: Analyse how a method signals errors | |||||
| 4363 | # by detecting patterns such as | |||||
| 4364 | # 'return undef if', implicit bare | |||||
| 4365 | # returns, empty list returns, 0/1 | |||||
| 4366 | # boolean error patterns, and eval | |||||
| 4367 | # exception handling. | |||||
| 4368 | # | |||||
| 4369 | # Entry: $output - output hashref (modified | |||||
| 4370 | # in place). | |||||
| 4371 | # $code - method body source string. | |||||
| 4372 | # | |||||
| 4373 | # Exit: Returns nothing. Modifies $output | |||||
| 4374 | # in place, setting _error_handling, | |||||
| 4375 | # _error_return, and | |||||
| 4376 | # _success_failure_pattern keys. | |||||
| 4377 | # | |||||
| 4378 | # Side effects: Logs detections to stdout when | |||||
| 4379 | # verbose is set. | |||||
| 4380 | # -------------------------------------------------- | |||||
| 4381 | sub _detect_error_conventions { | |||||
| 4382 | 269 | 256 | my ($self, $output, $code) = @_; | |||
| 4383 | ||||||
| 4384 | 269 | 225 | return unless $code; | |||
| 4385 | ||||||
| 4386 | 269 | 304 | $self->_log(' DEBUG _detect_error_conventions called'); | |||
| 4387 | ||||||
| 4388 | 269 | 188 | my %error_patterns; | |||
| 4389 | ||||||
| 4390 | # Pattern 1: return undef if/unless condition | |||||
| 4391 | 269 | 394 | while ($code =~ /return\s+undef\s+(?:if|unless)\s+([^;]+);/g) { | |||
| 4392 | 7 7 | 4 11 | push @{$error_patterns{undef_on_error}}, $1; | |||
| 4393 | 7 | 7 | $self->_log(" DEBUG Found 'return undef' pattern"); | |||
| 4394 | } | |||||
| 4395 | ||||||
| 4396 | # Pattern 2: return if/unless (implicit undef) | |||||
| 4397 | 269 | 430 | while ($code =~ /return\s+(?:if|unless)\s+([^;]+);/g) { | |||
| 4398 | 7 7 | 5 10 | push @{$error_patterns{implicit_undef}}, $1; | |||
| 4399 | 7 | 7 | $self->_log(" DEBUG Found implicit undef pattern"); | |||
| 4400 | } | |||||
| 4401 | ||||||
| 4402 | # Pattern 3: return () - matches with or without conditions | |||||
| 4403 | 269 | 372 | if ($code =~ /return\s*\(\s*\)\s*(?:if|unless|;)/) { | |||
| 4404 | 3 | 4 | $error_patterns{empty_list} = 1; | |||
| 4405 | 3 | 3 | $self->_log(" DEBUG Found empty list return"); | |||
| 4406 | } | |||||
| 4407 | ||||||
| 4408 | # Pattern 4: return 0/1 pattern (indicates boolean with error handling) | |||||
| 4409 | 269 | 209 | my $zero_returns = 0; | |||
| 4410 | 269 | 180 | my $one_returns = 0; | |||
| 4411 | # Match "return 0" or "return 1" followed by anything (condition or semicolon) | |||||
| 4412 | 269 | 457 | while ($code =~ /return\s+(0|1)\s*(?:;|if|unless)/g) { | |||
| 4413 | 26 | 51 | if ($1 eq '0') { | |||
| 4414 | 3 | 4 | $zero_returns++; | |||
| 4415 | } else { | |||||
| 4416 | 23 | 22 | $one_returns++; | |||
| 4417 | } | |||||
| 4418 | } | |||||
| 4419 | ||||||
| 4420 | 269 | 336 | if ($zero_returns > 0 && $one_returns > 0) { | |||
| 4421 | 2 | 2 | $error_patterns{zero_on_error} = 1; | |||
| 4422 | 2 | 4 | $self->_log(" DEBUG Found 0/1 return pattern ($zero_returns zeros, $one_returns ones)"); | |||
| 4423 | } | |||||
| 4424 | ||||||
| 4425 | # Pattern 5: Exception handling with eval | |||||
| 4426 | 269 | 307 | if ($code =~ /eval\s*\{/) { | |||
| 4427 | # Check if there's error handling after eval | |||||
| 4428 | 3 | 10 | if ($code =~ /eval\s*\{.*?\}[^}]*(?:if\s*\(\s*\$\@|catch|return\s+undef)/s) { | |||
| 4429 | 3 | 3 | $error_patterns{exception_handling} = 1; | |||
| 4430 | 3 | 3 | $self->_log(' DEBUG Found exception handling with eval'); | |||
| 4431 | } | |||||
| 4432 | } | |||||
| 4433 | ||||||
| 4434 | # Detect success/failure return pattern | |||||
| 4435 | 269 | 437 | my @all_returns = $code =~ /return\s+([^;]+);/g; | |||
| 4436 | 269 278 | 255 368 | my $has_undef = grep { /^\s*undef\s*(?:if|unless|$)/ } @all_returns; | |||
| 4437 | 269 278 | 221 603 | my $has_value = grep { !/^\s*undef\s*$/ && !/^\s*$/ } @all_returns; | |||
| 4438 | ||||||
| 4439 | 269 | 318 | if ($has_undef && $has_value && scalar(@all_returns) >= 2) { | |||
| 4440 | 6 | 7 | $output->{_success_failure_pattern} = 1; | |||
| 4441 | 6 | 6 | $self->_log(" OUTPUT: Uses success/failure return pattern"); | |||
| 4442 | } | |||||
| 4443 | ||||||
| 4444 | # Store error conventions in output | |||||
| 4445 | 269 | 283 | if(scalar(keys %error_patterns)) { | |||
| 4446 | 17 | 18 | $output->{_error_handling} = \%error_patterns; | |||
| 4447 | ||||||
| 4448 | # Determine primary error convention | |||||
| 4449 | 17 | 26 | if ($error_patterns{undef_on_error}) { | |||
| 4450 | 5 | 5 | $output->{_error_return} = 'undef'; | |||
| 4451 | 5 | 5 | $self->_log(" OUTPUT: Returns undef on error"); | |||
| 4452 | } elsif ($error_patterns{implicit_undef}) { | |||||
| 4453 | 6 | 6 | $output->{_error_return} = 'undef'; | |||
| 4454 | 6 | 6 | $self->_log(" OUTPUT: Returns implicit undef on error"); | |||
| 4455 | } elsif ($error_patterns{empty_list}) { | |||||
| 4456 | 3 | 4 | $output->{_error_return} = 'empty_list'; | |||
| 4457 | 3 | 2 | $self->_log(" OUTPUT: Returns empty list on error"); | |||
| 4458 | } elsif ($error_patterns{zero_on_error}) { | |||||
| 4459 | 2 | 2 | $output->{_error_return} = 'false'; | |||
| 4460 | 2 | 2 | $self->_log(" OUTPUT: Returns 0/false on error"); | |||
| 4461 | } | |||||
| 4462 | ||||||
| 4463 | 17 | 27 | if ($error_patterns{exception_handling}) { | |||
| 4464 | 3 | 3 | $self->_log(" OUTPUT: Has exception handling"); | |||
| 4465 | } | |||||
| 4466 | } else { | |||||
| 4467 | 252 | 302 | delete $output->{_error_handling}; | |||
| 4468 | } | |||||
| 4469 | } | |||||
| 4470 | ||||||
| 4471 | # -------------------------------------------------- | |||||
| 4472 | # _infer_type_from_expression | |||||
| 4473 | # | |||||
| 4474 | # Purpose: Infer the data type of a return | |||||
| 4475 | # expression string by matching it | |||||
| 4476 | # against common Perl literal and | |||||
| 4477 | # variable patterns. | |||||
| 4478 | # | |||||
| 4479 | # Entry: $expr - return expression string, | |||||
| 4480 | # trimmed of leading and | |||||
| 4481 | # trailing whitespace. | |||||
| 4482 | # May be undef. | |||||
| 4483 | # | |||||
| 4484 | # Exit: Returns a type hashref of the form | |||||
| 4485 | # { type => '...' } and optionally | |||||
| 4486 | # { min => N }. Defaults to | |||||
| 4487 | # { type => 'scalar' } when no | |||||
| 4488 | # pattern matches. | |||||
| 4489 | # | |||||
| 4490 | # Side effects: None. | |||||
| 4491 | # -------------------------------------------------- | |||||
| 4492 | sub _infer_type_from_expression { | |||||
| 4493 | 37 | 673 | my ($self, $expr) = @_; | |||
| 4494 | ||||||
| 4495 | 37 | 43 | return { type => 'scalar' } unless defined $expr; | |||
| 4496 | ||||||
| 4497 | 35 | 69 | $expr =~ s/^\s+|\s+$//g; | |||
| 4498 | ||||||
| 4499 | # Check for multiple comma-separated values (indicates array/list) | |||||
| 4500 | 35 | 63 | if ($expr =~ /,/) { | |||
| 4501 | 5 | 4 | my $comma_count = 0; | |||
| 4502 | 5 | 6 | my $depth = 0; | |||
| 4503 | 5 | 11 | for my $char (split //, $expr) { | |||
| 4504 | 75 | 60 | $depth++ if $char =~ /[\(\[\{]/; | |||
| 4505 | 75 | 55 | $depth-- if $char =~ /[\)\]\}]/; | |||
| 4506 | 75 | 69 | $comma_count++ if $char eq ',' && $depth == 0; | |||
| 4507 | } | |||||
| 4508 | ||||||
| 4509 | 5 | 8 | if ($comma_count > 0) { | |||
| 4510 | 3 | 7 | return { type => 'array' }; | |||
| 4511 | } | |||||
| 4512 | } | |||||
| 4513 | ||||||
| 4514 | # Check for @ prefix (array) | |||||
| 4515 | 32 | 77 | if ($expr =~ /^\@\w+/ || $expr =~ /^qw\(/ || $expr =~ /^\@\{/) { | |||
| 4516 | 6 | 16 | return { type => 'array' }; | |||
| 4517 | } | |||||
| 4518 | ||||||
| 4519 | # Check for scalar() function - returns count | |||||
| 4520 | 26 | 28 | if ($expr =~ /scalar\s*\(/) { | |||
| 4521 | 4 | 9 | return { type => 'integer', min => 0 }; | |||
| 4522 | } | |||||
| 4523 | ||||||
| 4524 | # Check for array reference | |||||
| 4525 | 22 | 40 | if ($expr =~ /^\[/ || $expr =~ /^\\\@/) { | |||
| 4526 | 4 | 9 | return { type => 'arrayref' }; | |||
| 4527 | } | |||||
| 4528 | ||||||
| 4529 | # Check for hash reference | |||||
| 4530 | 18 | 26 | if ($expr =~ /^\{/ || $expr =~ /^\\\%/) { | |||
| 4531 | 3 | 6 | return { type => 'hashref' }; | |||
| 4532 | } | |||||
| 4533 | ||||||
| 4534 | # Check for hash | |||||
| 4535 | 15 | 24 | if ($expr =~ /^\%\w+/ || $expr =~ /^\%\{/) { | |||
| 4536 | 0 | 0 | return { type => 'hash' }; | |||
| 4537 | } | |||||
| 4538 | ||||||
| 4539 | # Check for strings | |||||
| 4540 | 15 | 30 | if ($expr =~ /^['"]/ || $expr =~ /['"]$/) { | |||
| 4541 | 2 | 4 | return { type => 'string' }; | |||
| 4542 | } | |||||
| 4543 | ||||||
| 4544 | # Check for booleans first â must come before the integer check | |||||
| 4545 | # since /^-?\d+$/ would otherwise match 0 and 1 as integers | |||||
| 4546 | 13 | 16 | if($expr =~ /^[01]$/) { | |||
| 4547 | 4 | 8 | return { type => 'boolean' }; | |||
| 4548 | } | |||||
| 4549 | ||||||
| 4550 | # Check for integers | |||||
| 4551 | 9 | 16 | if($expr =~ /^-?\d+$/) { | |||
| 4552 | 4 | 8 | return { type => 'integer' }; | |||
| 4553 | } | |||||
| 4554 | ||||||
| 4555 | 5 | 7 | if ($expr =~ /^-?\d+\.\d+$/) { | |||
| 4556 | 2 | 5 | return { type => 'number' }; | |||
| 4557 | } | |||||
| 4558 | ||||||
| 4559 | # Check for objects | |||||
| 4560 | 3 | 5 | if ($expr =~ /bless/) { | |||
| 4561 | 0 | 0 | return { type => 'object' }; | |||
| 4562 | } | |||||
| 4563 | ||||||
| 4564 | 3 | 10 | if($expr =~ /\blength\s*\(/) { | |||
| 4565 | 2 | 3 | return { type => 'integer', min => 0 }; | |||
| 4566 | } | |||||
| 4567 | ||||||
| 4568 | # Default to scalar | |||||
| 4569 | 1 | 1 | return { type => 'scalar' }; | |||
| 4570 | } | |||||
| 4571 | ||||||
| 4572 | # -------------------------------------------------- | |||||
| 4573 | # _detect_chaining_from_pod | |||||
| 4574 | # | |||||
| 4575 | # Purpose: Check POD documentation for explicit | |||||
| 4576 | # indications that a method is chainable | |||||
| 4577 | # or part of a fluent interface. | |||||
| 4578 | # | |||||
| 4579 | # Entry: $output - output hashref (modified | |||||
| 4580 | # in place). | |||||
| 4581 | # $pod - POD string for the method. | |||||
| 4582 | # | |||||
| 4583 | # Exit: Returns nothing. Sets _returns_self | |||||
| 4584 | # in $output if chaining keywords are | |||||
| 4585 | # found. | |||||
| 4586 | # | |||||
| 4587 | # Side effects: Logs detection to stdout when | |||||
| 4588 | # verbose is set. | |||||
| 4589 | # -------------------------------------------------- | |||||
| 4590 | sub _detect_chaining_from_pod { | |||||
| 4591 | 5 | 14 | my ($self, $output, $pod) = @_; | |||
| 4592 | 5 | 6 | return unless $pod; | |||
| 4593 | ||||||
| 4594 | # Look for explicit chaining documentation | |||||
| 4595 | 4 | 22 | if ($pod =~ /returns?\s+(?:\$)?self\b/i || | |||
| 4596 | $pod =~ /chainable/i || | |||||
| 4597 | $pod =~ /fluent\s+interface/i || | |||||
| 4598 | $pod =~ /method\s+chaining/i) { | |||||
| 4599 | ||||||
| 4600 | 3 | 4 | $output->{_returns_self} = 1; | |||
| 4601 | 3 | 4 | $self->_log(" OUTPUT: POD indicates chainable/fluent interface"); | |||
| 4602 | } | |||||
| 4603 | } | |||||
| 4604 | ||||||
| 4605 | # -------------------------------------------------- | |||||
| 4606 | # _validate_output | |||||
| 4607 | # | |||||
| 4608 | # Purpose: Apply basic sanity checks to the | |||||
| 4609 | # assembled output hashref and warn | |||||
| 4610 | # about suspicious type combinations, | |||||
| 4611 | # normalising clearly invalid types to | |||||
| 4612 | # 'string'. | |||||
| 4613 | # | |||||
| 4614 | # Entry: $output - output hashref (modified | |||||
| 4615 | # in place). | |||||
| 4616 | # | |||||
| 4617 | # Exit: Returns nothing. May modify type key | |||||
| 4618 | # in $output. Logs warnings to stdout | |||||
| 4619 | # when verbose is set. | |||||
| 4620 | # | |||||
| 4621 | # Side effects: None. | |||||
| 4622 | # -------------------------------------------------- | |||||
| 4623 | sub _validate_output { | |||||
| 4624 | 260 | 2575 | my ($self, $output) = @_; | |||
| 4625 | ||||||
| 4626 | # Warn about suspicious combinations | |||||
| 4627 | 260 | 542 | if (defined $output->{type} && $output->{type} eq 'boolean' && !defined($output->{value})) { | |||
| 4628 | 12 | 20 | $self->_log(' WARNING Boolean type without value - may want to set value: 1'); | |||
| 4629 | } | |||||
| 4630 | 260 | 344 | if ($output->{value} && defined $output->{type} && $output->{type} ne 'boolean') { | |||
| 4631 | 0 | 0 | $self->_log(" WARNING Value set but type is not boolean: $output->{type}"); | |||
| 4632 | } | |||||
| 4633 | 260 2080 | 290 1826 | my %valid_types = map { $_ => 1 } qw(string integer number boolean arrayref hashref object void); | |||
| 4634 | 260 | 358 | if(exists $output->{type}) { | |||
| 4635 | 256 | 397 | if(!$valid_types{$output->{type}}) { | |||
| 4636 | 56 | 84 | $self->_log(" WARNING Output value type is unknown: '$output->{type}', setting to string"); | |||
| 4637 | 56 | 91 | $output->{type} = 'string'; | |||
| 4638 | } | |||||
| 4639 | } | |||||
| 4640 | } | |||||
| 4641 | ||||||
| 4642 | # -------------------------------------------------- | |||||
| 4643 | # _parse_constraints | |||||
| 4644 | # | |||||
| 4645 | # Purpose: Parse a constraint string extracted | |||||
| 4646 | # from POD documentation and populate | |||||
| 4647 | # min, max, or other constraint fields | |||||
| 4648 | # in a parameter hashref. | |||||
| 4649 | # | |||||
| 4650 | # Entry: $param - hashref for the parameter | |||||
| 4651 | # being annotated (modified | |||||
| 4652 | # in place). | |||||
| 4653 | # $constraint - the constraint string, | |||||
| 4654 | # e.g. '3-50', 'positive', | |||||
| 4655 | # '>= 0', 'min 3'. | |||||
| 4656 | # | |||||
| 4657 | # Exit: Returns nothing. Modifies $param in | |||||
| 4658 | # place by setting min and/or max keys. | |||||
| 4659 | # | |||||
| 4660 | # Side effects: Logs min/max values to stdout when | |||||
| 4661 | # verbose is set. | |||||
| 4662 | # -------------------------------------------------- | |||||
| 4663 | sub _parse_constraints { | |||||
| 4664 | 22 | 2923 | my ($self, $param, $constraint) = @_; | |||
| 4665 | ||||||
| 4666 | # Range: "3-50" or "1-100 chars" | |||||
| 4667 | 22 | 94 | if ($constraint =~ /(\d+)\s*-\s*(\d+)/) { | |||
| 4668 | 8 | 11 | $param->{min} = $1; | |||
| 4669 | 8 | 9 | $param->{max} = $2; | |||
| 4670 | } | |||||
| 4671 | elsif ($constraint =~ /(\d+)\s*\.\.\s*(\d+)/) { | |||||
| 4672 | # Range: 0..19 | |||||
| 4673 | 2 | 3 | $param->{min} = $1; | |||
| 4674 | 2 | 3 | $param->{max} = $2; | |||
| 4675 | } | |||||
| 4676 | # Minimum: "min 3" or "at least 5" | |||||
| 4677 | elsif ($constraint =~ /(?:min|minimum|at least)\s*(\d+)/i) { | |||||
| 4678 | 4 | 6 | $param->{min} = $1; | |||
| 4679 | } | |||||
| 4680 | # Maximum: "max 50" or "up to 100" | |||||
| 4681 | elsif ($constraint =~ /(?:max|maximum|up to)\s*(\d+)/i) { | |||||
| 4682 | 3 | 4 | $param->{max} = $1; | |||
| 4683 | } | |||||
| 4684 | # Positive | |||||
| 4685 | elsif ($constraint =~ /positive/i) { | |||||
| 4686 | 2 | 5 | $param->{min} = 1 if $param->{type} && $param->{type} eq 'integer'; | |||
| 4687 | 2 | 5 | $param->{min} = 0.01 if $param->{type} && $param->{type} eq 'number'; | |||
| 4688 | } | |||||
| 4689 | # Non-negative | |||||
| 4690 | elsif ($constraint =~ /non-negative/i) { | |||||
| 4691 | 2 | 3 | $param->{min} = 0; | |||
| 4692 | } elsif($constraint =~ /(.+)?\s(.+)/) { | |||||
| 4693 | 0 | 0 | my ($op, $val) = ($1, $2); | |||
| 4694 | 0 | 0 | if(looks_like_number($val)) { | |||
| 4695 | 0 | 0 | if ($op eq '<') { | |||
| 4696 | 0 | 0 | $param->{max} = $val - 1; | |||
| 4697 | } elsif ($op eq '<=') { | |||||
| 4698 | 0 | 0 | $param->{max} = $val; | |||
| 4699 | } elsif ($op eq '>') { | |||||
| 4700 | 0 | 0 | $param->{min} = $val + 1; | |||
| 4701 | } elsif ($op eq '>=') { | |||||
| 4702 | 0 | 0 | $param->{min} = $val; | |||
| 4703 | } | |||||
| 4704 | } | |||||
| 4705 | } | |||||
| 4706 | ||||||
| 4707 | 22 | 27 | if(defined($param->{max})) { | |||
| 4708 | 13 | 26 | $self->_log(" Set max to $param->{max}"); | |||
| 4709 | } | |||||
| 4710 | 22 | 26 | if(defined($param->{min})) { | |||
| 4711 | 18 | 24 | $self->_log(" Set min to $param->{min}"); | |||
| 4712 | } | |||||
| 4713 | } | |||||
| 4714 | ||||||
| 4715 | # -------------------------------------------------- | |||||
| 4716 | # _analyze_code | |||||
| 4717 | # | |||||
| 4718 | # Purpose: Analyse a method's source code using | |||||
| 4719 | # pattern matching to infer parameter | |||||
| 4720 | # names, types, constraints, defaults, | |||||
| 4721 | # and optionality. Orchestrates all | |||||
| 4722 | # per-parameter code analysis helpers. | |||||
| 4723 | # | |||||
| 4724 | # Entry: $code - method body source string. | |||||
| 4725 | # $method - method hashref (used for | |||||
| 4726 | # constructor-specific logic | |||||
| 4727 | # when extracting parameters | |||||
| 4728 | # from @_ patterns). | |||||
| 4729 | # | |||||
| 4730 | # Exit: Returns a hashref of parameter name | |||||
| 4731 | # to parameter spec hashref, with as | |||||
| 4732 | # much type and constraint information | |||||
| 4733 | # as could be inferred from the code. | |||||
| 4734 | # | |||||
| 4735 | # Side effects: Logs progress and warnings to stdout | |||||
| 4736 | # when verbose is set. | |||||
| 4737 | # | |||||
| 4738 | # Notes: Analysis is capped at max_parameters | |||||
| 4739 | # to prevent runaway processing on | |||||
| 4740 | # pathological methods. Falls back to | |||||
| 4741 | # classic @_ extraction if signature | |||||
| 4742 | # extraction found no parameters. | |||||
| 4743 | # -------------------------------------------------- | |||||
| 4744 | sub _analyze_code { | |||||
| 4745 | 268 | 2001 | my ($self, $code, $method) = @_; | |||
| 4746 | ||||||
| 4747 | 268 | 206 | my %params; | |||
| 4748 | ||||||
| 4749 | # Safety check - limit parameter analysis to prevent runaway processing | |||||
| 4750 | 268 | 197 | my $param_count = 0; | |||
| 4751 | ||||||
| 4752 | # Extract parameter names from various signature styles | |||||
| 4753 | 268 | 439 | $self->_extract_parameters_from_signature(\%params, $code); | |||
| 4754 | ||||||
| 4755 | 268 | 473 | $self->_extract_defaults_from_code(\%params, $code, $method); | |||
| 4756 | ||||||
| 4757 | # Infer types from defaults | |||||
| 4758 | 268 | 308 | foreach my $param (keys %params) { | |||
| 4759 | 171 | 243 | if ($params{$param}{_default} && !$params{$param}{type}) { | |||
| 4760 | 18 | 11 | my $default = $params{$param}{_default}; | |||
| 4761 | 18 | 21 | if (ref($default) eq 'HASH') { | |||
| 4762 | 2 | 2 | $params{$param}{type} = 'hashref'; | |||
| 4763 | 2 | 2 | $self->_log(" CODE: $param type inferred as hashref from default"); | |||
| 4764 | } elsif (ref($default) eq 'ARRAY') { | |||||
| 4765 | 1 | 1 | $params{$param}{type} = 'arrayref'; | |||
| 4766 | 1 | 1 | $self->_log(" CODE: $param type inferred as arrayref from default"); | |||
| 4767 | } | |||||
| 4768 | } | |||||
| 4769 | } | |||||
| 4770 | ||||||
| 4771 | 268 | 523 | if($code =~ /(croak|die)\(.*\)\s+if\s*\(\s*scalar\(\@_\)\s*<\s*(\d+)\s*\)/s) { | |||
| 4772 | 0 | 0 | my $required_count = $2; | |||
| 4773 | 0 0 | 0 0 | my @param_names = sort { $params{$a}{position} <=> $params{$b}{position} } keys %params; | |||
| 4774 | 0 | 0 | for my $i (0 .. $required_count-1) { | |||
| 4775 | 0 | 0 | $params{$param_names[$i]}{optional} = 0; | |||
| 4776 | 0 | 0 | $self->_log(" CODE: $param_names[$i] marked required due to croak scalar check"); | |||
| 4777 | } | |||||
| 4778 | } elsif ($code =~ /(croak|die)\(.*\)\s+if\s*\(\s*scalar\(\@_\)\s*==\s*(0)\s*\)/s) { | |||||
| 4779 | 0 | 0 | foreach my $param (keys %params) { | |||
| 4780 | 0 | 0 | $params{$param}{optional} = 0; | |||
| 4781 | 0 | 0 | $self->_log(" CODE: $param: all parameters are required due to 'scalar(@_) == 0' check"); | |||
| 4782 | } | |||||
| 4783 | } | |||||
| 4784 | ||||||
| 4785 | # Analyze each parameter (with safety limit) | |||||
| 4786 | 268 | 273 | foreach my $param (keys %params) { | |||
| 4787 | 171 | 216 | if ($param_count++ > $self->{max_parameters}) { | |||
| 4788 | 0 | 0 | $self->_log(" WARNING: Max parameters ($self->{max_parameters}) exceeded, skipping remaining"); | |||
| 4789 | 0 | 0 | last; | |||
| 4790 | } | |||||
| 4791 | ||||||
| 4792 | 171 | 157 | my $p = \$params{$param}; | |||
| 4793 | ||||||
| 4794 | 171 | 235 | $self->_analyze_parameter_type($p, $param, $code); | |||
| 4795 | 171 | 356 | $self->_analyze_parameter_constraints($p, $param, $code); | |||
| 4796 | 171 | 325 | $self->_analyze_parameter_validation($p, $param, $code); | |||
| 4797 | 171 | 272 | $self->_analyze_advanced_types($p, $param, $code); | |||
| 4798 | ||||||
| 4799 | # Defined checks | |||||
| 4800 | 171 | 2149 | if ($code =~ /defined\s*\(\s*\$$param\s*\)/) { | |||
| 4801 | 1 | 2 | $$p->{optional} = 0; | |||
| 4802 | 1 | 1 | $self->_log(" CODE: $param is required (defined check)"); | |||
| 4803 | } | |||||
| 4804 | ||||||
| 4805 | # Determine optional/required and numeric type from code | |||||
| 4806 | 171 | 8915 | if ($code =~ /\s*\$$param\s*(?:\/\/|\|\|)=/) { | |||
| 4807 | # e.g. $var //= 5; or $var ||= 5; | |||||
| 4808 | 7 | 8 | $$p->{optional} = 1; | |||
| 4809 | 7 | 10 | $self->_log(" CODE: $param is optional (default value assigned in code)"); | |||
| 4810 | } elsif ($code =~ /\s*\$$param\s*(?:[\+\-\*\/%]|(?:\+\+)|(?:--)|(?:[\+\-\*\/%]=)|\+\$|\$[+-])/ ) { | |||||
| 4811 | # Covers arithmetic usage: | |||||
| 4812 | # $x + $param, $param++, $param--, $x += $param, $x -= $param, etc. | |||||
| 4813 | 36 | 117 | $$p->{optional} = 0; | |||
| 4814 | 36 | 104 | $$p->{type} //= 'number'; | |||
| 4815 | 36 | 59 | $self->_log(" CODE: $param is required (used in arithmetic context)"); | |||
| 4816 | } elsif ($code =~ /\$\b$param\b\s*(?:\+0|\*1)/) { | |||||
| 4817 | # Forces numeric context, e.g., "$param + 0" or "$param * 1" | |||||
| 4818 | 0 | 0 | $$p->{optional} = 0; | |||
| 4819 | 0 | 0 | $$p->{type} //= 'number'; | |||
| 4820 | 0 | 0 | $self->_log(" CODE: $param is required (numeric context)"); | |||
| 4821 | } | |||||
| 4822 | ||||||
| 4823 | # Required parameter checks (undef causes error) | |||||
| 4824 | ||||||
| 4825 | # Style 1: block form | |||||
| 4826 | 171 | 4990 | if ($code =~ /if\s*\(\s*!\s*defined\s*\(\s*\$$param\s*\)\s*\)\s*\{([^}]+)\}/s) { | |||
| 4827 | 0 | 0 | my $block = $1; | |||
| 4828 | 0 | 0 | if ($block =~ /\b(croak|die|confess)\b/) { | |||
| 4829 | 0 | 0 | $$p->{optional} = 0; | |||
| 4830 | 0 | 0 | $self->_log(" CODE: $param is required (undef causes error)"); | |||
| 4831 | } | |||||
| 4832 | } | |||||
| 4833 | ||||||
| 4834 | # Style 2: postfix unless | |||||
| 4835 | 171 | 5480 | if ($code =~ /\b(croak|die|confess)\b[^;]*\bunless\s+defined\s*\(\s*\$$param\s*\)/) { | |||
| 4836 | 0 | 0 | $$p->{optional} = 0; | |||
| 4837 | 0 | 0 | $self->_log(" CODE: $param is required (postfix undef check)"); | |||
| 4838 | } | |||||
| 4839 | ||||||
| 4840 | # Exists checks for hash keys | |||||
| 4841 | 171 | 2124 | if ($code =~ /exists\s*\(\s*\$$param\s*\)/) { | |||
| 4842 | 0 | 0 | $$p->{type} = 'hashkey'; | |||
| 4843 | 0 | 0 | $self->_log(" CODE: $param is a hash key"); | |||
| 4844 | } | |||||
| 4845 | ||||||
| 4846 | # Scalar context for arrays | |||||
| 4847 | 171 | 2094 | if ($code =~ /scalar\s*\(\s*\@?\$$param\s*\)/) { | |||
| 4848 | 0 | 0 | $$p->{type} = 'array'; | |||
| 4849 | 0 | 0 | $self->_log(" CODE: $param used in scalar context (array)"); | |||
| 4850 | } | |||||
| 4851 | ||||||
| 4852 | 171 | 238 | $self->_extract_error_constraints($p, $param, $code); | |||
| 4853 | } | |||||
| 4854 | ||||||
| 4855 | 268 | 296 | return \%params; | |||
| 4856 | } | |||||
| 4857 | ||||||
| 4858 | # -------------------------------------------------- | |||||
| 4859 | # _analyze_parameter_type | |||||
| 4860 | # | |||||
| 4861 | # Purpose: Infer the type of a single parameter | |||||
| 4862 | # from ref() checks, isa() calls, | |||||
| 4863 | # bless patterns, array/hash operations, | |||||
| 4864 | # and numeric operator usage in the | |||||
| 4865 | # method body. | |||||
| 4866 | # | |||||
| 4867 | # Entry: $p_ref - reference to the parameter | |||||
| 4868 | # hashref (modified in place | |||||
| 4869 | # via the referenced hash). | |||||
| 4870 | # $param - parameter name string. | |||||
| 4871 | # $code - method body source string. | |||||
| 4872 | # | |||||
| 4873 | # Exit: Returns nothing. Modifies the | |||||
| 4874 | # referenced parameter hashref. | |||||
| 4875 | # | |||||
| 4876 | # Side effects: Logs detections to stdout when | |||||
| 4877 | # verbose is set. | |||||
| 4878 | # -------------------------------------------------- | |||||
| 4879 | sub _analyze_parameter_type { | |||||
| 4880 | 175 | 183 | my ($self, $p_ref, $param, $code) = @_; | |||
| 4881 | 175 | 161 | my $p = $$p_ref; | |||
| 4882 | ||||||
| 4883 | # Type inference from ref() checks | |||||
| 4884 | 175 | 13216 | if ($code =~ /ref\s*\(\s*\$$param\s*\)\s*eq\s*['"](ARRAY|HASH|SCALAR)['"]/gi) { | |||
| 4885 | 4 | 6 | my $reftype = lc($1); | |||
| 4886 | 4 | 8 | $p->{type} = $reftype eq 'array' ? 'arrayref' : | |||
| 4887 | $reftype eq 'hash' ? 'hashref' : | |||||
| 4888 | 'scalar'; | |||||
| 4889 | 4 | 8 | $self->_log(" CODE: $param is $p->{type} (ref check)"); | |||
| 4890 | } | |||||
| 4891 | # ISA checks for objects | |||||
| 4892 | elsif ($code =~ /\$$param\s*->\s*isa\s*\(\s*['"]([^'"]+)['"]\s*\)/i) { | |||||
| 4893 | 3 | 6 | $p->{type} = 'object'; | |||
| 4894 | 3 | 5 | $p->{isa} = $1; | |||
| 4895 | 3 | 9 | $self->_log(" CODE: $param is object of class $1"); | |||
| 4896 | } | |||||
| 4897 | # Blessed references | |||||
| 4898 | elsif ($code =~ /bless\s+.*\$$param/) { | |||||
| 4899 | 0 | 0 | $p->{type} = 'object'; | |||
| 4900 | 0 | 0 | $self->_log(" CODE: $param is blessed object"); | |||
| 4901 | } | |||||
| 4902 | # Array/hash operations | |||||
| 4903 | 175 | 347 | if (!$p->{type}) { | |||
| 4904 | 151 | 5726 | if ($code =~ /\@\{\s*\$$param\s*\}/ || $code =~ /push\s*\(\s*\@?\$$param/) { | |||
| 4905 | 0 | 0 | $p->{type} = 'arrayref'; | |||
| 4906 | } elsif ($code =~ /\%\{\s*\$$param\s*\}/ || $code =~ /\$$param\s*->\s*\{/) { | |||||
| 4907 | 3 | 5 | $p->{type} = 'hashref'; | |||
| 4908 | } | |||||
| 4909 | } | |||||
| 4910 | ||||||
| 4911 | # Infer type from the default value if type is unknown | |||||
| 4912 | 175 | 353 | if (!$p->{type} && exists $p->{_default}) { | |||
| 4913 | 20 | 14 | my $default = $p->{_default}; | |||
| 4914 | 20 | 28 | if (ref($default) eq 'HASH') { | |||
| 4915 | 0 | 0 | $p->{type} = 'hashref'; | |||
| 4916 | 0 | 0 | $self->_log(" CODE: $param type inferred as hashref from default"); | |||
| 4917 | } elsif (ref($default) eq 'ARRAY') { | |||||
| 4918 | 0 | 0 | $p->{type} = 'arrayref'; | |||
| 4919 | 0 | 0 | $self->_log(" CODE: $param type inferred as arrayref from default"); | |||
| 4920 | } | |||||
| 4921 | } | |||||
| 4922 | ||||||
| 4923 | # ------------------------------------------------------------ | |||||
| 4924 | # Heuristic numeric inference (low confidence) | |||||
| 4925 | # ------------------------------------------------------------ | |||||
| 4926 | 175 | 220 | if (!$p->{type}) { | |||
| 4927 | # Numeric operators: + - * / % ** | |||||
| 4928 | 148 | 7246 | if ( | |||
| 4929 | $code =~ /\$$param\s*[\+\-\*\/%]/ || | |||||
| 4930 | $code =~ /[\+\-\*\/%]\s*\$$param/ || | |||||
| 4931 | $code =~ /\bint\s*\(\s*\$$param\s*\)/ || | |||||
| 4932 | $code =~ /\babs\s*\(\s*\$$param\s*\)/ | |||||
| 4933 | ) { | |||||
| 4934 | 47 | 54 | $p->{type} = 'number'; | |||
| 4935 | 47 | 53 | $p->{_type_confidence} = 'heuristic'; | |||
| 4936 | 47 | 71 | $self->_log(" CODE: $param inferred as number (numeric operator)"); | |||
| 4937 | } | |||||
| 4938 | # Numeric comparison | |||||
| 4939 | elsif ( | |||||
| 4940 | $code =~ /\$$param\s*(?:==|!=|<=|>=|<|>)/ || | |||||
| 4941 | $code =~ /(?:==|!=|<=|>=|<|>)\s*\$$param/ | |||||
| 4942 | ) { | |||||
| 4943 | 14 | 23 | $p->{type} = 'number'; | |||
| 4944 | 14 | 18 | $p->{_type_confidence} = 'heuristic'; | |||
| 4945 | 14 | 27 | $self->_log(" CODE: $param inferred as number (numeric comparison)"); | |||
| 4946 | } | |||||
| 4947 | } | |||||
| 4948 | } | |||||
| 4949 | ||||||
| 4950 | # -------------------------------------------------- | |||||
| 4951 | # _analyze_advanced_types | |||||
| 4952 | # | |||||
| 4953 | # Purpose: Apply enhanced type detection to a | |||||
| 4954 | # single parameter, checking for | |||||
| 4955 | # DateTime objects, file handles, | |||||
| 4956 | # coderefs, and enum-like constraints | |||||
| 4957 | # beyond what basic type inference | |||||
| 4958 | # can determine. | |||||
| 4959 | # | |||||
| 4960 | # Entry: $p_ref - reference to the parameter | |||||
| 4961 | # hashref (modified in place | |||||
| 4962 | # via the referenced hash). | |||||
| 4963 | # $param - the parameter name string. | |||||
| 4964 | # $code - method body source string. | |||||
| 4965 | # | |||||
| 4966 | # Exit: Returns nothing. Modifies the | |||||
| 4967 | # referenced parameter hashref in place. | |||||
| 4968 | # | |||||
| 4969 | # Side effects: Logs detections to stdout when | |||||
| 4970 | # verbose is set. | |||||
| 4971 | # | |||||
| 4972 | # Notes: Delegates to four specialised | |||||
| 4973 | # detectors: _detect_datetime_type, | |||||
| 4974 | # _detect_filehandle_type, | |||||
| 4975 | # _detect_coderef_type, and | |||||
| 4976 | # _detect_enum_type. Each detector | |||||
| 4977 | # returns early on first match so | |||||
| 4978 | # detectors are implicitly prioritised | |||||
| 4979 | # in that order. | |||||
| 4980 | # -------------------------------------------------- | |||||
| 4981 | sub _analyze_advanced_types { | |||||
| 4982 | 171 | 176 | my ($self, $p_ref, $param, $code) = @_; | |||
| 4983 | ||||||
| 4984 | # Dereference once to get the hash reference | |||||
| 4985 | 171 | 129 | my $p = $$p_ref; | |||
| 4986 | ||||||
| 4987 | # Now pass the dereferenced hash to the detection methods | |||||
| 4988 | 171 | 272 | $self->_detect_datetime_type($p, $param, $code); | |||
| 4989 | 171 | 319 | $self->_detect_filehandle_type($p, $param, $code); | |||
| 4990 | 171 | 298 | $self->_detect_coderef_type($p, $param, $code); | |||
| 4991 | 171 | 212 | $self->_detect_enum_type($p, $param, $code); | |||
| 4992 | } | |||||
| 4993 | ||||||
| 4994 | # -------------------------------------------------- | |||||
| 4995 | # _detect_datetime_type | |||||
| 4996 | # | |||||
| 4997 | # Purpose: Detect DateTime objects, Time::Piece | |||||
| 4998 | # objects, date strings, ISO 8601 | |||||
| 4999 | # strings, and UNIX timestamps by | |||||
| 5000 | # analysing code patterns involving | |||||
| 5001 | # the parameter. | |||||
| 5002 | # | |||||
| 5003 | # Entry: $p - parameter hashref (modified | |||||
| 5004 | # in place). | |||||
| 5005 | # $param - parameter name string. | |||||
| 5006 | # $code - method body source string. | |||||
| 5007 | # | |||||
| 5008 | # Exit: Returns nothing. Modifies $p in place, | |||||
| 5009 | # setting type, isa, semantic, min, | |||||
| 5010 | # matches, and/or format keys. | |||||
| 5011 | # Returns immediately on first match. | |||||
| 5012 | # | |||||
| 5013 | # Side effects: Logs detections to stdout when | |||||
| 5014 | # verbose is set. | |||||
| 5015 | # -------------------------------------------------- | |||||
| 5016 | sub _detect_datetime_type { | |||||
| 5017 | 173 | 157 | my ($self, $p, $param, $code) = @_; | |||
| 5018 | ||||||
| 5019 | # Validate param is just a simple word | |||||
| 5020 | 173 | 449 | return unless defined $param && $param =~ /^\w+$/; | |||
| 5021 | ||||||
| 5022 | # DateTime object detection via isa/UNIVERSAL checks | |||||
| 5023 | 173 | 5013 | if ($code =~ /\$$param\s*->\s*isa\s*\(\s*['"]DateTime['"]\s*\)/i) { | |||
| 5024 | 2 | 4 | $p->{type} = 'object'; | |||
| 5025 | 2 | 6 | $p->{isa} = 'DateTime'; | |||
| 5026 | 2 | 2 | $p->{semantic} = 'datetime_object'; | |||
| 5027 | 2 | 5 | $self->_log(" ADVANCED: $param is DateTime object"); | |||
| 5028 | 2 | 2 | return; | |||
| 5029 | } | |||||
| 5030 | ||||||
| 5031 | # Check for DateTime method calls | |||||
| 5032 | 171 | 2890 | if ($code =~ /\$$param\s*->\s*(ymd|dmy|mdy|hms|iso8601|epoch|strftime)/) { | |||
| 5033 | 1 | 2 | $p->{type} = 'object'; | |||
| 5034 | 1 | 2 | $p->{isa} = 'DateTime'; | |||
| 5035 | 1 | 1 | $p->{semantic} = 'datetime_object'; | |||
| 5036 | 1 | 3 | $self->_log(" ADVANCED: $param uses DateTime methods"); | |||
| 5037 | 1 | 1 | return; | |||
| 5038 | } | |||||
| 5039 | ||||||
| 5040 | # Time::Piece detection | |||||
| 5041 | 170 | 6602 | if ($code =~ /\$$param\s*->\s*isa\s*\(\s*['"]Time::Piece['"]\s*\)/i || | |||
| 5042 | $code =~ /\$$param\s*->\s*(strftime|epoch|year|mon|mday)/) { | |||||
| 5043 | 0 | 0 | $p->{type} = 'object'; | |||
| 5044 | 0 | 0 | $p->{isa} = 'Time::Piece'; | |||
| 5045 | 0 | 0 | $p->{semantic} = 'timepiece_object'; | |||
| 5046 | 0 | 0 | $self->_log(" ADVANCED: $param is Time::Piece object"); | |||
| 5047 | 0 | 0 | return; | |||
| 5048 | } | |||||
| 5049 | ||||||
| 5050 | # String date/time patterns via regex matching | |||||
| 5051 | 170 | 1863 | if ($code =~ /\$$param\s*=~\s*\/.*?\\d\{4\}.*?\\d\{2\}.*?\\d\{2\}/) { | |||
| 5052 | 1 | 2 | $p->{type} = 'string'; | |||
| 5053 | 1 | 2 | $p->{semantic} = 'date_string'; | |||
| 5054 | 1 | 1 | $p->{format} = 'YYYY-MM-DD or similar'; | |||
| 5055 | 1 | 2 | $self->_log(" ADVANCED: $param validated as date string pattern"); | |||
| 5056 | 1 | 2 | return; | |||
| 5057 | } | |||||
| 5058 | ||||||
| 5059 | # ISO 8601 date pattern | |||||
| 5060 | 169 | 1861 | if ($code =~ /\$$param\s*=~\s*\/.*?[Tt].*?[Zz].*?\//) { | |||
| 5061 | 1 | 2 | $p->{type} = 'string'; | |||
| 5062 | 1 | 2 | $p->{semantic} = 'iso8601_string'; | |||
| 5063 | 1 | 1 | $p->{matches} = '/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z?$/'; | |||
| 5064 | 1 | 3 | $self->_log(" ADVANCED: $param validated as ISO 8601 datetime"); | |||
| 5065 | 1 | 3 | return; | |||
| 5066 | } | |||||
| 5067 | ||||||
| 5068 | # UNIX timestamp detection (numeric with specific range) | |||||
| 5069 | 168 | 5865 | if ($code =~ /\$$param\s*>\s*\d{9,}/ || # UNIX timestamps are 10+ digits | |||
| 5070 | $code =~ /time\(\s*\)\s*-\s*\$$param/ || | |||||
| 5071 | $code =~ /\$$param\s*-\s*time\(\s*\)/) { | |||||
| 5072 | 2 | 3 | $p->{type} = 'integer'; | |||
| 5073 | 2 | 3 | $p->{semantic} = 'unix_timestamp'; | |||
| 5074 | 2 | 2 | $p->{min} = 0; | |||
| 5075 | 2 | 5 | $self->_log(" ADVANCED: $param appears to be UNIX timestamp"); | |||
| 5076 | 2 | 4 | return; | |||
| 5077 | } | |||||
| 5078 | ||||||
| 5079 | # Date parsing with strptime or similar | |||||
| 5080 | 166 | 5473 | if ($code =~ /strptime\s*\(\s*\$$param/ || | |||
| 5081 | $code =~ /DateTime::Format::\w+\s*->\s*parse_datetime\s*\(\s*\$$param/) { | |||||
| 5082 | 0 | 0 | $p->{type} = 'string'; | |||
| 5083 | 0 | 0 | $p->{semantic} = 'datetime_parseable'; | |||
| 5084 | 0 | 0 | $self->_log(" ADVANCED: $param is parsed as datetime"); | |||
| 5085 | 0 | 0 | return; | |||
| 5086 | } | |||||
| 5087 | } | |||||
| 5088 | ||||||
| 5089 | # -------------------------------------------------- | |||||
| 5090 | # _detect_filehandle_type | |||||
| 5091 | # | |||||
| 5092 | # Purpose: Detect file handle parameters and | |||||
| 5093 | # file path string parameters by | |||||
| 5094 | # analysing I/O operations, file test | |||||
| 5095 | # operators, and path manipulation | |||||
| 5096 | # patterns involving the parameter. | |||||
| 5097 | # | |||||
| 5098 | # Entry: $p - parameter hashref (modified | |||||
| 5099 | # in place). | |||||
| 5100 | # $param - parameter name string. | |||||
| 5101 | # $code - method body source string. | |||||
| 5102 | # | |||||
| 5103 | # Exit: Returns nothing. Modifies $p in place, | |||||
| 5104 | # setting type, isa, and semantic keys. | |||||
| 5105 | # Returns immediately on first match. | |||||
| 5106 | # | |||||
| 5107 | # Side effects: Logs detections to stdout when | |||||
| 5108 | # verbose is set. | |||||
| 5109 | # -------------------------------------------------- | |||||
| 5110 | sub _detect_filehandle_type { | |||||
| 5111 | 173 | 174 | my ($self, $p, $param, $code) = @_; | |||
| 5112 | ||||||
| 5113 | 173 | 382 | return unless defined $param && $param =~ /^\w+$/; | |||
| 5114 | ||||||
| 5115 | # File handle operations | |||||
| 5116 | 173 | 3867 | if ($code =~ /(?:open|close|read|print|say|sysread|syswrite)\s*\(?\s*\$$param/) { | |||
| 5117 | 2 | 3 | $p->{type} = 'object'; | |||
| 5118 | 2 | 3 | $p->{isa} = 'IO::Handle'; | |||
| 5119 | 2 | 3 | $p->{semantic} = 'filehandle'; | |||
| 5120 | 2 | 4 | $self->_log(" ADVANCED: $param is a file handle"); | |||
| 5121 | 2 | 4 | return; | |||
| 5122 | } | |||||
| 5123 | ||||||
| 5124 | # Filehandle-specific operations | |||||
| 5125 | 171 | 2919 | if ($code =~ /\$$param\s*->\s*(readline|getline|print|say|close|flush|autoflush)/) { | |||
| 5126 | 1 | 3 | $p->{type} = 'object'; | |||
| 5127 | 1 | 2 | $p->{isa} = 'IO::Handle'; | |||
| 5128 | 1 | 2 | $p->{semantic} = 'filehandle'; | |||
| 5129 | 1 | 4 | $self->_log(" ADVANCED: $param uses filehandle methods"); | |||
| 5130 | 1 | 3 | return; | |||
| 5131 | } | |||||
| 5132 | ||||||
| 5133 | # File test operators | |||||
| 5134 | 170 | 1989 | if ($code =~ /(?:-[frwxoOeszlpSbctugkTBMAC])\s+\$$param/) { | |||
| 5135 | 2 | 3 | $p->{type} = 'string'; | |||
| 5136 | 2 | 3 | $p->{semantic} = 'filepath'; | |||
| 5137 | 2 | 4 | $self->_log(" ADVANCED: $param is tested as file path"); | |||
| 5138 | 2 | 3 | return; | |||
| 5139 | } | |||||
| 5140 | ||||||
| 5141 | # File::Spec operations or path manipulation | |||||
| 5142 | 168 | 5809 | if ($code =~ /File::(?:Spec|Basename)::\w+\s*\(\s*\$$param/ || | |||
| 5143 | $code =~ /(?:basename|dirname|fileparse)\s*\(\s*\$$param/) { | |||||
| 5144 | 0 | 0 | $p->{type} = 'string'; | |||
| 5145 | 0 | 0 | $p->{semantic} = 'filepath'; | |||
| 5146 | 0 | 0 | $self->_log(" ADVANCED: $param manipulated as file path"); | |||
| 5147 | 0 | 0 | return; | |||
| 5148 | } | |||||
| 5149 | ||||||
| 5150 | # Path validation patterns | |||||
| 5151 | # Only match a literal path assigned or defaulted to this variable | |||||
| 5152 | 168 | 343 | if(defined $p->{_default} && $p->{_default} =~ m{^([A-Za-z]:\\|/|\./|\.\./)}) { | |||
| 5153 | 0 | 0 | $p->{type} = 'string'; | |||
| 5154 | 0 | 0 | $p->{semantic} = 'filepath'; | |||
| 5155 | 0 | 0 | $self->_log(" ADVANCED: $param default looks like a path"); | |||
| 5156 | 0 | 0 | return; | |||
| 5157 | } | |||||
| 5158 | ||||||
| 5159 | # IO::File detection | |||||
| 5160 | 168 | 6501 | if ($code =~ /\$$param\s*->\s*isa\s*\(\s*['"]IO::File['"]\s*\)/ || | |||
| 5161 | $code =~ /IO::File\s*->\s*new\s*\(\s*\$$param/) { | |||||
| 5162 | 0 | 0 | $p->{type} = 'object'; | |||
| 5163 | 0 | 0 | $p->{isa} = 'IO::File'; | |||
| 5164 | 0 | 0 | $p->{semantic} = 'filehandle'; | |||
| 5165 | 0 | 0 | $self->_log(" ADVANCED: $param is IO::File object"); | |||
| 5166 | 0 | 0 | return; | |||
| 5167 | } | |||||
| 5168 | } | |||||
| 5169 | ||||||
| 5170 | # -------------------------------------------------- | |||||
| 5171 | # _detect_coderef_type | |||||
| 5172 | # | |||||
| 5173 | # Purpose: Detect coderef and callback parameters | |||||
| 5174 | # by analysing ref() checks, invocation | |||||
| 5175 | # patterns, and parameter naming | |||||
| 5176 | # conventions. | |||||
| 5177 | # | |||||
| 5178 | # Entry: $p - parameter hashref (modified | |||||
| 5179 | # in place). | |||||
| 5180 | # $param - parameter name string. | |||||
| 5181 | # $code - method body source string. | |||||
| 5182 | # | |||||
| 5183 | # Exit: Returns nothing. Modifies $p in place, | |||||
| 5184 | # setting type and semantic keys. | |||||
| 5185 | # Returns immediately on first match. | |||||
| 5186 | # | |||||
| 5187 | # Side effects: Logs detections to stdout when | |||||
| 5188 | # verbose is set. | |||||
| 5189 | # -------------------------------------------------- | |||||
| 5190 | sub _detect_coderef_type { | |||||
| 5191 | 174 | 181 | my ($self, $p, $param, $code) = @_; | |||
| 5192 | ||||||
| 5193 | 174 | 363 | return unless defined $param && $param =~ /^\w+$/; | |||
| 5194 | ||||||
| 5195 | # ref() check for CODE | |||||
| 5196 | 174 | 4508 | if ($code =~ /ref\s*\(\s*\$$param\s*\)\s*eq\s*['"]CODE['"]/i) { | |||
| 5197 | 2 | 2 | $p->{type} = 'coderef'; | |||
| 5198 | 2 | 3 | $p->{semantic} = 'callback'; | |||
| 5199 | 2 | 6 | $self->_log(" ADVANCED: $param is coderef (ref check)"); | |||
| 5200 | 2 | 2 | return; | |||
| 5201 | } | |||||
| 5202 | ||||||
| 5203 | # Invocation as coderef - note the escaped @ in \@_ | |||||
| 5204 | 172 | 6118 | if ($code =~ /\$$param\s*->\s*\(/ || | |||
| 5205 | $code =~ /\$$param\s*->\s*\(\s*\@_\s*\)/ || | |||||
| 5206 | $code =~ /&\s*\{\s*\$$param\s*\}/) { | |||||
| 5207 | 2 | 3 | $p->{type} = 'coderef'; | |||
| 5208 | 2 | 2 | $p->{semantic} = 'callback'; | |||
| 5209 | 2 | 4 | $self->_log(" ADVANCED: $param invoked as coderef"); | |||
| 5210 | 2 | 3 | return; | |||
| 5211 | } | |||||
| 5212 | ||||||
| 5213 | # Parameter name suggests callback | |||||
| 5214 | 170 | 319 | if ($param =~ /^(?:callback|cb|handler|sub|code|fn|func|on_\w+)$/i) { | |||
| 5215 | 2 | 3 | $p->{type} = 'coderef'; | |||
| 5216 | 2 | 2 | $p->{semantic} = 'callback'; | |||
| 5217 | 2 | 4 | $self->_log(" ADVANCED: $param name suggests coderef"); | |||
| 5218 | 2 | 4 | return; | |||
| 5219 | } | |||||
| 5220 | ||||||
| 5221 | # Blessed coderef (unusual but valid) | |||||
| 5222 | 168 | 2193 | if ($code =~ /blessed\s*\(\s*\$$param\s*\)/ && | |||
| 5223 | $code =~ /ref\s*\(\s*\$$param\s*\)\s*eq\s*['"]CODE['"]/i) { | |||||
| 5224 | 0 | 0 | $p->{type} = 'object'; | |||
| 5225 | 0 | 0 | $p->{isa} = 'blessed_coderef'; | |||
| 5226 | 0 | 0 | $p->{semantic} = 'callback'; | |||
| 5227 | 0 | 0 | $self->_log(" ADVANCED: $param is blessed coderef"); | |||
| 5228 | 0 | 0 | return; | |||
| 5229 | } | |||||
| 5230 | } | |||||
| 5231 | ||||||
| 5232 | # -------------------------------------------------- | |||||
| 5233 | # _detect_enum_type | |||||
| 5234 | # | |||||
| 5235 | # Purpose: Detect enum-like parameters whose | |||||
| 5236 | # valid values are a fixed set, by | |||||
| 5237 | # analysing validation patterns | |||||
| 5238 | # including regex alternations, hash | |||||
| 5239 | # lookups, grep checks, given/when, | |||||
| 5240 | # if/elsif chains, and smart match. | |||||
| 5241 | # | |||||
| 5242 | # Entry: $p - parameter hashref (modified | |||||
| 5243 | # in place). | |||||
| 5244 | # $param - parameter name string. | |||||
| 5245 | # $code - method body source string. | |||||
| 5246 | # | |||||
| 5247 | # Exit: Returns nothing. Modifies $p in place, | |||||
| 5248 | # setting type, enum, and semantic keys. | |||||
| 5249 | # Returns immediately on first match. | |||||
| 5250 | # | |||||
| 5251 | # Side effects: Logs detections to stdout when | |||||
| 5252 | # verbose is set. | |||||
| 5253 | # | |||||
| 5254 | # Notes: Requires at least 3 if/elsif branches | |||||
| 5255 | # for pattern 5 to avoid false positives | |||||
| 5256 | # from ordinary conditional code. | |||||
| 5257 | # -------------------------------------------------- | |||||
| 5258 | sub _detect_enum_type { | |||||
| 5259 | 174 | 179 | my ($self, $p, $param, $code) = @_; | |||
| 5260 | ||||||
| 5261 | 174 | 326 | return unless defined $param && $param =~ /^\w+$/; | |||
| 5262 | ||||||
| 5263 | # Pattern 1: die/croak unless value is in list | |||||
| 5264 | # die 'Invalid status' unless $status =~ /^(active|inactive|pending)$/; | |||||
| 5265 | 174 | 2650 | if ($code =~ /unless\s+\$$param\s*=~\s*\/\^?\(([^)]+)\)/) { | |||
| 5266 | 3 | 6 | my $values = $1; | |||
| 5267 | 3 | 5 | my @enum_values = split(/\|/, $values); | |||
| 5268 | 3 | 47 | $p->{type} = 'string' unless $p->{type}; | |||
| 5269 | 3 | 3 | $p->{enum} = \@enum_values; | |||
| 5270 | 3 | 5 | $p->{semantic} = 'enum'; | |||
| 5271 | 3 | 10 | $self->_log(" ADVANCED: $param is enum with values: " . join(', ', @enum_values)); | |||
| 5272 | 3 | 6 | return; | |||
| 5273 | } | |||||
| 5274 | ||||||
| 5275 | # Pattern 2: Hash lookup for validation | |||||
| 5276 | # my %valid = map { $_ => 1 } qw(red green blue); | |||||
| 5277 | # die unless $valid{$param}; | |||||
| 5278 | 171 | 254 | if ($code =~ /\%(\w+)\s*=.*?qw\s*[\(\[<{]([^)\]>}]+)[\)\]>}]/) { | |||
| 5279 | 3 | 3 | my $hash_name = $1; | |||
| 5280 | 3 | 4 | my $values_str = $2; | |||
| 5281 | 3 | 47 | if (defined $values_str && $code =~ /\$$hash_name\s*\{\s*\$$param\s*\}/) { | |||
| 5282 | 2 | 4 | my @enum_values = split(/\s+/, $values_str); | |||
| 5283 | 2 | 4 | $p->{type} = 'string' unless $p->{type}; | |||
| 5284 | 2 | 3 | $p->{enum} = \@enum_values; | |||
| 5285 | 2 | 3 | $p->{semantic} = 'enum'; | |||
| 5286 | 2 | 6 | $self->_log(" ADVANCED: $param validated via hash lookup: " . join(', ', @enum_values)); | |||
| 5287 | 2 | 4 | return; | |||
| 5288 | } | |||||
| 5289 | } | |||||
| 5290 | ||||||
| 5291 | # Pattern 3: Array grep validation | |||||
| 5292 | # die unless grep { $_ eq $param } qw(foo bar baz); | |||||
| 5293 | 169 | 3859 | if ($code =~ /grep\s*\{[^}]*\$$param[^}]*\}\s*qw\s*[\(\[<{]([^)\]>}]+)[\)\]>}]/) { | |||
| 5294 | 1 | 2 | my $values_str = $1; | |||
| 5295 | 1 | 2 | my @enum_values = split(/\s+/, $values_str); | |||
| 5296 | 1 | 2 | $p->{type} = 'string' unless $p->{type}; | |||
| 5297 | 1 | 2 | $p->{enum} = \@enum_values; | |||
| 5298 | 1 | 2 | $p->{semantic} = 'enum'; | |||
| 5299 | 1 | 4 | $self->_log(" ADVANCED: $param validated via grep: " . join(', ', @enum_values)); | |||
| 5300 | 1 | 1 | return; | |||
| 5301 | } | |||||
| 5302 | ||||||
| 5303 | # Pattern 4: Given/when (Perl 5.10+) | |||||
| 5304 | 168 | 2086 | if ($code =~ /given\s*\(\s*\$$param\s*\)/) { | |||
| 5305 | 0 | 0 | my @enum_values; | |||
| 5306 | 0 | 0 | while ($code =~ /when\s*\(\s*['"]([^'"]+)['"]\s*\)/g) { | |||
| 5307 | 0 | 0 | push @enum_values, $1; | |||
| 5308 | } | |||||
| 5309 | 0 | 0 | if (@enum_values >= 2) { | |||
| 5310 | 0 | 0 | $p->{type} = 'string' unless $p->{type}; | |||
| 5311 | 0 | 0 | $p->{enum} = \@enum_values; | |||
| 5312 | 0 | 0 | $p->{semantic} = 'enum'; | |||
| 5313 | 0 | 0 | $self->_log(" ADVANCED: $param has enum values from given/when: " . | |||
| 5314 | join(', ', @enum_values)); | |||||
| 5315 | 0 | 0 | return; | |||
| 5316 | } | |||||
| 5317 | } | |||||
| 5318 | ||||||
| 5319 | # Pattern 5: Multiple if/elsif checking specific values | |||||
| 5320 | 168 | 177 | my @if_values; | |||
| 5321 | 168 | 4484 | while ($code =~ /if\s*\(\s*\$$param\s*eq\s*['"]([^'"]+)['"]\s*\)/g) { | |||
| 5322 | 7 | 19 | push @if_values, $1; | |||
| 5323 | } | |||||
| 5324 | 168 | 4457 | while ($code =~ /elsif\s*\(\s*\$$param\s*eq\s*['"]([^'"]+)['"]\s*\)/g) { | |||
| 5325 | 5 | 11 | push @if_values, $1; | |||
| 5326 | } | |||||
| 5327 | 168 | 221 | if (@if_values >= 3) { | |||
| 5328 | 2 | 5 | $p->{type} = 'string' unless $p->{type}; | |||
| 5329 | 2 | 2 | $p->{enum} = \@if_values; | |||
| 5330 | 2 | 27 | $p->{semantic} = 'enum'; | |||
| 5331 | 2 | 8 | $self->_log(" ADVANCED: $param appears to be enum from if/elsif: " . | |||
| 5332 | join(', ', @if_values)); | |||||
| 5333 | 2 | 4 | return; | |||
| 5334 | } | |||||
| 5335 | ||||||
| 5336 | # Pattern 6: Smart match (~~) with array | |||||
| 5337 | 166 | 5335 | if ($code =~ /\$$param\s*~~\s*\[([^\]]+)\]/ || | |||
| 5338 | $code =~ /\$$param\s*~~\s*qw\s*[\(\[<{]([^)\]>}]+)[\)\]>}]/) { | |||||
| 5339 | 0 | 0 | my $values_str = $1; | |||
| 5340 | 0 | 0 | my @enum_values; | |||
| 5341 | 0 | 0 | if ($values_str =~ /['"]/) { | |||
| 5342 | 0 | 0 | @enum_values = $values_str =~ /['"](.*?)['"]/g; | |||
| 5343 | } else { | |||||
| 5344 | 0 | 0 | @enum_values = split(/\s+/, $values_str); | |||
| 5345 | } | |||||
| 5346 | 0 | 0 | if (@enum_values) { | |||
| 5347 | 0 | 0 | $p->{type} = 'string' unless $p->{type}; | |||
| 5348 | 0 | 0 | $p->{enum} = \@enum_values; | |||
| 5349 | 0 | 0 | $p->{semantic} = 'enum'; | |||
| 5350 | 0 | 0 | $self->_log(" ADVANCED: $param validated with smart match: " . | |||
| 5351 | join(', ', @enum_values)); | |||||
| 5352 | 0 | 0 | return; | |||
| 5353 | } | |||||
| 5354 | } | |||||
| 5355 | } | |||||
| 5356 | ||||||
| 5357 | # -------------------------------------------------- | |||||
| 5358 | # _extract_error_constraints | |||||
| 5359 | # | |||||
| 5360 | # Purpose: Extract invalid-value constraints and | |||||
| 5361 | # error messages from die/croak patterns | |||||
| 5362 | # referencing a specific parameter, and | |||||
| 5363 | # infer numeric bounds from comparisons | |||||
| 5364 | # with literals. | |||||
| 5365 | # | |||||
| 5366 | # Entry: $p_ref - reference to the parameter | |||||
| 5367 | # hashref (modified in place). | |||||
| 5368 | # $param - parameter name string. | |||||
| 5369 | # $code - method body source string. | |||||
| 5370 | # | |||||
| 5371 | # Exit: Returns nothing. May add _invalid, | |||||
| 5372 | # _errors, min, and/or max to the | |||||
| 5373 | # referenced parameter hashref. | |||||
| 5374 | # | |||||
| 5375 | # Side effects: Logs detections to stdout when | |||||
| 5376 | # verbose is set. | |||||
| 5377 | # -------------------------------------------------- | |||||
| 5378 | sub _extract_error_constraints { | |||||
| 5379 | 174 | 194 | my ($self, $p, $param, $code) = @_; | |||
| 5380 | ||||||
| 5381 | # Look for die/croak/confess with a condition involving this param | |||||
| 5382 | 174 | 368 | while ($code =~ / | |||
| 5383 | (?:die|croak|confess) # error call | |||||
| 5384 | \s* | |||||
| 5385 | (?: | |||||
| 5386 | ["']([^"']+)["'] # captured error message | |||||
| 5387 | | | |||||
| 5388 | q[qw]?\s*[\(\[]([^)\]]+)[\)\]] # q(), qq(), qw() | |||||
| 5389 | )? | |||||
| 5390 | \s* | |||||
| 5391 | if\s+ | |||||
| 5392 | (.+?) # condition | |||||
| 5393 | \s*; | |||||
| 5394 | /gsx) { | |||||
| 5395 | ||||||
| 5396 | 33 | 58 | my $message = $1 || $2; | |||
| 5397 | 33 | 27 | my $condition = $3; | |||
| 5398 | ||||||
| 5399 | # Only keep conditions that reference this parameter | |||||
| 5400 | 33 | 131 | next unless $condition =~ /\$$param\b/; | |||
| 5401 | ||||||
| 5402 | # Initialize storage | |||||
| 5403 | 22 | 49 | $$p->{_invalid} ||= []; | |||
| 5404 | 22 | 48 | $$p->{_errors} ||= []; | |||
| 5405 | ||||||
| 5406 | # Normalize condition (strip surrounding parens) | |||||
| 5407 | 22 | 44 | $condition =~ s/^\(|\)$//g; | |||
| 5408 | 22 | 48 | $condition =~ s/\s+/ /g; | |||
| 5409 | ||||||
| 5410 | # Try to extract a meaningful invalid constraint | |||||
| 5411 | 22 | 41 | my $constraint; | |||
| 5412 | ||||||
| 5413 | # Examples: | |||||
| 5414 | # $age <= 0 | |||||
| 5415 | # $x eq '' | |||||
| 5416 | # length($s) < 3 | |||||
| 5417 | 22 | 913 | if ($condition =~ /\$$param\s*([!<>=]=?|eq|ne|lt|gt|le|ge)\s*(.+)/) { | |||
| 5418 | 8 | 13 | $constraint = "$1 $2"; | |||
| 5419 | } | |||||
| 5420 | elsif ($condition =~ /length\s*\(\s*\$$param\s*\)\s*([<>=!]+)\s*(\d+)/) { | |||||
| 5421 | 1 | 3 | $constraint = "length $1 $2"; | |||
| 5422 | } | |||||
| 5423 | elsif ($condition =~ /\$$param\s*==\s*0/) { | |||||
| 5424 | 0 | 0 | $constraint = '== 0'; | |||
| 5425 | } | |||||
| 5426 | ||||||
| 5427 | # Store results | |||||
| 5428 | 22 9 | 31 13 | push @{ $$p->{_invalid} }, $constraint if $constraint; | |||
| 5429 | 22 17 | 25 24 | push @{ $$p->{_errors} }, $message if defined $message; | |||
| 5430 | ||||||
| 5431 | 22 | 48 | $self->_log( | |||
| 5432 | " ERROR: $param invalid when [$condition]" . | |||||
| 5433 | (defined $message ? " => '$message'" : '') | |||||
| 5434 | ); | |||||
| 5435 | } | |||||
| 5436 | ||||||
| 5437 | # Numeric comparison with literal | |||||
| 5438 | 174 | 3226 | if ($code =~ /\b\Q$param\E\s*(<=|<|>=|>)\s*(-?\d+)/) { | |||
| 5439 | 15 | 27 | my ($op, $num) = ($1, $2); | |||
| 5440 | ||||||
| 5441 | # Mark required | |||||
| 5442 | 15 | 22 | $$p->{optional} = 0; | |||
| 5443 | ||||||
| 5444 | 15 | 37 | if ($op eq '<=') { | |||
| 5445 | 1 | 2 | $$p->{min} = $num + 1; | |||
| 5446 | } elsif ($op eq '<') { | |||||
| 5447 | 4 | 6 | $$p->{min} = $num; | |||
| 5448 | } elsif ($op eq '>=') { | |||||
| 5449 | 1 | 1 | $$p->{max} = $num - 1; | |||
| 5450 | } elsif ($op eq '>') { | |||||
| 5451 | 9 | 12 | $$p->{max} = $num; | |||
| 5452 | } | |||||
| 5453 | ||||||
| 5454 | 15 | 29 | $self->_log(" ERROR: $param normalized constraint from '$op $num'"); | |||
| 5455 | } | |||||
| 5456 | } | |||||
| 5457 | ||||||
| 5458 | # -------------------------------------------------- | |||||
| 5459 | # _extract_parameters_from_signature | |||||
| 5460 | # | |||||
| 5461 | # Purpose: Extract parameter names and positions | |||||
| 5462 | # from a method's signature, trying | |||||
| 5463 | # modern Perl subroutine signatures | |||||
| 5464 | # first and falling back to traditional | |||||
| 5465 | # @_ extraction styles. | |||||
| 5466 | # | |||||
| 5467 | # Entry: $params - hashref to populate with | |||||
| 5468 | # parameter specs (modified | |||||
| 5469 | # in place). | |||||
| 5470 | # $code - method body source string. | |||||
| 5471 | # | |||||
| 5472 | # Exit: Returns nothing. Populates $params. | |||||
| 5473 | # | |||||
| 5474 | # Side effects: Logs detections to stdout when | |||||
| 5475 | # verbose is set. | |||||
| 5476 | # | |||||
| 5477 | # Notes: Three traditional styles are | |||||
| 5478 | # supported: (1) my ($self, ...) = @_, | |||||
| 5479 | # (2) my $self = shift; my $x = shift, | |||||
| 5480 | # (3) my $x = $_[N]. $self and $class | |||||
| 5481 | # are always excluded from the returned | |||||
| 5482 | # parameters. | |||||
| 5483 | # -------------------------------------------------- | |||||
| 5484 | sub _extract_parameters_from_signature { | |||||
| 5485 | 271 | 302 | my ($self, $params, $code) = @_; | |||
| 5486 | ||||||
| 5487 | # Modern Style: Subroutine signatures with attributes | |||||
| 5488 | # Handle multi-line signatures | |||||
| 5489 | # sub foo :attr1 :attr2(val) ( | |||||
| 5490 | # $self, | |||||
| 5491 | # $x :Type, | |||||
| 5492 | # $y = default | |||||
| 5493 | # ) { } | |||||
| 5494 | ||||||
| 5495 | # Try to match signature after attributes | |||||
| 5496 | # Look for the parameter list - it's the last (...) before the opening brace | |||||
| 5497 | # that contains sigils ($, %, @) | |||||
| 5498 | 271 | 715 | if ($code =~ /sub\s+\w+\s*(?::\w+(?:\([^)]*\))?\s*)*\(((?:[^()]|\([^)]*\))*)\)\s*\{/s) { | |||
| 5499 | 12 | 14 | my $potential_sig = $1; | |||
| 5500 | ||||||
| 5501 | # Check if this looks like parameters (has sigils) | |||||
| 5502 | 12 | 15 | if ($potential_sig =~ /[\$\%\@]/) { | |||
| 5503 | 10 | 13 | $self->_log(" SIG: Found modern signature: ($potential_sig)"); | |||
| 5504 | 10 | 25 | $self->_parse_modern_signature($params, $potential_sig); | |||
| 5505 | 10 | 9 | return; | |||
| 5506 | } | |||||
| 5507 | } | |||||
| 5508 | ||||||
| 5509 | # Traditional Style 1: my ($self, $arg1, $arg2) = @_; | |||||
| 5510 | 261 | 549 | if ($code =~ /my\s*\(\s*([^)]+)\)\s*=\s*\@_/s) { | |||
| 5511 | 126 | 138 | my $sig = $1; | |||
| 5512 | 126 | 93 | my $pos = 0; | |||
| 5513 | ||||||
| 5514 | 126 | 226 | while ($sig =~ /\$(\w+)/g) { | |||
| 5515 | 269 | 206 | my $name = $1; | |||
| 5516 | ||||||
| 5517 | 269 | 439 | next if $name =~ /^(self|class)$/i; | |||
| 5518 | ||||||
| 5519 | 149 | 391 | $params->{$name} //= { | |||
| 5520 | _source => 'code', | |||||
| 5521 | optional => 1, | |||||
| 5522 | }; | |||||
| 5523 | ||||||
| 5524 | 149 | 192 | $params->{$name}{position} = $pos unless exists $params->{$name}{position}; | |||
| 5525 | ||||||
| 5526 | 149 | 177 | $pos++; | |||
| 5527 | } | |||||
| 5528 | 126 | 160 | return; | |||
| 5529 | } elsif ($code =~ /my\s+\$self\s*=\s*shift/) { | |||||
| 5530 | # Traditional Style 2: my $self = shift; my $arg1 = shift; | |||||
| 5531 | 11 | 10 | my @shifts; | |||
| 5532 | 11 | 26 | while ($code =~ /my\s+\$(\w+)\s*=\s*shift/g) { | |||
| 5533 | 13 | 22 | push @shifts, $1; | |||
| 5534 | } | |||||
| 5535 | 11 | 51 | shift @shifts if @shifts && $shifts[0] =~ /^(self|class)$/i; | |||
| 5536 | 11 | 10 | my $pos = 0; | |||
| 5537 | 11 | 12 | foreach my $param (@shifts) { | |||
| 5538 | 2 | 6 | $params->{$param} ||= { _source => 'code', optional => 1, position => $pos++ }; | |||
| 5539 | } | |||||
| 5540 | 11 | 13 | return; | |||
| 5541 | } | |||||
| 5542 | ||||||
| 5543 | # Traditional Style 3: Function parameters (no $self) | |||||
| 5544 | 124 | 188 | if ($code =~ /my\s*\(\s*([^)]+)\)\s*=\s*\@_/s) { | |||
| 5545 | 0 | 0 | my $sig = $1; | |||
| 5546 | 0 | 0 | my @param_names = $sig =~ /\$(\w+)/g; | |||
| 5547 | 0 | 0 | my $pos = 0; | |||
| 5548 | 0 | 0 | foreach my $param (@param_names) { | |||
| 5549 | 0 | 0 | next if $param =~ /^(self|class)$/i; | |||
| 5550 | 0 | 0 | $params->{$param} ||= { _source => 'code', optional => 1, position => $pos++ }; | |||
| 5551 | } | |||||
| 5552 | } | |||||
| 5553 | ||||||
| 5554 | # De-duplicate | |||||
| 5555 | 124 | 97 | my %seen; | |||
| 5556 | 124 | 205 | foreach my $param (keys %$params) { | |||
| 5557 | 0 | 0 | if ($seen{$param}++) { | |||
| 5558 | 0 | 0 | $self->_log(" WARNING: Duplicate parameter '$param' found"); | |||
| 5559 | } | |||||
| 5560 | } | |||||
| 5561 | } | |||||
| 5562 | ||||||
| 5563 | # -------------------------------------------------- | |||||
| 5564 | # _parse_modern_signature | |||||
| 5565 | # | |||||
| 5566 | # Purpose: Parse a Perl 5.20+ subroutine | |||||
| 5567 | # signature string into individual | |||||
| 5568 | # parameter specs, respecting nested | |||||
| 5569 | # structures when splitting on commas. | |||||
| 5570 | # | |||||
| 5571 | # Entry: $params - hashref to populate | |||||
| 5572 | # (modified in place). | |||||
| 5573 | # $sig - signature string with outer | |||||
| 5574 | # parentheses already removed. | |||||
| 5575 | # | |||||
| 5576 | # Exit: Returns nothing. Populates $params | |||||
| 5577 | # via _parse_signature_parameter. | |||||
| 5578 | # | |||||
| 5579 | # Side effects: Logs parsing details to stdout when | |||||
| 5580 | # verbose is set. | |||||
| 5581 | # -------------------------------------------------- | |||||
| 5582 | sub _parse_modern_signature { | |||||
| 5583 | 10 | 9 | my ($self, $params, $sig) = @_; | |||
| 5584 | ||||||
| 5585 | 10 | 11 | $self->_log(" DEBUG: Parsing signature: [$sig]"); | |||
| 5586 | ||||||
| 5587 | # Split signature by commas, but respect nested structures | |||||
| 5588 | 10 | 8 | my @parts; | |||
| 5589 | 10 | 9 | my $current = ''; | |||
| 5590 | 10 | 5 | my $depth = 0; | |||
| 5591 | ||||||
| 5592 | 10 | 26 | for my $char (split //, $sig) { | |||
| 5593 | 303 | 627 | if ($char eq '(' || $char eq '[' || $char eq '{') { | |||
| 5594 | 0 | 0 | $depth++; | |||
| 5595 | 0 | 0 | $current .= $char; | |||
| 5596 | } elsif ($char eq ')' || $char eq ']' || $char eq '}') { | |||||
| 5597 | 0 | 0 | $depth--; | |||
| 5598 | 0 | 0 | $current .= $char; | |||
| 5599 | } elsif ($char eq ',' && $depth == 0) { | |||||
| 5600 | 18 | 14 | push @parts, $current; | |||
| 5601 | 18 | 11 | $current = ''; | |||
| 5602 | } else { | |||||
| 5603 | 285 | 172 | $current .= $char; | |||
| 5604 | } | |||||
| 5605 | } | |||||
| 5606 | 10 | 24 | push @parts, $current if $current =~ /\S/; | |||
| 5607 | ||||||
| 5608 | 10 | 6 | my $position = 0; | |||
| 5609 | ||||||
| 5610 | 10 | 8 | foreach my $part (@parts) { | |||
| 5611 | 28 | 61 | $part =~ s/^\s+|\s+$//g; | |||
| 5612 | ||||||
| 5613 | # Skip empty parts | |||||
| 5614 | 28 | 24 | next unless $part; | |||
| 5615 | ||||||
| 5616 | # Parse different parameter types | |||||
| 5617 | 28 | 27 | my $param_info = $self->_parse_signature_parameter($part, $position); | |||
| 5618 | ||||||
| 5619 | 28 | 25 | if ($param_info) { | |||
| 5620 | 28 | 27 | my $name = $param_info->{name}; | |||
| 5621 | ||||||
| 5622 | # Skip self/class | |||||
| 5623 | 28 | 28 | if ($name =~ /^(self|class)$/i) { | |||
| 5624 | 6 | 8 | next; | |||
| 5625 | } | |||||
| 5626 | ||||||
| 5627 | 22 | 21 | $params->{$name} = $param_info; | |||
| 5628 | $self->_log(" SIG: $name has position $position" . | |||||
| 5629 | ($param_info->{optional} ? ' (optional)' : '') . | |||||
| 5630 | 22 | 45 | ($param_info->{_default} ? ", default: $param_info->{_default}" : '')); | |||
| 5631 | 22 | 24 | $position++; | |||
| 5632 | } | |||||
| 5633 | } | |||||
| 5634 | } | |||||
| 5635 | ||||||
| 5636 | # -------------------------------------------------- | |||||
| 5637 | # _parse_signature_parameter | |||||
| 5638 | # | |||||
| 5639 | # Purpose: Parse a single parameter declaration | |||||
| 5640 | # from a modern Perl signature, handling | |||||
| 5641 | # type constraints, default values, | |||||
| 5642 | # plain scalars, and slurpy array/hash | |||||
| 5643 | # parameters. | |||||
| 5644 | # | |||||
| 5645 | # Entry: $part - a single parameter string | |||||
| 5646 | # (one comma-separated | |||||
| 5647 | # element from the signature). | |||||
| 5648 | # $position - zero-based position index | |||||
| 5649 | # of this parameter. | |||||
| 5650 | # | |||||
| 5651 | # Exit: Returns a parameter info hashref on | |||||
| 5652 | # success, or undef if the string does | |||||
| 5653 | # not match any known pattern. | |||||
| 5654 | # | |||||
| 5655 | # Side effects: None. | |||||
| 5656 | # | |||||
| 5657 | # Notes: Six patterns are tried in order: | |||||
| 5658 | # (1) :Type with default, | |||||
| 5659 | # (2) :Type without default, | |||||
| 5660 | # (3) default without type, | |||||
| 5661 | # (4) plain $name, | |||||
| 5662 | # (5) slurpy @name, | |||||
| 5663 | # (6) slurpy %name. | |||||
| 5664 | # -------------------------------------------------- | |||||
| 5665 | sub _parse_signature_parameter { | |||||
| 5666 | 41 | 5256 | my ($self, $part, $position) = @_; | |||
| 5667 | ||||||
| 5668 | 41 | 58 | my %info = ( | |||
| 5669 | _source => 'signature', | |||||
| 5670 | position => $position, | |||||
| 5671 | optional => 0, | |||||
| 5672 | ); | |||||
| 5673 | ||||||
| 5674 | # Pattern 1: Type constraint WITH default: $name :Type = default | |||||
| 5675 | 41 | 115 | if ($part =~ /^\$(\w+)\s*:\s*(\w+)\s*=\s*(.+)$/s) { | |||
| 5676 | 4 | 7 | my ($name, $constraint, $default) = ($1, $2, $3); | |||
| 5677 | 4 | 5 | $default =~ s/^\s+|\s+$//g; | |||
| 5678 | ||||||
| 5679 | 4 | 5 | $info{name} = $name; | |||
| 5680 | 4 | 4 | $info{optional} = 1; | |||
| 5681 | 4 | 5 | $info{_default} = $self->_clean_default_value($default, 1); | |||
| 5682 | ||||||
| 5683 | # Apply type constraint | |||||
| 5684 | 4 | 11 | if ($constraint =~ /^(Int|Integer)$/i) { | |||
| 5685 | 1 | 2 | $info{type} = 'integer'; | |||
| 5686 | } elsif ($constraint =~ /^(Num|Number)$/i) { | |||||
| 5687 | 2 | 3 | $info{type} = 'number'; | |||
| 5688 | } elsif ($constraint =~ /^(Str|String)$/i) { | |||||
| 5689 | 1 | 1 | $info{type} = 'string'; | |||
| 5690 | } elsif ($constraint =~ /^(Bool|Boolean)$/i) { | |||||
| 5691 | 0 | 0 | $info{type} = 'boolean'; | |||
| 5692 | } elsif ($constraint =~ /^(Array|ArrayRef)$/i) { | |||||
| 5693 | 0 | 0 | $info{type} = 'arrayref'; | |||
| 5694 | } elsif ($constraint =~ /^(Hash|HashRef)$/i) { | |||||
| 5695 | 0 | 0 | $info{type} = 'hashref'; | |||
| 5696 | } else { | |||||
| 5697 | 0 | 0 | $info{type} = 'object'; | |||
| 5698 | 0 | 0 | $info{isa} = $constraint; | |||
| 5699 | } | |||||
| 5700 | ||||||
| 5701 | 4 | 5 | return \%info; | |||
| 5702 | } elsif ($part =~ /^\$(\w+)\s*:\s*(\w+)\s*$/s) { | |||||
| 5703 | # Pattern 2: Type constraint WITHOUT default: $name :Type | |||||
| 5704 | 8 | 12 | my ($name, $constraint) = ($1, $2); | |||
| 5705 | 8 | 9 | $info{name} = $name; | |||
| 5706 | 8 | 5 | $info{optional} = 0; | |||
| 5707 | ||||||
| 5708 | # Apply type constraint (same as above) | |||||
| 5709 | 8 | 26 | if ($constraint =~ /^(Int|Integer)$/i) { | |||
| 5710 | 3 | 4 | $info{type} = 'integer'; | |||
| 5711 | } elsif ($constraint =~ /^(Num|Number)$/i) { | |||||
| 5712 | 1 | 2 | $info{type} = 'number'; | |||
| 5713 | } elsif ($constraint =~ /^(Str|String)$/i) { | |||||
| 5714 | 1 | 1 | $info{type} = 'string'; | |||
| 5715 | } elsif ($constraint =~ /^(Bool|Boolean)$/i) { | |||||
| 5716 | 1 | 1 | $info{type} = 'boolean'; | |||
| 5717 | } elsif ($constraint =~ /^(Array|ArrayRef)$/i) { | |||||
| 5718 | 1 | 1 | $info{type} = 'arrayref'; | |||
| 5719 | } elsif ($constraint =~ /^(Hash|HashRef)$/i) { | |||||
| 5720 | 0 | 0 | $info{type} = 'hashref'; | |||
| 5721 | } else { | |||||
| 5722 | 1 | 1 | $info{type} = 'object'; | |||
| 5723 | 1 | 2 | $info{isa} = $constraint; | |||
| 5724 | } | |||||
| 5725 | ||||||
| 5726 | 8 | 8 | return \%info; | |||
| 5727 | } elsif ($part =~ /^\$(\w+)\s*=\s*(.+)$/s) { | |||||
| 5728 | # Pattern 3: Default WITHOUT type: $name = default | |||||
| 5729 | 7 | 10 | my ($name, $default) = ($1, $2); | |||
| 5730 | 7 | 11 | $default =~ s/^\s+|\s+$//g; | |||
| 5731 | ||||||
| 5732 | 7 | 7 | $info{name} = $name; | |||
| 5733 | 7 | 5 | $info{optional} = 1; | |||
| 5734 | 7 | 11 | $info{_default} = $self->_clean_default_value($default, 1); | |||
| 5735 | 7 | 23 | $info{type} = $self->_infer_type_from_default($info{_default}) if $self->can('_infer_type_from_default'); | |||
| 5736 | ||||||
| 5737 | 7 | 8 | return \%info; | |||
| 5738 | } | |||||
| 5739 | ||||||
| 5740 | # Pattern 4: Plain parameter: $name | |||||
| 5741 | elsif ($part =~ /^\$(\w+)$/s) { | |||||
| 5742 | 14 | 15 | $info{name} = $1; | |||
| 5743 | 14 | 11 | $info{optional} = 0; | |||
| 5744 | 14 | 15 | return \%info; | |||
| 5745 | } | |||||
| 5746 | ||||||
| 5747 | # Pattern 5: Array parameter: @name | |||||
| 5748 | elsif ($part =~ /^\@(\w+)$/s) { | |||||
| 5749 | 3 | 5 | $info{name} = $1; | |||
| 5750 | 3 | 3 | $info{type} = 'array'; | |||
| 5751 | 3 | 3 | $info{slurpy} = 1; | |||
| 5752 | 3 | 3 | $info{optional} = 1; | |||
| 5753 | 3 | 5 | return \%info; | |||
| 5754 | } | |||||
| 5755 | ||||||
| 5756 | # Pattern 6: Hash parameter: %name | |||||
| 5757 | elsif ($part =~ /^\%(\w+)$/s) { | |||||
| 5758 | 3 | 5 | $info{name} = $1; | |||
| 5759 | 3 | 3 | $info{type} = 'hash'; | |||
| 5760 | 3 | 4 | $info{slurpy} = 1; | |||
| 5761 | 3 | 3 | $info{optional} = 1; | |||
| 5762 | 3 | 4 | return \%info; | |||
| 5763 | } | |||||
| 5764 | ||||||
| 5765 | 2 | 4 | return undef; | |||
| 5766 | } | |||||
| 5767 | ||||||
| 5768 | # -------------------------------------------------- | |||||
| 5769 | # _infer_type_from_default | |||||
| 5770 | # | |||||
| 5771 | # Purpose: Infer a parameter type from its | |||||
| 5772 | # default value when no explicit type | |||||
| 5773 | # annotation is available. | |||||
| 5774 | # | |||||
| 5775 | # Entry: $default - the cleaned default value | |||||
| 5776 | # scalar, hashref, or | |||||
| 5777 | # arrayref. May be undef. | |||||
| 5778 | # | |||||
| 5779 | # Exit: Returns a type string ('hashref', | |||||
| 5780 | # 'arrayref', 'integer', 'number', | |||||
| 5781 | # 'boolean', 'string'), or undef if | |||||
| 5782 | # $default is undef. | |||||
| 5783 | # | |||||
| 5784 | # Side effects: None. | |||||
| 5785 | # -------------------------------------------------- | |||||
| 5786 | sub _infer_type_from_default { | |||||
| 5787 | 24 | 36 | my ($self, $default) = @_; | |||
| 5788 | ||||||
| 5789 | 24 | 28 | return undef unless defined $default; | |||
| 5790 | ||||||
| 5791 | 21 | 64 | if (ref($default) eq 'HASH') { | |||
| 5792 | 2 | 4 | return 'hashref'; | |||
| 5793 | } elsif (ref($default) eq 'ARRAY') { | |||||
| 5794 | 2 | 4 | return 'arrayref'; | |||
| 5795 | } elsif ($default =~ /^-?\d+$/) { | |||||
| 5796 | 13 | 21 | return 'integer'; | |||
| 5797 | } elsif ($default =~ /^-?\d+\.\d+$/) { | |||||
| 5798 | 2 | 4 | return 'number'; | |||
| 5799 | } elsif ($default eq '1' || $default eq '0') { | |||||
| 5800 | 0 | 0 | return 'boolean'; | |||
| 5801 | } else { | |||||
| 5802 | 2 | 3 | return 'string'; | |||
| 5803 | } | |||||
| 5804 | } | |||||
| 5805 | ||||||
| 5806 | # -------------------------------------------------- | |||||
| 5807 | # _extract_subroutine_attributes | |||||
| 5808 | # | |||||
| 5809 | # Purpose: Extract Perl subroutine attributes | |||||
| 5810 | # (e.g. :lvalue, :method, :Returns(Int)) | |||||
| 5811 | # from a method's source string. | |||||
| 5812 | # | |||||
| 5813 | # Entry: $code - method body source string. | |||||
| 5814 | # | |||||
| 5815 | # Exit: Returns a hashref of attribute name | |||||
| 5816 | # to value (1 for flag-only attributes, | |||||
| 5817 | # the attribute argument string for | |||||
| 5818 | # attributes with values). | |||||
| 5819 | # Returns an empty hashref if no | |||||
| 5820 | # attributes are found. | |||||
| 5821 | # | |||||
| 5822 | # Side effects: Logs detections to stdout when | |||||
| 5823 | # verbose is set. | |||||
| 5824 | # -------------------------------------------------- | |||||
| 5825 | sub _extract_subroutine_attributes { | |||||
| 5826 | 273 | 1781 | my ($self, $code) = @_; | |||
| 5827 | ||||||
| 5828 | 273 | 278 | my %attributes; | |||
| 5829 | ||||||
| 5830 | # Extract all attributes from the sub declaration | |||||
| 5831 | # Attributes are :name or :name(value) between sub name and either ( or { | |||||
| 5832 | # Pattern: sub name ATTRIBUTES ( params ) { } | |||||
| 5833 | # or: sub name ATTRIBUTES { } | |||||
| 5834 | ||||||
| 5835 | # First, find the attributes section (everything between sub name and ( or { ) | |||||
| 5836 | 273 | 254 | my $attr_section = ''; | |||
| 5837 | ||||||
| 5838 | 273 | 531 | if($code =~ /sub\s+\w+\s+((?::\w+(?:\([^)]*\))?\s*)+)/s) { | |||
| 5839 | 9 | 9 | $attr_section = $1; | |||
| 5840 | } | |||||
| 5841 | ||||||
| 5842 | # Parse individual attributes from the section | |||||
| 5843 | 273 | 308 | if($attr_section) { | |||
| 5844 | 9 | 20 | while($attr_section =~ /:(\w+)(?:\(([^)]*)\))?/g) { | |||
| 5845 | 11 | 12 | my ($name, $value) = ($1, $2); | |||
| 5846 | ||||||
| 5847 | 11 | 18 | if (defined $value && $value ne '') { | |||
| 5848 | 5 | 8 | $attributes{$name} = $value; | |||
| 5849 | 5 | 7 | $self->_log(" ATTR: Found attribute :$name($value)"); | |||
| 5850 | } else { | |||||
| 5851 | 6 | 26 | $attributes{$name} = 1; | |||
| 5852 | 6 | 8 | $self->_log(" ATTR: Found attribute :$name"); | |||
| 5853 | } | |||||
| 5854 | } | |||||
| 5855 | } | |||||
| 5856 | ||||||
| 5857 | # Process common attributes | |||||
| 5858 | 273 | 338 | if ($attributes{Returns}) { | |||
| 5859 | 4 | 3 | my $return_type = $attributes{Returns}; | |||
| 5860 | 4 | 5 | if ($return_type ne '1') { # Only log if it's an actual type, not just the flag | |||
| 5861 | 4 | 5 | $self->_log(" ATTR: Method declares return type: $return_type"); | |||
| 5862 | } | |||||
| 5863 | } | |||||
| 5864 | ||||||
| 5865 | 273 | 304 | if ($attributes{lvalue}) { | |||
| 5866 | 4 | 5 | $self->_log(" ATTR: Method is lvalue (can be assigned to)"); | |||
| 5867 | } | |||||
| 5868 | ||||||
| 5869 | 273 | 308 | if ($attributes{method}) { | |||
| 5870 | 2 | 2 | $self->_log(' ATTR: Method explicitly marked as :method'); | |||
| 5871 | } | |||||
| 5872 | ||||||
| 5873 | 273 | 293 | return \%attributes; | |||
| 5874 | } | |||||
| 5875 | ||||||
| 5876 | # -------------------------------------------------- | |||||
| 5877 | # _analyze_postfix_dereferencing | |||||
| 5878 | # | |||||
| 5879 | # Purpose: Detect usage of Perl 5.20+ postfix | |||||
| 5880 | # dereferencing syntax in a method body | |||||
| 5881 | # and record which dereference forms | |||||
| 5882 | # are used. | |||||
| 5883 | # | |||||
| 5884 | # Entry: $code - method body source string. | |||||
| 5885 | # | |||||
| 5886 | # Exit: Returns a hashref whose keys are | |||||
| 5887 | # dereference form names (array_deref, | |||||
| 5888 | # hash_deref, scalar_deref, code_deref, | |||||
| 5889 | # array_slice, hash_slice) with value 1 | |||||
| 5890 | # when detected. | |||||
| 5891 | # Returns an empty hashref if no | |||||
| 5892 | # postfix dereferencing is found. | |||||
| 5893 | # | |||||
| 5894 | # Side effects: Logs detections to stdout when | |||||
| 5895 | # verbose is set. | |||||
| 5896 | # -------------------------------------------------- | |||||
| 5897 | sub _analyze_postfix_dereferencing { | |||||
| 5898 | 275 | 1189 | my ($self, $code) = @_; | |||
| 5899 | ||||||
| 5900 | 275 | 203 | my %derefs; | |||
| 5901 | ||||||
| 5902 | # Array dereference: $ref->@* | |||||
| 5903 | 275 | 532 | if ($code =~ /\$\w+\s*->\s*\@\*/) { | |||
| 5904 | 4 | 5 | $derefs{array_deref} = 1; | |||
| 5905 | 4 | 6 | $self->_log(" MODERN: Uses postfix array dereferencing (->@*)"); | |||
| 5906 | } | |||||
| 5907 | ||||||
| 5908 | # Hash dereference: $ref->%* | |||||
| 5909 | 275 | 449 | if ($code =~ /\$\w+\s*->\s*\%\*/) { | |||
| 5910 | 3 | 3 | $derefs{hash_deref} = 1; | |||
| 5911 | 3 | 5 | $self->_log(" MODERN: Uses postfix hash dereferencing (->%*)"); | |||
| 5912 | } | |||||
| 5913 | ||||||
| 5914 | # Scalar dereference: $ref->$* | |||||
| 5915 | 275 | 405 | if ($code =~ /\$\w+\s*->\s*\$\*/) { | |||
| 5916 | 2 | 3 | $derefs{scalar_deref} = 1; | |||
| 5917 | 2 | 3 | $self->_log(' MODERN: Uses postfix scalar dereferencing (->$*)'); | |||
| 5918 | } | |||||
| 5919 | ||||||
| 5920 | # Code dereference: $ref->&* | |||||
| 5921 | 275 | 396 | if ($code =~ /\$\w+\s*->\s*\&\*/) { | |||
| 5922 | 1 | 1 | $derefs{code_deref} = 1; | |||
| 5923 | 1 | 1 | $self->_log(" MODERN: Uses postfix code dereferencing (->&*)"); | |||
| 5924 | } | |||||
| 5925 | ||||||
| 5926 | # Array element: $ref->@[0,2,4] | |||||
| 5927 | 275 | 401 | if ($code =~ /\$\w+\s*->\s*\@\[/) { | |||
| 5928 | 2 | 2 | $derefs{array_slice} = 1; | |||
| 5929 | 2 | 3 | $self->_log(" MODERN: Uses postfix array slice (->@[...])"); | |||
| 5930 | } | |||||
| 5931 | ||||||
| 5932 | # Hash element: $ref->%{key1,key2} | |||||
| 5933 | 275 | 423 | if ($code =~ /\$\w+\s*->\s*\%\{/) { | |||
| 5934 | 2 | 2 | $derefs{hash_slice} = 1; | |||
| 5935 | 2 | 2 | $self->_log(" MODERN: Uses postfix hash slice (->%{...})"); | |||
| 5936 | } | |||||
| 5937 | ||||||
| 5938 | 275 | 251 | return \%derefs; | |||
| 5939 | } | |||||
| 5940 | ||||||
| 5941 | # -------------------------------------------------- | |||||
| 5942 | # _extract_field_declarations | |||||
| 5943 | # | |||||
| 5944 | # Purpose: Extract Perl 5.38 field declarations | |||||
| 5945 | # from a class body or method source | |||||
| 5946 | # string, capturing field names, | |||||
| 5947 | # :param attributes, default values, | |||||
| 5948 | # and :isa type constraints. | |||||
| 5949 | # | |||||
| 5950 | # Entry: $code - source string potentially | |||||
| 5951 | # containing 'field $name ...' | |||||
| 5952 | # declarations. | |||||
| 5953 | # | |||||
| 5954 | # Exit: Returns a hashref of field name to | |||||
| 5955 | # field_info hashref. Returns an empty | |||||
| 5956 | # hashref if no field declarations | |||||
| 5957 | # are found. | |||||
| 5958 | # | |||||
| 5959 | # Side effects: Logs detections to stdout when | |||||
| 5960 | # verbose is set. | |||||
| 5961 | # -------------------------------------------------- | |||||
| 5962 | sub _extract_field_declarations { | |||||
| 5963 | 276 | 2257 | my ($self, $code) = @_; | |||
| 5964 | ||||||
| 5965 | 276 | 188 | my %fields; | |||
| 5966 | ||||||
| 5967 | # Pattern: field $name :param; | |||||
| 5968 | # Pattern: field $name :param(name); | |||||
| 5969 | # Pattern: field $name = default; | |||||
| 5970 | # More lenient pattern to catch various formats | |||||
| 5971 | 276 | 412 | while ($code =~ /^\s*field\s+\$(\w+)\s*([^;]*);/gm) { | |||
| 5972 | 12 | 18 | my ($name, $modifiers) = ($1, $2); | |||
| 5973 | ||||||
| 5974 | 12 | 17 | $self->_log(" FIELD: Found field \$$name with modifiers: [$modifiers]"); | |||
| 5975 | ||||||
| 5976 | 12 | 42 | my %field_info = ( | |||
| 5977 | name => $name, | |||||
| 5978 | _source => 'field' | |||||
| 5979 | ); | |||||
| 5980 | ||||||
| 5981 | # Check for :param attribute | |||||
| 5982 | 12 | 19 | if ($modifiers =~ /:param(?:\(([^)]+)\))?/) { | |||
| 5983 | 11 | 10 | $field_info{is_param} = 1; | |||
| 5984 | ||||||
| 5985 | 11 | 16 | if (defined $1) { | |||
| 5986 | # Explicit parameter name | |||||
| 5987 | 2 | 2 | $field_info{param_name} = $1; | |||
| 5988 | } else { | |||||
| 5989 | # Implicit - field name is param name | |||||
| 5990 | 9 | 8 | $field_info{param_name} = $name; | |||
| 5991 | } | |||||
| 5992 | ||||||
| 5993 | 11 | 12 | $self->_log(" FIELD: $name maps to parameter: $field_info{param_name}"); | |||
| 5994 | } | |||||
| 5995 | ||||||
| 5996 | # Check for default value - must come before type constraint check | |||||
| 5997 | 12 | 17 | if ($modifiers =~ /=\s*([^:;]+)(?::|;|$)/) { | |||
| 5998 | 4 | 4 | my $default = $1; | |||
| 5999 | 4 | 4 | $default =~ s/\s+$//; | |||
| 6000 | 4 | 6 | $field_info{_default} = $self->_clean_default_value($default, 1); | |||
| 6001 | 4 | 6 | $field_info{optional} = 1; | |||
| 6002 | 4 | 8 | $self->_log(" FIELD: $name has default: " . (defined $field_info{_default} ? $field_info{_default} : 'undef')); | |||
| 6003 | } | |||||
| 6004 | ||||||
| 6005 | # Check for type constraints | |||||
| 6006 | 12 | 13 | if ($modifiers =~ /:isa\(([^)]+)\)/) { | |||
| 6007 | 3 | 5 | $field_info{isa} = $1; | |||
| 6008 | 3 | 4 | $field_info{type} = 'object'; | |||
| 6009 | 3 | 7 | $self->_log(" FIELD: $name has type constraint: $1"); | |||
| 6010 | } | |||||
| 6011 | ||||||
| 6012 | 12 | 22 | $fields{$name} = \%field_info; | |||
| 6013 | } | |||||
| 6014 | ||||||
| 6015 | 276 | 255 | return \%fields; | |||
| 6016 | } | |||||
| 6017 | ||||||
| 6018 | # -------------------------------------------------- | |||||
| 6019 | # _merge_field_declarations | |||||
| 6020 | # | |||||
| 6021 | # Purpose: Integrate Perl 5.38 field declarations | |||||
| 6022 | # that carry the :param attribute into | |||||
| 6023 | # the code parameter hashref, so they | |||||
| 6024 | # appear as constructor parameters in | |||||
| 6025 | # the generated schema. | |||||
| 6026 | # | |||||
| 6027 | # Entry: $params - hashref of parameters | |||||
| 6028 | # extracted from code analysis | |||||
| 6029 | # (modified in place). | |||||
| 6030 | # $fields - hashref of field declarations | |||||
| 6031 | # as returned by | |||||
| 6032 | # _extract_field_declarations. | |||||
| 6033 | # | |||||
| 6034 | # Exit: Returns nothing. Modifies $params | |||||
| 6035 | # in place. | |||||
| 6036 | # | |||||
| 6037 | # Side effects: Logs merges to stdout when verbose | |||||
| 6038 | # is set. | |||||
| 6039 | # | |||||
| 6040 | # Notes: Only fields with is_param => 1 are | |||||
| 6041 | # merged. The param_name key in the | |||||
| 6042 | # field (which may differ from the | |||||
| 6043 | # field name if :param(name) was used) | |||||
| 6044 | # determines the parameter key. | |||||
| 6045 | # -------------------------------------------------- | |||||
| 6046 | sub _merge_field_declarations { | |||||
| 6047 | 6 | 25 | my ($self, $params, $fields) = @_; | |||
| 6048 | ||||||
| 6049 | 6 | 9 | foreach my $field_name (keys %$fields) { | |||
| 6050 | 11 | 10 | my $field = $fields->{$field_name}; | |||
| 6051 | ||||||
| 6052 | # Only process fields that are parameters | |||||
| 6053 | 11 | 13 | next unless $field->{is_param}; | |||
| 6054 | ||||||
| 6055 | 9 | 7 | my $param_name = $field->{param_name}; | |||
| 6056 | ||||||
| 6057 | # Create or update parameter info | |||||
| 6058 | 9 | 18 | $params->{$param_name} ||= {}; | |||
| 6059 | 9 | 6 | my $p = $params->{$param_name}; | |||
| 6060 | ||||||
| 6061 | # Merge field information into parameter | |||||
| 6062 | 9 | 12 | $p->{_source} = 'field' unless $p->{_source}; | |||
| 6063 | 9 | 10 | $p->{field_name} = $field_name if $field_name ne $param_name; | |||
| 6064 | ||||||
| 6065 | 9 | 10 | if ($field->{_default}) { | |||
| 6066 | 3 | 3 | $p->{_default} = $field->{_default}; | |||
| 6067 | 3 | 3 | $p->{optional} = 1; | |||
| 6068 | } | |||||
| 6069 | ||||||
| 6070 | 9 | 9 | if ($field->{isa}) { | |||
| 6071 | 2 | 3 | $p->{isa} = $field->{isa}; | |||
| 6072 | 2 | 2 | $p->{type} = 'object'; | |||
| 6073 | } | |||||
| 6074 | ||||||
| 6075 | 9 | 22 | $self->_log(" MERGED: Field $field_name -> parameter $param_name"); | |||
| 6076 | } | |||||
| 6077 | } | |||||
| 6078 | ||||||
| 6079 | # -------------------------------------------------- | |||||
| 6080 | # _extract_defaults_from_code | |||||
| 6081 | # | |||||
| 6082 | # Purpose: Scan a method body for default value | |||||
| 6083 | # assignment patterns and populate the | |||||
| 6084 | # optional and _default fields of | |||||
| 6085 | # known parameters. | |||||
| 6086 | # | |||||
| 6087 | # Entry: $params - hashref of parameters | |||||
| 6088 | # (modified in place). | |||||
| 6089 | # $code - method body source string. | |||||
| 6090 | # $method - method hashref, used for | |||||
| 6091 | # constructor-specific | |||||
| 6092 | # exclusions of $class and | |||||
| 6093 | # $self. | |||||
| 6094 | # | |||||
| 6095 | # Exit: Returns nothing. Modifies $params | |||||
| 6096 | # in place. | |||||
| 6097 | # | |||||
| 6098 | # Side effects: Logs detections to stdout when | |||||
| 6099 | # verbose is set. | |||||
| 6100 | # | |||||
| 6101 | # Notes: Eight default patterns are tried. | |||||
| 6102 | # Only parameters already present in | |||||
| 6103 | # $params are updated â this method | |||||
| 6104 | # does not add new parameters. | |||||
| 6105 | # Falls back to extracting all @_ | |||||
| 6106 | # assignments if $params is empty | |||||
| 6107 | # after the main pass. | |||||
| 6108 | # -------------------------------------------------- | |||||
| 6109 | sub _extract_defaults_from_code { | |||||
| 6110 | 273 | 333 | my ($self, $params, $code, $method) = @_; | |||
| 6111 | ||||||
| 6112 | # Pattern 1: my $param = value; | |||||
| 6113 | 273 | 479 | while ($code =~ /my\s+\$(\w+)\s*=\s*([^;]+);/g) { | |||
| 6114 | 25 | 32 | my ($param, $value) = ($1, $2); | |||
| 6115 | 25 | 47 | next unless exists $params->{$param}; | |||
| 6116 | ||||||
| 6117 | 1 | 1 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6118 | 1 | 1 | $params->{$param}{optional} = 1; | |||
| 6119 | 1 | 2 | $self->_log(" CODE: $param has default: " . $self->_format_default($params->{$param}{_default})); | |||
| 6120 | } | |||||
| 6121 | ||||||
| 6122 | # Pattern 2: $param = value unless defined $param; | |||||
| 6123 | 273 | 473 | while ($code =~ /\$(\w+)\s*=\s*([^;]+?)\s+unless\s+(?:defined\s+)?\$\1/g) { | |||
| 6124 | 4 | 37 | my ($param, $value) = ($1, $2); | |||
| 6125 | 4 | 7 | next unless exists $params->{$param}; | |||
| 6126 | ||||||
| 6127 | 4 | 5 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6128 | 4 | 4 | $params->{$param}{optional} = 1; | |||
| 6129 | 4 | 7 | $self->_log(" CODE: $param has default (unless): " . $self->_format_default($params->{$param}{_default})); | |||
| 6130 | } | |||||
| 6131 | ||||||
| 6132 | # Pattern 3: $param = value unless $param; | |||||
| 6133 | 273 | 449 | while ($code =~ /\$(\w+)\s*=\s*([^;]+?)\s+unless\s+\$\1/g) { | |||
| 6134 | 1 | 1 | my ($param, $value) = ($1, $2); | |||
| 6135 | 1 | 1 | next unless exists $params->{$param}; | |||
| 6136 | ||||||
| 6137 | 1 | 1 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6138 | 1 | 1 | $params->{$param}{optional} = 1; | |||
| 6139 | 1 | 1 | $self->_log(" CODE: $param has default (unless): " . $self->_format_default($params->{$param}{_default})); | |||
| 6140 | } | |||||
| 6141 | ||||||
| 6142 | # Pattern 4: $param = $param || 'default'; | |||||
| 6143 | 273 | 359 | while ($code =~ /\$(\w+)\s*=\s*\$\1\s*\|\|\s*([^;]+);/g) { | |||
| 6144 | 8 | 12 | my ($param, $value) = ($1, $2); | |||
| 6145 | 8 | 9 | next unless exists $params->{$param}; | |||
| 6146 | ||||||
| 6147 | 8 | 8 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6148 | 8 | 10 | $params->{$param}{optional} = 1; | |||
| 6149 | 8 | 11 | $self->_log(" CODE: $param has default (||): " . $self->_format_default($params->{$param}{_default})); | |||
| 6150 | } | |||||
| 6151 | ||||||
| 6152 | # Pattern 5: $param ||= 'default'; | |||||
| 6153 | 273 | 358 | while ($code =~ /\$(\w+)\s*\|\|=\s*([^;]+);/g) { | |||
| 6154 | 2 | 4 | my ($param, $value) = ($1, $2); | |||
| 6155 | 2 | 2 | next unless exists $params->{$param}; | |||
| 6156 | ||||||
| 6157 | 2 | 3 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6158 | 2 | 2 | $params->{$param}{optional} = 1; | |||
| 6159 | 2 | 4 | $self->_log(" CODE: $param has default (||=): " . $self->_format_default($params->{$param}{_default})); | |||
| 6160 | } | |||||
| 6161 | ||||||
| 6162 | # Pattern 6: $param //= 'default'; | |||||
| 6163 | 273 | 369 | while ($code =~ /\$(\w+)\s*\/\/=\s*([^;]+);/g) { | |||
| 6164 | 8 | 10 | my ($param, $value) = ($1, $2); | |||
| 6165 | 8 | 12 | next unless exists $params->{$param}; # Using -> because $params is a reference | |||
| 6166 | ||||||
| 6167 | 7 | 7 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6168 | ||||||
| 6169 | 7 | 6 | $params->{$param}{optional} = 1; | |||
| 6170 | 7 | 9 | $self->_log(" CODE: $param has default (//=): " . $self->_format_default($params->{$param}{_default})); | |||
| 6171 | } | |||||
| 6172 | ||||||
| 6173 | # Pattern 7: $param = defined $param ? $param : 'default'; | |||||
| 6174 | 273 | 394 | while ($code =~ /\$(\w+)\s*=\s*defined\s+\$\1\s*\?\s*\$\1\s*:\s*([^;]+);/g) { | |||
| 6175 | 4 | 4 | my ($param, $value) = ($1, $2); | |||
| 6176 | ||||||
| 6177 | # Create param entry if it doesn't exist | |||||
| 6178 | 4 | 5 | $params->{$param} ||= {}; | |||
| 6179 | ||||||
| 6180 | 4 | 4 | my $cleaned = $self->_clean_default_value($value, 1); | |||
| 6181 | ||||||
| 6182 | 4 | 4 | $params->{$param}{_default} = $cleaned; | |||
| 6183 | 4 | 4 | $params->{$param}{optional} = 1; | |||
| 6184 | 4 | 4 | $self->_log(" CODE: $param has default (ternary): " . $self->_format_default($params->{$param}{_default})); | |||
| 6185 | } | |||||
| 6186 | ||||||
| 6187 | # Pattern 8: $param = $args{param} || 'default'; | |||||
| 6188 | 273 | 367 | while ($code =~ /\$(\w+)\s*=\s*\$args\{['"]?\w+['"]?\}\s*\|\|\s*([^;]+);/g) { | |||
| 6189 | 0 | 0 | my ($param, $value) = ($1, $2); | |||
| 6190 | 0 | 0 | next unless exists $params->{$param}; | |||
| 6191 | ||||||
| 6192 | 0 | 0 | $params->{$param}{_default} = $self->_clean_default_value($value, 1); | |||
| 6193 | 0 | 0 | $params->{$param}{optional} = 1; | |||
| 6194 | 0 | 0 | $self->_log(" CODE: $param has default (from args): " . $self->_format_default($params->{$param}{_default})); | |||
| 6195 | } | |||||
| 6196 | ||||||
| 6197 | # Pattern for non-empty hashref | |||||
| 6198 | 273 | 321 | while ($code =~ /\$(\w+)\s*\|\|=\s*\{[^}]+\}/gs) { | |||
| 6199 | 1 | 1 | my $param = $1; | |||
| 6200 | 1 | 2 | next unless exists $params->{$param}; | |||
| 6201 | ||||||
| 6202 | # Return empty hashref as placeholder (can't evaluate complex hashrefs) | |||||
| 6203 | 1 | 1 | $params->{$param}{_default} = {}; | |||
| 6204 | 1 | 1 | $params->{$param}{optional} = 1; | |||
| 6205 | 1 | 1 | $self->_log(" CODE: $param has hashref default (||=)"); | |||
| 6206 | } | |||||
| 6207 | ||||||
| 6208 | # Fallback: extract parameters from classic Perl body styles | |||||
| 6209 | # Only run if signature extraction found nothing | |||||
| 6210 | # TODO: On constructors, use $class to help to determine the output type | |||||
| 6211 | 273 273 | 175 373 | if (!keys %{$params}) { | |||
| 6212 | 155 | 113 | my $position = 0; | |||
| 6213 | ||||||
| 6214 | # Style 1: my ($a, $b) = @_; | |||||
| 6215 | 155 | 227 | while ($code =~ /my\s*\(\s*([^)]+)\s*\)\s*=\s*\@_/g) { | |||
| 6216 | 22 | 52 | my @vars = $1 =~ /\$(\w+)/g; | |||
| 6217 | 22 | 24 | foreach my $var (@vars) { | |||
| 6218 | 22 | 79 | if(($var eq 'class') && ($position == 0) && ($method->{name} eq 'new')) { | |||
| 6219 | # Don't include "class" in the variable names of the constructor | |||||
| 6220 | 12 | 21 | delete $params->{'class'}; | |||
| 6221 | } elsif(($var eq 'self') && ($position == 0) && ($method->{name} ne 'new')) { | |||||
| 6222 | # Don't include "self" in the variable names | |||||
| 6223 | 10 | 20 | delete $params->{'self'}; | |||
| 6224 | } else { | |||||
| 6225 | 0 | 0 | $params->{$var} ||= { position => $position++ }; | |||
| 6226 | 0 | 0 | $self->_log(" CODE: $var extracted from \@_ list assignment"); | |||
| 6227 | } | |||||
| 6228 | } | |||||
| 6229 | } | |||||
| 6230 | ||||||
| 6231 | # Style 2: my $x = shift; | |||||
| 6232 | 155 | 221 | while ($code =~ /my\s+\$(\w+)\s*=\s*shift\b/g) { | |||
| 6233 | 9 | 8 | my $var = $1; | |||
| 6234 | 9 | 36 | if(($var eq 'class') && ($position == 0) && ($method->{name} eq 'new')) { | |||
| 6235 | # Don't include "class" in the variable names of the constructor | |||||
| 6236 | 0 | 0 | delete $params->{'class'}; | |||
| 6237 | } elsif(($var eq 'self') && ($position == 0) && ($method->{name} ne 'new')) { | |||||
| 6238 | # Don't include "self" in the variable names | |||||
| 6239 | 9 | 10 | delete $params->{'self'}; | |||
| 6240 | } else { | |||||
| 6241 | 0 | 0 | $params->{$var} ||= { position => $position++ }; | |||
| 6242 | 0 | 0 | $self->_log(" CODE: $var is extracted from shift"); | |||
| 6243 | } | |||||
| 6244 | } | |||||
| 6245 | ||||||
| 6246 | # Style 3: my $x = $_[0]; | |||||
| 6247 | 155 | 251 | while ($code =~ /my\s+\$(\w+)\s*=\s*\$_\[(\d+)\]/g) { | |||
| 6248 | 3 | 3 | my ($var, $index) = ($1, $2); | |||
| 6249 | 3 | 5 | if(($var ne 'class') || ($position > 0) || ($method->{name} ne 'new')) { | |||
| 6250 | 3 | 8 | $params->{$var} ||= { position => $index }; | |||
| 6251 | 3 | 4 | $self->_log(" CODE: $var is extracted from \$_\[$index\]"); | |||
| 6252 | } | |||||
| 6253 | } | |||||
| 6254 | } | |||||
| 6255 | } | |||||
| 6256 | ||||||
| 6257 | # -------------------------------------------------- | |||||
| 6258 | # _format_default | |||||
| 6259 | # | |||||
| 6260 | # Purpose: Format a default value for display | |||||
| 6261 | # in verbose log output. | |||||
| 6262 | # | |||||
| 6263 | # Entry: $default - the default value to | |||||
| 6264 | # format. May be undef, | |||||
| 6265 | # a scalar, a hashref, or | |||||
| 6266 | # an arrayref. | |||||
| 6267 | # | |||||
| 6268 | # Exit: Returns a display string: 'undef' | |||||
| 6269 | # for undef, 'HASH ref' / 'ARRAY ref' | |||||
| 6270 | # for references, or the value itself | |||||
| 6271 | # for scalars. | |||||
| 6272 | # | |||||
| 6273 | # Side effects: None. | |||||
| 6274 | # -------------------------------------------------- | |||||
| 6275 | sub _format_default { | |||||
| 6276 | 32 | 36 | my ($self, $default) = @_; | |||
| 6277 | 32 | 27 | return 'undef' unless defined $default; | |||
| 6278 | 30 | 33 | return ref($default) . ' ref' if ref($default); | |||
| 6279 | 25 | 36 | return $default; | |||
| 6280 | } | |||||
| 6281 | ||||||
| 6282 | # -------------------------------------------------- | |||||
| 6283 | # _analyze_parameter_constraints | |||||
| 6284 | # | |||||
| 6285 | # Purpose: Infer min, max, and regex match | |||||
| 6286 | # constraints for a single parameter | |||||
| 6287 | # from length checks, numeric | |||||
| 6288 | # comparisons, and regex match | |||||
| 6289 | # patterns in the method body. | |||||
| 6290 | # | |||||
| 6291 | # Entry: $p_ref - reference to the parameter | |||||
| 6292 | # hashref (modified in place). | |||||
| 6293 | # $param - parameter name string. | |||||
| 6294 | # $code - method body source string. | |||||
| 6295 | # | |||||
| 6296 | # Exit: Returns nothing. Modifies the | |||||
| 6297 | # referenced parameter hashref. | |||||
| 6298 | # | |||||
| 6299 | # Side effects: Logs detections to stdout when | |||||
| 6300 | # verbose is set. | |||||
| 6301 | # | |||||
| 6302 | # Notes: Numeric comparisons that appear | |||||
| 6303 | # inside die/croak guard conditions | |||||
| 6304 | # are excluded to avoid inferring | |||||
| 6305 | # invalid-input ranges as valid | |||||
| 6306 | # constraints. | |||||
| 6307 | # -------------------------------------------------- | |||||
| 6308 | sub _analyze_parameter_constraints { | |||||
| 6309 | 174 | 177 | my ($self, $p_ref, $param, $code) = @_; | |||
| 6310 | 174 | 121 | my $p = $$p_ref; | |||
| 6311 | ||||||
| 6312 | # Do not treat comparisons inside die/croak/confess as valid constraints | |||||
| 6313 | 174 | 128 | my $guarded = 0; | |||
| 6314 | 174 | 3542 | if ($code =~ /(die|croak|confess)\b[^{;]*\bif\b[^{;]*\$$param\b/s) { | |||
| 6315 | 22 | 21 | $guarded = 1; | |||
| 6316 | } | |||||
| 6317 | ||||||
| 6318 | # Length checks for strings | |||||
| 6319 | 174 | 3966 | if ($code =~ /length\s*\(\s*\$$param\s*\)\s*([<>]=?)\s*(\d+)/) { | |||
| 6320 | 4 | 8 | my ($op, $val) = ($1, $2); | |||
| 6321 | 4 | 11 | $p->{type} ||= 'string'; | |||
| 6322 | 4 | 7 | if ($op eq '<') { | |||
| 6323 | 2 | 4 | $p->{max} = $val - 1; | |||
| 6324 | } elsif ($op eq '<=') { | |||||
| 6325 | 0 | 0 | $p->{max} = $val; | |||
| 6326 | } elsif ($op eq '>') { | |||||
| 6327 | 1 | 2 | $p->{min} = $val + 1; | |||
| 6328 | } elsif ($op eq '>=') { | |||||
| 6329 | 1 | 1 | $p->{min} = $val; | |||
| 6330 | } | |||||
| 6331 | 4 | 8 | $self->_log(" CODE: $param length constraint $op $val"); | |||
| 6332 | } | |||||
| 6333 | ||||||
| 6334 | # Numeric range checks (only if NOT part of error guard) | |||||
| 6335 | 174 | 3009 | if ( | |||
| 6336 | !$guarded | |||||
| 6337 | && $code =~ /\$$param\s*([<>]=?)\s*([+-]?(?:\d+\.?\d*|\.\d+))/ | |||||
| 6338 | ) { | |||||
| 6339 | 10 | 19 | my ($op, $val) = ($1, $2); | |||
| 6340 | 10 | 15 | $p->{type} ||= looks_like_number($val) ? 'number' : 'integer'; | |||
| 6341 | ||||||
| 6342 | 10 | 36 | if ($op eq '<' || $op eq '<=') { | |||
| 6343 | # Only set max if it tightens the range | |||||
| 6344 | 1 | 1 | my $max = ($op eq '<') ? $val - 1 : $val; | |||
| 6345 | 1 | 3 | $p->{max} = $max if !defined($p->{max}) || $max < $p->{max}; | |||
| 6346 | } elsif ($op eq '>' || $op eq '>=') { | |||||
| 6347 | 9 | 13 | my $min = ($op eq '>') ? $val + 1 : $val; | |||
| 6348 | 9 | 22 | $p->{min} = $min if !defined($p->{min}) || $min > $p->{min}; | |||
| 6349 | } | |||||
| 6350 | } | |||||
| 6351 | ||||||
| 6352 | # Regex pattern matching with better capture | |||||
| 6353 | 174 | 9082 | if ($code =~ /\$$param\s*=~\s*((?:qr?\/[^\/]+\/|\$[\w:]+|\$\{\w+\}))/) { | |||
| 6354 | 1 | 1 | my $pattern = $1; | |||
| 6355 | 1 | 4 | $p->{type} ||= 'string'; | |||
| 6356 | ||||||
| 6357 | # Clean up the pattern if it's a straightforward regex | |||||
| 6358 | 1 | 2 | if ($pattern =~ /^qr?\/([^\/]+)\/$/) { | |||
| 6359 | 1 | 3 | $p->{matches} = "/$1/"; | |||
| 6360 | } else { | |||||
| 6361 | 0 | 0 | $p->{matches} = $pattern; | |||
| 6362 | } | |||||
| 6363 | 1 | 2 | $self->_log(" CODE: $param matches pattern: $p->{matches}"); | |||
| 6364 | } | |||||
| 6365 | } | |||||
| 6366 | ||||||
| 6367 | # -------------------------------------------------- | |||||
| 6368 | # _analyze_parameter_validation | |||||
| 6369 | # | |||||
| 6370 | # Purpose: Determine optionality and extract | |||||
| 6371 | # default values for a single parameter | |||||
| 6372 | # by analysing explicit required checks | |||||
| 6373 | # (die/croak unless defined) and default | |||||
| 6374 | # assignment patterns in the method body. | |||||
| 6375 | # | |||||
| 6376 | # Entry: $p_ref - reference to the parameter | |||||
| 6377 | # hashref (modified in place). | |||||
| 6378 | # $param - parameter name string. | |||||
| 6379 | # $code - method body source string. | |||||
| 6380 | # | |||||
| 6381 | # Exit: Returns nothing. Modifies the | |||||
| 6382 | # referenced parameter hashref. | |||||
| 6383 | # | |||||
| 6384 | # Side effects: Logs detections to stdout when | |||||
| 6385 | # verbose is set. | |||||
| 6386 | # | |||||
| 6387 | # Notes: Explicit required checks take highest | |||||
| 6388 | # priority and override any default | |||||
| 6389 | # value detected earlier. | |||||
| 6390 | # -------------------------------------------------- | |||||
| 6391 | sub _analyze_parameter_validation { | |||||
| 6392 | 175 | 235 | my ($self, $p_ref, $param, $code) = @_; | |||
| 6393 | 175 | 124 | my $p = $$p_ref; | |||
| 6394 | ||||||
| 6395 | # Required/optional checks | |||||
| 6396 | 175 | 114 | my $is_required = 0; | |||
| 6397 | ||||||
| 6398 | # Die/croak if not defined | |||||
| 6399 | 175 | 4418 | if ($code =~ /(?:die|croak|confess)\s+[^;]*unless\s+(?:defined\s+)?\$$param/s) { | |||
| 6400 | 24 | 25 | $is_required = 1; | |||
| 6401 | } | |||||
| 6402 | ||||||
| 6403 | # Extract default values with the new method | |||||
| 6404 | 175 | 277 | my $default_value = $self->_extract_default_value($param, $code); | |||
| 6405 | 175 | 251 | if (defined $default_value && !exists $p->{_default}) { | |||
| 6406 | 3 | 4 | $p->{optional} = 1; | |||
| 6407 | 3 | 3 | $p->{_default} = $default_value; | |||
| 6408 | ||||||
| 6409 | # Try to infer type from default value if not already set | |||||
| 6410 | 3 | 5 | unless ($p->{type}) { | |||
| 6411 | 2 | 3 | if (looks_like_number($default_value)) { | |||
| 6412 | 2 | 4 | $p->{type} = $default_value =~ /\./ ? 'number' : 'integer'; | |||
| 6413 | } elsif (ref($default_value) eq 'ARRAY') { | |||||
| 6414 | 0 | 0 | $p->{type} = 'arrayref'; | |||
| 6415 | } elsif (ref($default_value) eq 'HASH') { | |||||
| 6416 | 0 | 0 | $p->{type} = 'hashref'; | |||
| 6417 | } elsif ($default_value eq 'undef') { | |||||
| 6418 | 0 | 0 | $p->{type} = 'scalar'; # undef can be any scalar | |||
| 6419 | } elsif (defined $default_value && !ref($default_value)) { | |||||
| 6420 | 0 | 0 | $p->{type} = 'string'; | |||
| 6421 | } | |||||
| 6422 | } | |||||
| 6423 | ||||||
| 6424 | 3 | 6 | $self->_log(" CODE: $param has default value: " . (ref($default_value) ? ref($default_value) . ' ref' : $default_value)); | |||
| 6425 | } | |||||
| 6426 | ||||||
| 6427 | # Also check for simple default assignment without condition | |||||
| 6428 | # Pattern: $param = 'value'; | |||||
| 6429 | 175 | 3264 | if (!$default_value && !exists $p->{_default} && $code =~ /\$$param\s*=\s*([^;{}]+?)(?:\s*[;}])/s) { | |||
| 6430 | 9 | 13 | my $assignment = $1; | |||
| 6431 | # Make sure it's not part of a larger expression | |||||
| 6432 | 9 | 39 | if ($assignment !~ /\$$param/ && $assignment !~ /^shift/) { | |||
| 6433 | 9 | 8 | my $possible_default = $assignment; | |||
| 6434 | 9 | 8 | $possible_default =~ s/\s*;\s*$//; | |||
| 6435 | 9 | 14 | $possible_default = $self->_clean_default_value($possible_default); | |||
| 6436 | 9 | 10 | if (defined $possible_default) { | |||
| 6437 | 6 | 7 | $p->{_default} = $possible_default; | |||
| 6438 | 6 | 8 | $p->{optional} = 1; | |||
| 6439 | 6 | 12 | $self->_log(" CODE: $param has unconditional default: $possible_default"); | |||
| 6440 | } | |||||
| 6441 | } | |||||
| 6442 | } | |||||
| 6443 | ||||||
| 6444 | # Explicit required check overrides default detection | |||||
| 6445 | 175 | 290 | if ($is_required) { | |||
| 6446 | 24 | 28 | $p->{optional} = 0; | |||
| 6447 | 24 | 31 | delete $p->{_default} if exists $p->{_default}; | |||
| 6448 | 24 | 40 | $self->_log(" CODE: $param is required (validation check)"); | |||
| 6449 | } | |||||
| 6450 | } | |||||
| 6451 | ||||||
| 6452 | # -------------------------------------------------- | |||||
| 6453 | # _merge_parameter_analyses | |||||
| 6454 | # | |||||
| 6455 | # Purpose: Merge parameter information from POD, | |||||
| 6456 | # code, and signature analysis into a | |||||
| 6457 | # single authoritative parameter hashref | |||||
| 6458 | # for each parameter. | |||||
| 6459 | # | |||||
| 6460 | # Entry: $pod - hashref of parameters from POD | |||||
| 6461 | # analysis. | |||||
| 6462 | # $code - hashref of parameters from | |||||
| 6463 | # code analysis. | |||||
| 6464 | # $sig - hashref of parameters from | |||||
| 6465 | # signature analysis (optional, | |||||
| 6466 | # defaults to empty hashref). | |||||
| 6467 | # | |||||
| 6468 | # Exit: Returns a merged hashref of parameter | |||||
| 6469 | # name to spec hashref. Each spec has | |||||
| 6470 | # all available information combined, | |||||
| 6471 | # with POD taking highest priority, | |||||
| 6472 | # code second, and signature filling | |||||
| 6473 | # remaining gaps. | |||||
| 6474 | # | |||||
| 6475 | # Side effects: Logs merged parameter details to | |||||
| 6476 | # stdout when verbose is set. | |||||
| 6477 | # | |||||
| 6478 | # Notes: Position is determined by majority | |||||
| 6479 | # vote across all sources, with the | |||||
| 6480 | # lowest position winning ties. Optional | |||||
| 6481 | # status is determined by | |||||
| 6482 | # _determine_optional_status. Internal | |||||
| 6483 | # _source keys are stripped from the | |||||
| 6484 | # merged result. | |||||
| 6485 | # -------------------------------------------------- | |||||
| 6486 | sub _merge_parameter_analyses { | |||||
| 6487 | 269 | 349 | my ($self, $pod, $code, $sig) = @_; | |||
| 6488 | ||||||
| 6489 | 269 | 229 | my %merged; | |||
| 6490 | ||||||
| 6491 | # Start with all parameters from all sources | |||||
| 6492 | 269 241 | 430 275 | my %all_params = map { $_ => 1 } (keys %$pod, keys %$code, keys %$sig); | |||
| 6493 | ||||||
| 6494 | 269 | 315 | foreach my $param (keys %all_params) { | |||
| 6495 | 176 | 184 | my $p = $merged{$param} = {}; | |||
| 6496 | ||||||
| 6497 | # Collect position from all sources | |||||
| 6498 | 176 | 125 | my @positions; | |||
| 6499 | 176 | 268 | push @positions, $pod->{$param}{position} if $pod->{$param} && defined $pod->{$param}{position}; | |||
| 6500 | 176 | 197 | push @positions, $sig->{$param}{position} if $sig->{$param} && defined $sig->{$param}{position}; | |||
| 6501 | 176 | 327 | push @positions, $code->{$param}{position} if $code->{$param} && defined $code->{$param}{position}; | |||
| 6502 | ||||||
| 6503 | # Use the most common position, or lowest if tie | |||||
| 6504 | 176 | 156 | if (@positions) { | |||
| 6505 | 171 | 115 | my %pos_count; | |||
| 6506 | 171 | 285 | $pos_count{$_}++ for @positions; | |||
| 6507 | 171 0 | 189 0 | my ($best_pos) = sort { $pos_count{$b} <=> $pos_count{$a} || $a <=> $b } keys %pos_count; | |||
| 6508 | 171 | 309 | $p->{position} = $best_pos unless(exists($p->{position})); | |||
| 6509 | } | |||||
| 6510 | ||||||
| 6511 | # POD has highest priority for type info and explicit declarations | |||||
| 6512 | 176 | 179 | if ($pod->{$param}) { | |||
| 6513 | 67 67 | 89 111 | %$p = (%$p, %{$pod->{$param}}); | |||
| 6514 | } | |||||
| 6515 | ||||||
| 6516 | # Code analysis adds concrete evidence (but doesn't override POD explicit types) | |||||
| 6517 | 176 | 173 | if ($code->{$param}) { | |||
| 6518 | 174 174 | 114 206 | foreach my $key (keys %{$code->{$param}}) { | |||
| 6519 | 811 | 598 | next if $key eq '_source'; | |||
| 6520 | 644 | 543 | next if $key eq 'position'; | |||
| 6521 | ||||||
| 6522 | # Only override if POD didn't provide this info or it's a stronger signal | |||||
| 6523 | 475 | 309 | my $from_pod = exists $pod->{$param}; | |||
| 6524 | 475 | 500 | if (!exists $p->{$key} || | |||
| 6525 | ($key eq 'type' && $from_pod && $p->{type} eq 'string' && | |||||
| 6526 | $code->{$param}{$key} ne 'string')) { | |||||
| 6527 | 425 | 410 | $p->{$key} = $code->{$param}{$key}; | |||
| 6528 | } | |||||
| 6529 | } | |||||
| 6530 | } | |||||
| 6531 | ||||||
| 6532 | # Signature fills in remaining gaps | |||||
| 6533 | 176 | 191 | if ($sig->{$param}) { | |||
| 6534 | 0 0 | 0 0 | foreach my $key (keys %{$sig->{$param}}) { | |||
| 6535 | 0 | 0 | next if $key eq '_source'; | |||
| 6536 | 0 | 0 | next if $key eq 'position'; | |||
| 6537 | 0 | 0 | $p->{$key} //= $sig->{$param}{$key}; | |||
| 6538 | } | |||||
| 6539 | } | |||||
| 6540 | ||||||
| 6541 | # Handle optional field with better logic | |||||
| 6542 | 176 | 305 | $self->_determine_optional_status($p, $pod->{$param}, $code->{$param}); | |||
| 6543 | ||||||
| 6544 | # Clean up internal fields | |||||
| 6545 | 176 | 215 | delete $p->{_source}; | |||
| 6546 | } | |||||
| 6547 | ||||||
| 6548 | # Debug logging | |||||
| 6549 | 269 | 338 | if ($self->{verbose}) { | |||
| 6550 | 1 1 | 2 3 | foreach my $param (sort { ($merged{$a}{position} || 999) <=> ($merged{$b}{position} || 999) } keys %merged) { | |||
| 6551 | 2 | 2 | my $p = $merged{$param}; | |||
| 6552 | $self->_log(" MERGED $param: " . | |||||
| 6553 | 'pos=' . ($p->{position} || 'none') . | |||||
| 6554 | ", type=" . ($p->{type} || 'none') . | |||||
| 6555 | 2 | 5 | ", optional=" . (defined($p->{optional}) ? $p->{optional} : 'undef')); | |||
| 6556 | } | |||||
| 6557 | } | |||||
| 6558 | ||||||
| 6559 | 269 | 515 | return \%merged; | |||
| 6560 | } | |||||
| 6561 | ||||||
| 6562 | # -------------------------------------------------- | |||||
| 6563 | # _determine_optional_status | |||||
| 6564 | # | |||||
| 6565 | # Purpose: Set the optional field on a merged | |||||
| 6566 | # parameter spec based on evidence from | |||||
| 6567 | # POD and code analysis, with POD taking | |||||
| 6568 | # highest priority. | |||||
| 6569 | # | |||||
| 6570 | # Entry: $merged_param - the merged parameter | |||||
| 6571 | # hashref (modified in | |||||
| 6572 | # place). | |||||
| 6573 | # $pod_param - parameter spec from | |||||
| 6574 | # POD analysis, or undef. | |||||
| 6575 | # $code_param - parameter spec from | |||||
| 6576 | # code analysis, or undef. | |||||
| 6577 | # | |||||
| 6578 | # Exit: Returns nothing. Sets or leaves | |||||
| 6579 | # $merged_param->{optional}. | |||||
| 6580 | # | |||||
| 6581 | # Side effects: None. | |||||
| 6582 | # -------------------------------------------------- | |||||
| 6583 | sub _determine_optional_status { | |||||
| 6584 | 182 | 1192 | my ($self, $merged_param, $pod_param, $code_param) = @_; | |||
| 6585 | ||||||
| 6586 | 182 | 190 | my $pod_optional = $pod_param ? $pod_param->{optional} : undef; | |||
| 6587 | 182 | 169 | my $code_optional = $code_param ? $code_param->{optional} : undef; | |||
| 6588 | ||||||
| 6589 | # Explicit POD declaration wins | |||||
| 6590 | 182 | 218 | if (defined $pod_optional) { | |||
| 6591 | 16 | 18 | $merged_param->{optional} = $pod_optional; | |||
| 6592 | } | |||||
| 6593 | # Code validation evidence | |||||
| 6594 | elsif (defined $code_optional) { | |||||
| 6595 | 158 | 162 | $merged_param->{optional} = $code_optional; | |||
| 6596 | } | |||||
| 6597 | # Default: if we have any info about the param, assume required | |||||
| 6598 | elsif (keys %$merged_param > 0) { | |||||
| 6599 | 7 | 8 | $merged_param->{optional} = 0; | |||
| 6600 | } | |||||
| 6601 | # Otherwise leave undef (unknown) | |||||
| 6602 | } | |||||
| 6603 | ||||||
| 6604 | ||||||
| 6605 | # -------------------------------------------------- | |||||
| 6606 | # _calculate_input_confidence | |||||
| 6607 | # | |||||
| 6608 | # Purpose: Calculate a confidence score and level | |||||
| 6609 | # for the input parameter analysis, | |||||
| 6610 | # based on how much type, constraint, | |||||
| 6611 | # and semantic information was inferred | |||||
| 6612 | # for each parameter. | |||||
| 6613 | # | |||||
| 6614 | # Entry: $params - hashref of merged parameter | |||||
| 6615 | # specs as produced by | |||||
| 6616 | # _merge_parameter_analyses. | |||||
| 6617 | # | |||||
| 6618 | # Exit: Returns a hashref with keys: | |||||
| 6619 | # level - one of: none, | |||||
| 6620 | # very_low, low, | |||||
| 6621 | # medium, high | |||||
| 6622 | # score - numeric average | |||||
| 6623 | # across all params | |||||
| 6624 | # factors - arrayref of | |||||
| 6625 | # human-readable | |||||
| 6626 | # factor strings | |||||
| 6627 | # per_parameter - hashref of per- | |||||
| 6628 | # parameter score | |||||
| 6629 | # and factor detail | |||||
| 6630 | # Returns { level => 'none', ... } if | |||||
| 6631 | # no parameters were found. | |||||
| 6632 | # | |||||
| 6633 | # Side effects: None. | |||||
| 6634 | # -------------------------------------------------- | |||||
| 6635 | sub _calculate_input_confidence { | |||||
| 6636 | 70 | 1331 | my ($self, $params) = @_; | |||
| 6637 | ||||||
| 6638 | 70 | 48 | my @factors; # Track all confidence factors | |||
| 6639 | ||||||
| 6640 | 70 | 115 | return { level => 'none', factors => ['No parameters found'] } unless keys %$params; | |||
| 6641 | ||||||
| 6642 | 56 | 41 | my $total_score = 0; | |||
| 6643 | 56 | 37 | my $count = 0; | |||
| 6644 | 56 | 44 | my %param_details; # Store per-parameter analysis | |||
| 6645 | ||||||
| 6646 | 56 | 58 | foreach my $param (keys %$params) { | |||
| 6647 | 82 | 56 | my $p = $params->{$param}; | |||
| 6648 | 82 | 55 | my $score = 0; | |||
| 6649 | 82 | 52 | my @param_factors; | |||
| 6650 | ||||||
| 6651 | # Type information | |||||
| 6652 | 82 | 118 | if ($p->{type}) { | |||
| 6653 | 81 | 190 | if ($p->{type} eq 'string' && ($p->{min} || $p->{max} || $p->{matches})) { | |||
| 6654 | 6 | 5 | $score += 25; | |||
| 6655 | 6 | 5 | push @param_factors, "Type: constrained string (+25)"; | |||
| 6656 | } elsif ($p->{type} eq 'string') { | |||||
| 6657 | 14 | 9 | $score += 10; | |||
| 6658 | 14 | 34 | push @param_factors, "Type: plain string (+10)"; | |||
| 6659 | } else { | |||||
| 6660 | 61 | 44 | $score += 30; | |||
| 6661 | 61 | 76 | push @param_factors, "Type: $p->{type} (+30)"; | |||
| 6662 | } | |||||
| 6663 | } else { | |||||
| 6664 | 1 | 1 | push @param_factors, "No type information (-0)"; | |||
| 6665 | } | |||||
| 6666 | ||||||
| 6667 | # Constraints | |||||
| 6668 | 82 | 84 | if (defined $p->{min}) { | |||
| 6669 | 9 | 5 | $score += 15; | |||
| 6670 | 9 | 10 | push @param_factors, 'Has min constraint (+15)'; | |||
| 6671 | } | |||||
| 6672 | 82 | 66 | if (defined $p->{max}) { | |||
| 6673 | 8 | 5 | $score += 15; | |||
| 6674 | 8 | 11 | push @param_factors, "Has max constraint (+15)"; | |||
| 6675 | } | |||||
| 6676 | 82 | 77 | if (defined $p->{optional}) { | |||
| 6677 | 80 | 54 | $score += 20; | |||
| 6678 | 80 | 59 | push @param_factors, "Optional/required explicitly defined (+20)"; | |||
| 6679 | } | |||||
| 6680 | 82 | 82 | if ($p->{matches}) { | |||
| 6681 | 2 | 2 | $score += 20; | |||
| 6682 | 2 | 2 | push @param_factors, 'Has regex pattern constraint (+20)'; | |||
| 6683 | } | |||||
| 6684 | 82 | 79 | if ($p->{isa}) { | |||
| 6685 | 5 | 5 | $score += 25; | |||
| 6686 | 5 | 5 | push @param_factors, "Specific class constraint: $p->{isa} (+25)"; | |||
| 6687 | } | |||||
| 6688 | ||||||
| 6689 | # Position information | |||||
| 6690 | 82 | 75 | if (defined $p->{position}) { | |||
| 6691 | 79 | 51 | $score += 10; | |||
| 6692 | 79 | 83 | push @param_factors, "Position defined: $p->{position} (+10)"; | |||
| 6693 | } | |||||
| 6694 | ||||||
| 6695 | # Default value | |||||
| 6696 | 82 | 76 | if (exists $p->{_default}) { | |||
| 6697 | 19 | 38 | $score += 10; | |||
| 6698 | 19 | 11 | push @param_factors, "Has default value (+10)"; | |||
| 6699 | } | |||||
| 6700 | ||||||
| 6701 | # Semantic information | |||||
| 6702 | 82 | 71 | if ($p->{semantic}) { | |||
| 6703 | 15 | 12 | $score += 15; | |||
| 6704 | 15 | 15 | push @param_factors, "Semantic type: $p->{semantic} (+15)"; | |||
| 6705 | } | |||||
| 6706 | ||||||
| 6707 | 82 | 106 | $param_details{$param} = { | |||
| 6708 | score => $score, | |||||
| 6709 | factors => \@param_factors | |||||
| 6710 | }; | |||||
| 6711 | ||||||
| 6712 | 82 | 67 | $total_score += $score; | |||
| 6713 | 82 | 65 | $count++; | |||
| 6714 | } | |||||
| 6715 | ||||||
| 6716 | 56 | 100 | my $avg = $count ? ($total_score / $count) : 0; | |||
| 6717 | ||||||
| 6718 | # Build summary factors | |||||
| 6719 | 56 | 129 | push @factors, sprintf("Analyzed %d parameter%s", $count, $count == 1 ? '' : 's'); | |||
| 6720 | 56 | 250 | push @factors, sprintf("Average confidence score: %.1f", $avg); | |||
| 6721 | ||||||
| 6722 | # Add top contributing factors | |||||
| 6723 | 56 39 | 88 49 | my @sorted_params = sort { $param_details{$b}{score} <=> $param_details{$a}{score} } keys %param_details; | |||
| 6724 | ||||||
| 6725 | 56 | 61 | if (@sorted_params) { | |||
| 6726 | 56 | 50 | my $highest = $sorted_params[0]; | |||
| 6727 | 56 | 42 | my $highest_score = $param_details{$highest}{score}; | |||
| 6728 | 56 | 82 | push @factors, sprintf("Highest scoring parameter: \$$highest (score: %d)", $highest_score); | |||
| 6729 | ||||||
| 6730 | 56 | 68 | if (@sorted_params > 1) { | |||
| 6731 | 16 | 13 | my $lowest = $sorted_params[-1]; | |||
| 6732 | 16 | 21 | my $lowest_score = $param_details{$lowest}{score}; | |||
| 6733 | 16 | 21 | push @factors, sprintf("Lowest scoring parameter: \$$lowest (score: %d)", $lowest_score); | |||
| 6734 | } | |||||
| 6735 | } | |||||
| 6736 | ||||||
| 6737 | # Determine confidence level | |||||
| 6738 | 56 | 39 | my $level; | |||
| 6739 | 56 | 88 | if ($avg >= $CONFIDENCE_HIGH_THRESHOLD) { | |||
| 6740 | 43 | 129 | $level = $LEVEL_HIGH; | |||
| 6741 | 43 | 101 | push @factors, "High confidence: comprehensive type and constraint information"; | |||
| 6742 | } elsif ($avg >= $CONFIDENCE_MEDIUM_THRESHOLD) { | |||||
| 6743 | 11 | 49 | $level = $LEVEL_MEDIUM; | |||
| 6744 | 11 | 26 | push @factors, "Medium confidence: some type or constraint information present"; | |||
| 6745 | } elsif ($avg >= $CONFIDENCE_LOW_THRESHOLD) { | |||||
| 6746 | 0 | 0 | $level = $LEVEL_LOW; | |||
| 6747 | 0 | 0 | push @factors, "Low confidence: minimal type information"; | |||
| 6748 | } else { | |||||
| 6749 | 2 | 15 | $level = $LEVEL_VERY_LOW; | |||
| 6750 | 2 | 4 | push @factors, "Very low confidence: little to no type information"; | |||
| 6751 | } | |||||
| 6752 | ||||||
| 6753 | return { | |||||
| 6754 | 56 | 202 | level => $level, | |||
| 6755 | score => $avg, | |||||
| 6756 | factors => \@factors, | |||||
| 6757 | per_parameter => \%param_details | |||||
| 6758 | }; | |||||
| 6759 | } | |||||
| 6760 | ||||||
| 6761 | # -------------------------------------------------- | |||||
| 6762 | # _calculate_output_confidence | |||||
| 6763 | # | |||||
| 6764 | # Purpose: Calculate a confidence score and level | |||||
| 6765 | # for the output analysis based on how | |||||
| 6766 | # much return type, value, class, | |||||
| 6767 | # context, and error convention | |||||
| 6768 | # information was determined. | |||||
| 6769 | # | |||||
| 6770 | # Entry: $output - the output hashref as built | |||||
| 6771 | # by _analyze_output. | |||||
| 6772 | # | |||||
| 6773 | # Exit: Returns a hashref with keys: | |||||
| 6774 | # level - one of: none, very_low, | |||||
| 6775 | # low, medium, high | |||||
| 6776 | # score - numeric confidence score | |||||
| 6777 | # factors - arrayref of factor strings | |||||
| 6778 | # Returns { level => 'none', ... } if | |||||
| 6779 | # output is empty. | |||||
| 6780 | # | |||||
| 6781 | # Side effects: None. | |||||
| 6782 | # -------------------------------------------------- | |||||
| 6783 | sub _calculate_output_confidence { | |||||
| 6784 | 276 | 961 | my ($self, $output) = @_; | |||
| 6785 | ||||||
| 6786 | 276 | 194 | my @factors; | |||
| 6787 | ||||||
| 6788 | 276 | 341 | return { level => 'none', factors => ['No return information found'] } unless keys %$output; | |||
| 6789 | ||||||
| 6790 | 259 | 201 | my $score = 0; | |||
| 6791 | ||||||
| 6792 | # Type information | |||||
| 6793 | 259 | 305 | if ($output->{type}) { | |||
| 6794 | 254 | 192 | $score += 30; | |||
| 6795 | 254 | 389 | push @factors, "Return type defined: $output->{type} (+30)"; | |||
| 6796 | } else { | |||||
| 6797 | 5 | 7 | push @factors, 'No return type information (-0)'; | |||
| 6798 | } | |||||
| 6799 | ||||||
| 6800 | # Specific value known | |||||
| 6801 | 259 | 322 | if (defined $output->{value}) { | |||
| 6802 | 20 | 15 | $score += 30; | |||
| 6803 | 20 | 27 | push @factors, "Specific return value: $output->{value} (+30)"; | |||
| 6804 | } | |||||
| 6805 | ||||||
| 6806 | # Class information for objects | |||||
| 6807 | 259 | 257 | if ($output->{isa}) { | |||
| 6808 | 21 | 15 | $score += 30; | |||
| 6809 | 21 | 27 | push @factors, "Returns specific class: $output->{isa} (+30)"; | |||
| 6810 | } | |||||
| 6811 | ||||||
| 6812 | # Context-aware returns | |||||
| 6813 | 259 | 275 | if ($output->{_context_aware}) { | |||
| 6814 | 4 | 2 | $score += 20; | |||
| 6815 | 4 | 3 | push @factors, "Context-aware return (wantarray) (+20)"; | |||
| 6816 | ||||||
| 6817 | 4 | 4 | if ($output->{_list_context}) { | |||
| 6818 | 4 | 4 | push @factors, " List context: $output->{_list_context}{type}"; | |||
| 6819 | } | |||||
| 6820 | 4 | 4 | if ($output->{_scalar_context}) { | |||
| 6821 | 3 | 3 | push @factors, " Scalar context: $output->{_scalar_context}{type}"; | |||
| 6822 | } | |||||
| 6823 | } | |||||
| 6824 | ||||||
| 6825 | # Error handling information | |||||
| 6826 | 259 | 260 | if ($output->{_error_return}) { | |||
| 6827 | 13 | 12 | $score += 15; | |||
| 6828 | 13 | 12 | push @factors, "Error return convention documented: $output->{_error_return} (+15)"; | |||
| 6829 | } | |||||
| 6830 | ||||||
| 6831 | # Success/failure pattern | |||||
| 6832 | 259 | 296 | if ($output->{_success_failure_pattern}) { | |||
| 6833 | 3 | 1 | $score += 10; | |||
| 6834 | 3 | 3 | push @factors, 'Success/failure pattern detected (+10)'; | |||
| 6835 | } | |||||
| 6836 | ||||||
| 6837 | # Chainable methods | |||||
| 6838 | 259 | 269 | if ($output->{_returns_self}) { | |||
| 6839 | 6 | 4 | $score += 15; | |||
| 6840 | 6 | 5 | push @factors, "Chainable method (fluent interface) (+15)"; | |||
| 6841 | } | |||||
| 6842 | ||||||
| 6843 | # Void context | |||||
| 6844 | 259 | 289 | if ($output->{_void_context}) { | |||
| 6845 | 5 | 4 | $score += 20; | |||
| 6846 | 5 | 4 | push @factors, "Void context method (no meaningful return) (+20)"; | |||
| 6847 | } | |||||
| 6848 | ||||||
| 6849 | # Exception handling | |||||
| 6850 | 259 | 278 | if ($output->{_error_handling} && $output->{_error_handling}{exception_handling}) { | |||
| 6851 | 2 | 2 | $score += 10; | |||
| 6852 | 2 | 2 | push @factors, 'Exception handling present (+10)'; | |||
| 6853 | } | |||||
| 6854 | ||||||
| 6855 | 259 | 455 | push @factors, sprintf("Total output confidence score: %d", $score); | |||
| 6856 | ||||||
| 6857 | # Determine confidence level | |||||
| 6858 | 259 | 210 | my $level; | |||
| 6859 | 259 | 521 | if ($score >= $CONFIDENCE_HIGH_THRESHOLD) { | |||
| 6860 | 44 | 134 | $level = $LEVEL_HIGH; | |||
| 6861 | 44 | 103 | push @factors, "High confidence: detailed return type and behavior"; | |||
| 6862 | } elsif ($score >= $CONFIDENCE_MEDIUM_THRESHOLD) { | |||||
| 6863 | 15 | 63 | $level = $LEVEL_MEDIUM; | |||
| 6864 | 15 | 31 | push @factors, "Medium confidence: return type defined"; | |||
| 6865 | } elsif ($score >= $CONFIDENCE_LOW_THRESHOLD) { | |||||
| 6866 | 199 | 1296 | $level = $LEVEL_LOW; | |||
| 6867 | 199 | 486 | push @factors, "Low confidence: minimal return information"; | |||
| 6868 | } else { | |||||
| 6869 | 1 | 6 | $level = $LEVEL_VERY_LOW; | |||
| 6870 | 1 | 2 | push @factors, 'Very low confidence: little return information'; | |||
| 6871 | } | |||||
| 6872 | ||||||
| 6873 | return { | |||||
| 6874 | 259 | 723 | level => $level, | |||
| 6875 | score => $score, | |||||
| 6876 | factors => \@factors | |||||
| 6877 | }; | |||||
| 6878 | } | |||||
| 6879 | ||||||
| 6880 | # -------------------------------------------------- | |||||
| 6881 | # _generate_confidence_report | |||||
| 6882 | # | |||||
| 6883 | # Purpose: Generate a human-readable text report | |||||
| 6884 | # of all confidence factors for a | |||||
| 6885 | # schema, for debugging and review | |||||
| 6886 | # purposes. | |||||
| 6887 | # | |||||
| 6888 | # Entry: $schema - schema hashref containing | |||||
| 6889 | # a populated _analysis key. | |||||
| 6890 | # | |||||
| 6891 | # Exit: Returns a multi-line string report, | |||||
| 6892 | # or nothing if $schema->{_analysis} | |||||
| 6893 | # is absent. | |||||
| 6894 | # | |||||
| 6895 | # Side effects: None. | |||||
| 6896 | # -------------------------------------------------- | |||||
| 6897 | sub _generate_confidence_report | |||||
| 6898 | { | |||||
| 6899 | 3 | 12 | my ($self, $schema) = @_; | |||
| 6900 | ||||||
| 6901 | 3 | 6 | return unless $schema->{_analysis}; | |||
| 6902 | ||||||
| 6903 | 2 | 2 | my $analysis = $schema->{_analysis}; | |||
| 6904 | 2 | 1 | my @report; | |||
| 6905 | ||||||
| 6906 | 2 | 6 | push @report, "Confidence Analysis for " . ($schema->{method_name} || 'method'); | |||
| 6907 | 2 | 2 | push @report, '=' x 60; | |||
| 6908 | 2 | 1 | push @report, ''; | |||
| 6909 | ||||||
| 6910 | 2 | 3 | push @report, "Overall Confidence: " . uc($analysis->{overall_confidence}); | |||
| 6911 | 2 | 2 | push @report, ''; | |||
| 6912 | ||||||
| 6913 | 2 | 2 | if ($analysis->{confidence_factors}{input}) { | |||
| 6914 | push @report, ( | |||||
| 6915 | "Input Parameters:", | |||||
| 6916 | " Confidence Level: " . uc($analysis->{input_confidence}) | |||||
| 6917 | 2 | 3 | ); | |||
| 6918 | 2 2 | 2 4 | foreach my $factor (@{$analysis->{confidence_factors}{input}}) { | |||
| 6919 | 2 | 2 | push @report, " - $factor"; | |||
| 6920 | } | |||||
| 6921 | 2 | 2 | push @report, ''; | |||
| 6922 | } | |||||
| 6923 | ||||||
| 6924 | 2 | 3 | if ($analysis->{confidence_factors}{output}) { | |||
| 6925 | push @report, 'Return Value:', | |||||
| 6926 | 2 | 2 | " Confidence Level: " . uc($analysis->{output_confidence}); | |||
| 6927 | 2 2 | 3 2 | foreach my $factor (@{$analysis->{confidence_factors}{output}}) { | |||
| 6928 | 2 | 2 | push @report, " - $factor"; | |||
| 6929 | } | |||||
| 6930 | 2 | 2 | push @report, ''; | |||
| 6931 | } | |||||
| 6932 | ||||||
| 6933 | 2 | 3 | if ($analysis->{per_parameter_scores}) { | |||
| 6934 | 0 | 0 | push @report, 'Per-Parameter Analysis:'; | |||
| 6935 | 0 0 | 0 0 | foreach my $param (sort keys %{$analysis->{per_parameter_scores}}) { | |||
| 6936 | 0 | 0 | my $details = $analysis->{per_parameter_scores}{$param}; | |||
| 6937 | 0 | 0 | push @report, " \$$param (score: $details->{score}):"; | |||
| 6938 | 0 0 | 0 0 | foreach my $factor (@{$details->{factors}}) { | |||
| 6939 | 0 | 0 | push @report, " - $factor"; | |||
| 6940 | } | |||||
| 6941 | } | |||||
| 6942 | 0 | 0 | push @report, ''; | |||
| 6943 | } | |||||
| 6944 | ||||||
| 6945 | 2 | 4 | return join("\n", @report); | |||
| 6946 | } | |||||
| 6947 | ||||||
| 6948 | # -------------------------------------------------- | |||||
| 6949 | # _generate_notes | |||||
| 6950 | # | |||||
| 6951 | # Purpose: Generate human-readable advisory notes | |||||
| 6952 | # about parameters whose type or | |||||
| 6953 | # optionality could not be determined, | |||||
| 6954 | # to guide manual schema review. | |||||
| 6955 | # | |||||
| 6956 | # Entry: $params - hashref of merged parameter | |||||
| 6957 | # specs. | |||||
| 6958 | # | |||||
| 6959 | # Exit: Returns an arrayref of note strings. | |||||
| 6960 | # Returns an empty arrayref if all | |||||
| 6961 | # parameters have known types and | |||||
| 6962 | # optionality. | |||||
| 6963 | # | |||||
| 6964 | # Side effects: None. | |||||
| 6965 | # -------------------------------------------------- | |||||
| 6966 | sub _generate_notes { | |||||
| 6967 | 271 | 1387 | my ($self, $params) = @_; | |||
| 6968 | ||||||
| 6969 | 271 | 200 | my @notes; | |||
| 6970 | ||||||
| 6971 | 271 | 376 | foreach my $param (keys %$params) { | |||
| 6972 | 173 | 130 | my $p = $params->{$param}; | |||
| 6973 | ||||||
| 6974 | 173 | 171 | unless ($p->{type}) { | |||
| 6975 | 53 | 54 | push @notes, "$param: type unknown - please review - will set to 'string' as a default"; | |||
| 6976 | } | |||||
| 6977 | ||||||
| 6978 | 173 | 183 | unless (defined $p->{optional}) { | |||
| 6979 | 11 | 14 | push @notes, "$param: optional status unknown"; | |||
| 6980 | # Don't automatically set - let it be undef if we don't know | |||||
| 6981 | } | |||||
| 6982 | } | |||||
| 6983 | ||||||
| 6984 | 271 | 378 | return \@notes; | |||
| 6985 | } | |||||
| 6986 | ||||||
| 6987 | # -------------------------------------------------- | |||||
| 6988 | # _set_defaults | |||||
| 6989 | # | |||||
| 6990 | # Purpose: Apply default type values to any | |||||
| 6991 | # parameters in a schema mode (input | |||||
| 6992 | # or output) whose type was not set | |||||
| 6993 | # during analysis, setting them to | |||||
| 6994 | # 'string' as a conservative fallback. | |||||
| 6995 | # | |||||
| 6996 | # Entry: $schema - the schema hashref being | |||||
| 6997 | # built by _analyze_method. | |||||
| 6998 | # $mode - either 'input' or 'output'. | |||||
| 6999 | # | |||||
| 7000 | # Exit: Returns nothing. Modifies $schema in | |||||
| 7001 | # place by setting type => 'string' on | |||||
| 7002 | # any parameter that lacks a type, and | |||||
| 7003 | # downgrading input confidence to 'low'. | |||||
| 7004 | # | |||||
| 7005 | # Side effects: Logs type defaulting to stdout when | |||||
| 7006 | # verbose is set. | |||||
| 7007 | # | |||||
| 7008 | # Notes: Called after all analysis is complete | |||||
| 7009 | # so that genuine type unknowns can be | |||||
| 7010 | # distinguished from analysis gaps. | |||||
| 7011 | # -------------------------------------------------- | |||||
| 7012 | sub _set_defaults { | |||||
| 7013 | 534 | 462 | my ($self, $schema, $mode) = @_; | |||
| 7014 | ||||||
| 7015 | 534 | 406 | my $params = $schema->{$mode}; | |||
| 7016 | ||||||
| 7017 | 534 | 568 | foreach my $param (keys %$params) { | |||
| 7018 | 665 | 481 | my $p = $params->{$param}; | |||
| 7019 | ||||||
| 7020 | 665 | 769 | next unless(ref($p) eq 'HASH'); | |||
| 7021 | 192 | 210 | unless ($p->{type}) { | |||
| 7022 | 67 | 95 | $self->_log(" DEBUG {$mode}{$param}: Setting to 'string' as a default"); | |||
| 7023 | 67 | 59 | $p->{'type'} = 'string'; | |||
| 7024 | 67 | 91 | $schema->{_confidence}{$mode}->{level} = 'low'; # Setting a default means it's a guess | |||
| 7025 | } | |||||
| 7026 | } | |||||
| 7027 | } | |||||
| 7028 | ||||||
| 7029 | # -------------------------------------------------- | |||||
| 7030 | # _analyze_relationships | |||||
| 7031 | # | |||||
| 7032 | # Purpose: Detect inter-parameter relationships | |||||
| 7033 | # in a method's source code, including | |||||
| 7034 | # mutually exclusive parameters, required | |||||
| 7035 | # groups, conditional requirements, | |||||
| 7036 | # dependencies, and value-based | |||||
| 7037 | # constraints. | |||||
| 7038 | # | |||||
| 7039 | # Entry: $method - method hashref containing | |||||
| 7040 | # at minimum a 'body' key | |||||
| 7041 | # with the source string. | |||||
| 7042 | # | |||||
| 7043 | # Exit: Returns an arrayref of relationship | |||||
| 7044 | # hashrefs. Returns an empty arrayref | |||||
| 7045 | # if no parameters or no relationships | |||||
| 7046 | # are found. | |||||
| 7047 | # | |||||
| 7048 | # Side effects: Logs detections to stdout when | |||||
| 7049 | # verbose is set. | |||||
| 7050 | # | |||||
| 7051 | # Notes: Parameter names are extracted from | |||||
| 7052 | # the my (...) = @_ pattern only â | |||||
| 7053 | # shift-style parameters are not | |||||
| 7054 | # currently analysed for relationships. | |||||
| 7055 | # -------------------------------------------------- | |||||
| 7056 | sub _analyze_relationships { | |||||
| 7057 | 270 | 285 | my ($self, $method) = @_; | |||
| 7058 | ||||||
| 7059 | 270 | 254 | my $code = $method->{body}; | |||
| 7060 | 270 | 203 | my @relationships; | |||
| 7061 | ||||||
| 7062 | # Extract all parameter names from the method | |||||
| 7063 | my @param_names; | |||||
| 7064 | 270 | 579 | if ($code =~ /my\s*\(\s*\$\w+\s*,\s*(.+?)\)\s*=\s*\@_/s) { | |||
| 7065 | 113 | 125 | my $params = $1; | |||
| 7066 | 113 | 224 | @param_names = $params =~ /\$(\w+)/g; | |||
| 7067 | } | |||||
| 7068 | ||||||
| 7069 | 270 | 323 | return [] unless @param_names; | |||
| 7070 | ||||||
| 7071 | # Detect mutually exclusive parameters | |||||
| 7072 | 99 99 | 68 114 | push @relationships, @{$self->_detect_mutually_exclusive($code, \@param_names)}; | |||
| 7073 | ||||||
| 7074 | # Detect required groups (OR logic) | |||||
| 7075 | 99 99 | 91 120 | push @relationships, @{$self->_detect_required_groups($code, \@param_names)}; | |||
| 7076 | ||||||
| 7077 | # Detect conditional requirements (IF-THEN) | |||||
| 7078 | 99 99 | 74 124 | push @relationships, @{$self->_detect_conditional_requirements($code, \@param_names)}; | |||
| 7079 | ||||||
| 7080 | # Detect dependencies | |||||
| 7081 | 99 99 | 71 115 | push @relationships, @{$self->_detect_dependencies($code, \@param_names)}; | |||
| 7082 | ||||||
| 7083 | # Detect value-based constraints | |||||
| 7084 | 99 99 | 74 120 | push @relationships, @{$self->_detect_value_constraints($code, \@param_names)}; | |||
| 7085 | ||||||
| 7086 | # Deduplicate relationships | |||||
| 7087 | 99 | 120 | my @unique = $self->_deduplicate_relationships(\@relationships); | |||
| 7088 | ||||||
| 7089 | 99 | 111 | return \@unique; | |||
| 7090 | } | |||||
| 7091 | ||||||
| 7092 | # -------------------------------------------------- | |||||
| 7093 | # _deduplicate_relationships | |||||
| 7094 | # | |||||
| 7095 | # Purpose: Remove duplicate relationship entries | |||||
| 7096 | # from the relationships list by | |||||
| 7097 | # computing a canonical signature for | |||||
| 7098 | # each relationship type. | |||||
| 7099 | # | |||||
| 7100 | # Entry: $relationships - arrayref of | |||||
| 7101 | # relationship hashrefs. | |||||
| 7102 | # | |||||
| 7103 | # Exit: Returns a deduplicated list of | |||||
| 7104 | # relationship hashrefs. | |||||
| 7105 | # | |||||
| 7106 | # Side effects: None. | |||||
| 7107 | # -------------------------------------------------- | |||||
| 7108 | sub _deduplicate_relationships { | |||||
| 7109 | 103 | 1059 | my ($self, $relationships) = @_; | |||
| 7110 | ||||||
| 7111 | 103 | 77 | my @unique; | |||
| 7112 | my %seen; | |||||
| 7113 | ||||||
| 7114 | 103 | 81 | foreach my $rel (@$relationships) { | |||
| 7115 | # Create a signature for this relationship | |||||
| 7116 | 26 | 14 | my $sig; | |||
| 7117 | 26 | 39 | if ($rel->{type} eq 'mutually_exclusive') { | |||
| 7118 | 11 11 | 9 19 | $sig = join(':', 'mutex', sort @{$rel->{params}}); | |||
| 7119 | } elsif ($rel->{type} eq 'required_group') { | |||||
| 7120 | 5 5 | 5 9 | $sig = join(':', 'reqgroup', sort @{$rel->{params}}); | |||
| 7121 | } elsif ($rel->{type} eq 'conditional_requirement') { | |||||
| 7122 | 7 | 7 | $sig = join(':', 'condreq', $rel->{if}, $rel->{then_required}); | |||
| 7123 | } elsif ($rel->{type} eq 'dependency') { | |||||
| 7124 | 0 | 0 | $sig = join(':', 'dep', $rel->{param}, $rel->{requires}); | |||
| 7125 | } elsif ($rel->{type} eq 'value_constraint') { | |||||
| 7126 | 2 | 2 | $sig = join(':', 'valcon', $rel->{if}, $rel->{then}, $rel->{operator}, $rel->{value}); | |||
| 7127 | } elsif ($rel->{type} eq 'value_conditional') { | |||||
| 7128 | 1 | 3 | $sig = join(':', 'valcond', $rel->{if}, $rel->{equals}, $rel->{then_required}); | |||
| 7129 | } else { | |||||
| 7130 | 0 | 0 | $sig = join(':', $rel->{type}, %$rel); | |||
| 7131 | } | |||||
| 7132 | ||||||
| 7133 | 26 | 33 | unless ($seen{$sig}++) { | |||
| 7134 | 20 | 17 | push @unique, $rel; | |||
| 7135 | } | |||||
| 7136 | } | |||||
| 7137 | ||||||
| 7138 | 103 | 127 | return @unique; | |||
| 7139 | } | |||||
| 7140 | ||||||
| 7141 | # -------------------------------------------------- | |||||
| 7142 | # _detect_mutually_exclusive | |||||
| 7143 | # | |||||
| 7144 | # Purpose: Detect pairs of parameters that cannot | |||||
| 7145 | # be specified together, by searching | |||||
| 7146 | # for die/croak/confess patterns | |||||
| 7147 | # that fire when both are truthy. | |||||
| 7148 | # | |||||
| 7149 | # Entry: $code - method body source string. | |||||
| 7150 | # $param_names - arrayref of parameter | |||||
| 7151 | # name strings. | |||||
| 7152 | # | |||||
| 7153 | # Exit: Returns an arrayref of relationship | |||||
| 7154 | # hashrefs of type 'mutually_exclusive'. | |||||
| 7155 | # Returns an empty arrayref if none found. | |||||
| 7156 | # | |||||
| 7157 | # Side effects: Logs detections to stdout when | |||||
| 7158 | # verbose is set. | |||||
| 7159 | # -------------------------------------------------- | |||||
| 7160 | sub _detect_mutually_exclusive { | |||||
| 7161 | 104 | 1511 | my ($self, $code, $param_names) = @_; | |||
| 7162 | ||||||
| 7163 | 104 | 71 | my @relationships; | |||
| 7164 | ||||||
| 7165 | # Pattern 1: die/croak if $x && $y | |||||
| 7166 | # Look for: die/croak ... if $param1 && $param2 | |||||
| 7167 | 104 | 98 | foreach my $param1 (@$param_names) { | |||
| 7168 | 151 | 115 | foreach my $param2 (@$param_names) { | |||
| 7169 | 299 | 320 | next if $param1 eq $param2; | |||
| 7170 | ||||||
| 7171 | # Check various patterns | |||||
| 7172 | 148 | 6478 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+&&\s+\$$param2/ || | |||
| 7173 | $code =~ /(?:die|croak|confess)[^;]*if\s+\$$param2\s+&&\s+\$$param1/) { | |||||
| 7174 | ||||||
| 7175 | # Avoid duplicates (param1,param2 vs param2,param1) | |||||
| 7176 | 18 | 16 | my $found_reverse = 0; | |||
| 7177 | 18 | 17 | foreach my $rel (@relationships) { | |||
| 7178 | 11 | 28 | if ($rel->{type} eq 'mutually_exclusive' && | |||
| 7179 | (($rel->{params}[0] eq $param2 && $rel->{params}[1] eq $param1))) { | |||||
| 7180 | 9 | 7 | $found_reverse = 1; | |||
| 7181 | 9 | 6 | last; | |||
| 7182 | } | |||||
| 7183 | } | |||||
| 7184 | ||||||
| 7185 | 18 | 27 | next if $found_reverse; | |||
| 7186 | ||||||
| 7187 | 9 | 22 | push @relationships, { | |||
| 7188 | type => 'mutually_exclusive', | |||||
| 7189 | params => [$param1, $param2], | |||||
| 7190 | description => "Cannot specify both $param1 and $param2" | |||||
| 7191 | }; | |||||
| 7192 | ||||||
| 7193 | 9 | 16 | $self->_log(" RELATIONSHIP: $param1 and $param2 are mutually exclusive"); | |||
| 7194 | } | |||||
| 7195 | ||||||
| 7196 | # Pattern 2: die "Cannot specify both X and Y" | |||||
| 7197 | 139 | 12729 | if ($code =~ /(?:die|croak|confess)\s+['"](Cannot|Can't)[^'"]*both[^'"]*$param1[^'"]*$param2/i || | |||
| 7198 | $code =~ /(?:die|croak|confess)\s+['"](Cannot|Can't)[^'"]*both[^'"]*$param2[^'"]*$param1/i) { | |||||
| 7199 | ||||||
| 7200 | 1 | 1 | my $found_reverse = 0; | |||
| 7201 | 1 | 1 | foreach my $rel (@relationships) { | |||
| 7202 | 1 | 15 | if ($rel->{type} eq 'mutually_exclusive' && | |||
| 7203 | (($rel->{params}[0] eq $param2 && $rel->{params}[1] eq $param1))) { | |||||
| 7204 | 0 | 0 | $found_reverse = 1; | |||
| 7205 | 0 | 0 | last; | |||
| 7206 | } | |||||
| 7207 | } | |||||
| 7208 | ||||||
| 7209 | 1 | 2 | next if $found_reverse; | |||
| 7210 | ||||||
| 7211 | 1 | 2 | push @relationships, { | |||
| 7212 | type => 'mutually_exclusive', | |||||
| 7213 | params => [$param1, $param2], | |||||
| 7214 | description => "Cannot specify both $param1 and $param2" | |||||
| 7215 | }; | |||||
| 7216 | ||||||
| 7217 | 1 | 1 | $self->_log(" RELATIONSHIP: $param1 and $param2 are mutually exclusive (from error message)"); | |||
| 7218 | } | |||||
| 7219 | } | |||||
| 7220 | } | |||||
| 7221 | ||||||
| 7222 | 104 | 139 | return \@relationships; | |||
| 7223 | } | |||||
| 7224 | ||||||
| 7225 | # -------------------------------------------------- | |||||
| 7226 | # _detect_required_groups | |||||
| 7227 | # | |||||
| 7228 | # Purpose: Detect parameter groups where at least | |||||
| 7229 | # one parameter must be specified (OR | |||||
| 7230 | # logic), by searching for die/croak | |||||
| 7231 | # patterns that fire unless any of the | |||||
| 7232 | # group is truthy. | |||||
| 7233 | # | |||||
| 7234 | # Entry: $code - method body source string. | |||||
| 7235 | # $param_names - arrayref of parameter | |||||
| 7236 | # name strings. | |||||
| 7237 | # | |||||
| 7238 | # Exit: Returns an arrayref of relationship | |||||
| 7239 | # hashrefs of type 'required_group'. | |||||
| 7240 | # Returns an empty arrayref if none found. | |||||
| 7241 | # | |||||
| 7242 | # Side effects: Logs detections to stdout when | |||||
| 7243 | # verbose is set. | |||||
| 7244 | # -------------------------------------------------- | |||||
| 7245 | sub _detect_required_groups { | |||||
| 7246 | 102 | 1085 | my ($self, $code, $param_names) = @_; | |||
| 7247 | ||||||
| 7248 | 102 | 67 | my @relationships; | |||
| 7249 | ||||||
| 7250 | # Pattern 1: die/croak unless $x || $y | |||||
| 7251 | 102 | 84 | foreach my $param1 (@$param_names) { | |||
| 7252 | 147 | 123 | foreach my $param2 (@$param_names) { | |||
| 7253 | 291 | 303 | next if $param1 eq $param2; | |||
| 7254 | ||||||
| 7255 | 144 | 5751 | if ($code =~ /(?:die|croak|confess)[^;]*unless\s+\$$param1\s+\|\|\s+\$$param2/ || | |||
| 7256 | $code =~ /(?:die|croak|confess)[^;]*unless\s+\$$param2\s+\|\|\s+\$$param1/) { | |||||
| 7257 | ||||||
| 7258 | # Avoid duplicates | |||||
| 7259 | 10 | 8 | my $found_reverse = 0; | |||
| 7260 | 10 | 12 | foreach my $rel (@relationships) { | |||
| 7261 | 5 | 14 | if ($rel->{type} eq 'required_group' && | |||
| 7262 | (($rel->{params}[0] eq $param2 && $rel->{params}[1] eq $param1))) { | |||||
| 7263 | 5 | 3 | $found_reverse = 1; | |||
| 7264 | 5 | 5 | last; | |||
| 7265 | } | |||||
| 7266 | } | |||||
| 7267 | ||||||
| 7268 | 10 | 14 | next if $found_reverse; | |||
| 7269 | ||||||
| 7270 | 5 | 14 | push @relationships, { | |||
| 7271 | type => 'required_group', | |||||
| 7272 | params => [$param1, $param2], | |||||
| 7273 | logic => 'or', | |||||
| 7274 | description => "Must specify either $param1 or $param2" | |||||
| 7275 | }; | |||||
| 7276 | ||||||
| 7277 | 5 | 12 | $self->_log(" RELATIONSHIP: Must specify either $param1 or $param2"); | |||
| 7278 | } | |||||
| 7279 | ||||||
| 7280 | # Pattern 2: die "Must specify either X or Y" | |||||
| 7281 | 139 | 12900 | if ($code =~ /(?:die|croak|confess)\s+['"]Must\s+specify\s+either[^'"]*$param1[^'"]*or[^'"]*$param2/i || | |||
| 7282 | $code =~ /(?:die|croak|confess)\s+['"]Must\s+specify\s+either[^'"]*$param2[^'"]*or[^'"]*$param1/i) { | |||||
| 7283 | ||||||
| 7284 | 1 | 1 | my $found_reverse = 0; | |||
| 7285 | 1 | 1 | foreach my $rel (@relationships) { | |||
| 7286 | 1 | 3 | if ($rel->{type} eq 'required_group' && | |||
| 7287 | (($rel->{params}[0] eq $param2 && $rel->{params}[1] eq $param1))) { | |||||
| 7288 | 0 | 0 | $found_reverse = 1; | |||
| 7289 | 0 | 0 | last; | |||
| 7290 | } | |||||
| 7291 | } | |||||
| 7292 | ||||||
| 7293 | 1 | 1 | next if $found_reverse; | |||
| 7294 | ||||||
| 7295 | 1 | 3 | push @relationships, { | |||
| 7296 | type => 'required_group', | |||||
| 7297 | params => [$param1, $param2], | |||||
| 7298 | logic => 'or', | |||||
| 7299 | description => "Must specify either $param1 or $param2" | |||||
| 7300 | }; | |||||
| 7301 | ||||||
| 7302 | 1 | 2 | $self->_log(" RELATIONSHIP: Must specify either $param1 or $param2 (from error message)"); | |||
| 7303 | } | |||||
| 7304 | } | |||||
| 7305 | } | |||||
| 7306 | ||||||
| 7307 | 102 | 101 | return \@relationships; | |||
| 7308 | } | |||||
| 7309 | ||||||
| 7310 | # -------------------------------------------------- | |||||
| 7311 | # _detect_conditional_requirements | |||||
| 7312 | # | |||||
| 7313 | # Purpose: Detect IF-THEN parameter relationships | |||||
| 7314 | # where one parameter being present | |||||
| 7315 | # makes another required, by searching | |||||
| 7316 | # for die/croak patterns of the form | |||||
| 7317 | # 'die if $x && !$y'. | |||||
| 7318 | # | |||||
| 7319 | # Entry: $code - method body source string. | |||||
| 7320 | # $param_names - arrayref of parameter | |||||
| 7321 | # name strings. | |||||
| 7322 | # | |||||
| 7323 | # Exit: Returns an arrayref of relationship | |||||
| 7324 | # hashrefs of type | |||||
| 7325 | # 'conditional_requirement'. | |||||
| 7326 | # Returns an empty arrayref if none found. | |||||
| 7327 | # | |||||
| 7328 | # Side effects: Logs detections to stdout when | |||||
| 7329 | # verbose is set. | |||||
| 7330 | # -------------------------------------------------- | |||||
| 7331 | sub _detect_conditional_requirements { | |||||
| 7332 | 102 | 1369 | my ($self, $code, $param_names) = @_; | |||
| 7333 | ||||||
| 7334 | 102 | 74 | my @relationships; | |||
| 7335 | ||||||
| 7336 | 102 | 75 | foreach my $param1 (@$param_names) { | |||
| 7337 | 147 | 119 | foreach my $param2 (@$param_names) { | |||
| 7338 | 291 | 292 | next if $param1 eq $param2; | |||
| 7339 | ||||||
| 7340 | # Pattern 1: die if $x && !$y (if x then y required) | |||||
| 7341 | 144 | 3111 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+&&\s+!\$$param2/) { | |||
| 7342 | 5 | 15 | push @relationships, { | |||
| 7343 | type => 'conditional_requirement', | |||||
| 7344 | if => $param1, | |||||
| 7345 | then_required => $param2, | |||||
| 7346 | description => "When $param1 is specified, $param2 is required" | |||||
| 7347 | }; | |||||
| 7348 | ||||||
| 7349 | 5 | 8 | $self->_log(" RELATIONSHIP: $param1 requires $param2"); | |||
| 7350 | } | |||||
| 7351 | ||||||
| 7352 | # Pattern 2: die if $x && !defined($y) | |||||
| 7353 | 144 | 4377 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+&&\s+!defined\s*\(\s*\$$param2\s*\)/) { | |||
| 7354 | 0 | 0 | push @relationships, { | |||
| 7355 | type => 'conditional_requirement', | |||||
| 7356 | if => $param1, | |||||
| 7357 | then_required => $param2, | |||||
| 7358 | description => "When $param1 is specified, $param2 is required" | |||||
| 7359 | }; | |||||
| 7360 | ||||||
| 7361 | 0 | 0 | $self->_log(" RELATIONSHIP: $param1 requires $param2 (defined check)"); | |||
| 7362 | } | |||||
| 7363 | ||||||
| 7364 | # Pattern 3: Error message "X requires Y" | |||||
| 7365 | 144 | 6419 | if ($code =~ /(?:die|croak|confess)\s+['"]\w*$param1[^'"]*requires[^'"]*$param2/i) { | |||
| 7366 | 4 | 8 | push @relationships, { | |||
| 7367 | type => 'conditional_requirement', | |||||
| 7368 | if => $param1, | |||||
| 7369 | then_required => $param2, | |||||
| 7370 | description => "When $param1 is specified, $param2 is required" | |||||
| 7371 | }; | |||||
| 7372 | ||||||
| 7373 | 4 | 5 | $self->_log(" RELATIONSHIP: $param1 requires $param2 (from error message)"); | |||
| 7374 | } | |||||
| 7375 | } | |||||
| 7376 | } | |||||
| 7377 | ||||||
| 7378 | 102 | 99 | return \@relationships; | |||
| 7379 | } | |||||
| 7380 | ||||||
| 7381 | # -------------------------------------------------- | |||||
| 7382 | # _detect_dependencies | |||||
| 7383 | # | |||||
| 7384 | # Purpose: Detect simple parameter dependencies | |||||
| 7385 | # where one parameter requires another | |||||
| 7386 | # to also be present, by combining | |||||
| 7387 | # error message pattern matching with | |||||
| 7388 | # code condition matching. | |||||
| 7389 | # | |||||
| 7390 | # Entry: $code - method body source string. | |||||
| 7391 | # $param_names - arrayref of parameter | |||||
| 7392 | # name strings. | |||||
| 7393 | # | |||||
| 7394 | # Exit: Returns an arrayref of relationship | |||||
| 7395 | # hashrefs of type 'dependency'. | |||||
| 7396 | # Returns an empty arrayref if none found. | |||||
| 7397 | # | |||||
| 7398 | # Side effects: Logs detections to stdout when | |||||
| 7399 | # verbose is set. | |||||
| 7400 | # -------------------------------------------------- | |||||
| 7401 | sub _detect_dependencies { | |||||
| 7402 | 99 | 90 | my ($self, $code, $param_names) = @_; | |||
| 7403 | ||||||
| 7404 | 99 | 65 | my @relationships; | |||
| 7405 | ||||||
| 7406 | 99 | 86 | foreach my $param1 (@$param_names) { | |||
| 7407 | 141 | 98 | foreach my $param2 (@$param_names) { | |||
| 7408 | 279 | 260 | next if $param1 eq $param2; | |||
| 7409 | ||||||
| 7410 | # Pattern 1: Error message mentions "X requires Y" AND code checks $x && !$y | |||||
| 7411 | # Split into two checks to be more flexible | |||||
| 7412 | 138 | 5749 | if (($code =~ /(?:die|croak|confess)\s+['"]\w*$param1[^'"]*requires[^'"]*$param2/i) && | |||
| 7413 | ($code =~ /if\s+\$param1\s+&&\s+!\$param2/)) { | |||||
| 7414 | ||||||
| 7415 | 0 | 0 | push @relationships, { | |||
| 7416 | type => 'dependency', | |||||
| 7417 | param => $param1, | |||||
| 7418 | requires => $param2, | |||||
| 7419 | description => "$param1 requires $param2 to be specified" | |||||
| 7420 | }; | |||||
| 7421 | ||||||
| 7422 | 0 | 0 | $self->_log(" RELATIONSHIP: $param1 depends on $param2"); | |||
| 7423 | } | |||||
| 7424 | } | |||||
| 7425 | } | |||||
| 7426 | ||||||
| 7427 | 99 | 92 | return \@relationships; | |||
| 7428 | } | |||||
| 7429 | ||||||
| 7430 | # -------------------------------------------------- | |||||
| 7431 | # _detect_value_constraints | |||||
| 7432 | # | |||||
| 7433 | # Purpose: Detect value-based constraints between | |||||
| 7434 | # parameters, such as 'if $ssl then | |||||
| 7435 | # $port must equal 443' or 'if $mode | |||||
| 7436 | # eq secure then $key is required'. | |||||
| 7437 | # | |||||
| 7438 | # Entry: $code - method body source string. | |||||
| 7439 | # $param_names - arrayref of parameter | |||||
| 7440 | # name strings. | |||||
| 7441 | # | |||||
| 7442 | # Exit: Returns an arrayref of relationship | |||||
| 7443 | # hashrefs of type 'value_constraint' | |||||
| 7444 | # or 'value_conditional'. | |||||
| 7445 | # Returns an empty arrayref if none found. | |||||
| 7446 | # | |||||
| 7447 | # Side effects: Logs detections to stdout when | |||||
| 7448 | # verbose is set. | |||||
| 7449 | # -------------------------------------------------- | |||||
| 7450 | sub _detect_value_constraints { | |||||
| 7451 | 100 | 99 | my ($self, $code, $param_names) = @_; | |||
| 7452 | ||||||
| 7453 | 100 | 69 | my @relationships; | |||
| 7454 | ||||||
| 7455 | 100 | 75 | foreach my $param1 (@$param_names) { | |||
| 7456 | 143 | 113 | foreach my $param2 (@$param_names) { | |||
| 7457 | 283 | 246 | next if $param1 eq $param2; | |||
| 7458 | ||||||
| 7459 | # Pattern 1: die if $x && $y != value | |||||
| 7460 | 140 | 4029 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+&&\s+\$$param2\s*!=\s*(\d+)/) { | |||
| 7461 | 3 | 3 | my $value = $1; | |||
| 7462 | 3 | 10 | push @relationships, { | |||
| 7463 | type => 'value_constraint', | |||||
| 7464 | if => $param1, | |||||
| 7465 | then => $param2, | |||||
| 7466 | operator => '==', | |||||
| 7467 | value => $value, | |||||
| 7468 | description => "When $param1 is specified, $param2 must equal $value" | |||||
| 7469 | }; | |||||
| 7470 | ||||||
| 7471 | 3 | 5 | $self->_log(" RELATIONSHIP: $param1 requires $param2 == $value"); | |||
| 7472 | } | |||||
| 7473 | ||||||
| 7474 | # Pattern 2: die if $x && $y < value | |||||
| 7475 | 140 | 3929 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+&&\s+\$$param2\s*<\s*(\d+)/) { | |||
| 7476 | 0 | 0 | my $value = $1; | |||
| 7477 | 0 | 0 | push @relationships, { | |||
| 7478 | type => 'value_constraint', | |||||
| 7479 | if => $param1, | |||||
| 7480 | then => $param2, | |||||
| 7481 | operator => '>=', | |||||
| 7482 | value => $value, | |||||
| 7483 | description => "When $param1 is specified, $param2 must be >= $value" | |||||
| 7484 | }; | |||||
| 7485 | ||||||
| 7486 | 0 | 0 | $self->_log(" RELATIONSHIP: $param1 requires $param2 >= $value"); | |||
| 7487 | } | |||||
| 7488 | ||||||
| 7489 | # Pattern 3: die if $x eq 'value' && !$y | |||||
| 7490 | 140 | 5035 | if ($code =~ /(?:die|croak|confess)[^;]*if\s+\$$param1\s+eq\s+['"]([^'"]+)['"]\s+&&\s+!\$$param2/) { | |||
| 7491 | 1 | 1 | my $value = $1; | |||
| 7492 | 1 | 3 | push @relationships, { | |||
| 7493 | type => 'value_conditional', | |||||
| 7494 | if => $param1, | |||||
| 7495 | equals => $value, | |||||
| 7496 | then_required => $param2, | |||||
| 7497 | description => "When $param1 equals '$value', $param2 is required" | |||||
| 7498 | }; | |||||
| 7499 | ||||||
| 7500 | 1 | 2 | $self->_log(" RELATIONSHIP: $param1='$value' requires $param2"); | |||
| 7501 | } | |||||
| 7502 | } | |||||
| 7503 | } | |||||
| 7504 | ||||||
| 7505 | 100 | 92 | return \@relationships; | |||
| 7506 | } | |||||
| 7507 | ||||||
| 7508 | # Write a single method schema to a YAML file in output_dir. | |||||
| 7509 | # | |||||
| 7510 | # Entry: $method_name is a non-empty string; $schema is a hashref. | |||||
| 7511 | # Exit: YAML file written to output_dir/$method_name.yml. | |||||
| 7512 | # Side effects: Creates output_dir if it does not exist. | |||||
| 7513 | # Notes: Croaks if output_dir was not set in new(). | |||||
| 7514 | ||||||
| 7515 | sub _write_schema { | |||||
| 7516 | 92 | 98 | my ($self, $method_name, $schema) = @_; | |||
| 7517 | ||||||
| 7518 | # output_dir is required here â croak early with a clear message | |||||
| 7519 | # rather than letting make_path fail with a cryptic error | |||||
| 7520 | 92 | 105 | croak(__PACKAGE__, ': output_dir must be provided to new() when writing schema files') unless defined $self->{output_dir}; | |||
| 7521 | ||||||
| 7522 | 92 | 933 | make_path($self->{output_dir}) unless -d $self->{output_dir}; | |||
| 7523 | ||||||
| 7524 | 92 | 106 | my $filename = "$self->{output_dir}/${method_name}.yml"; | |||
| 7525 | ||||||
| 7526 | # Configure YAML::XS to not quote numeric strings | |||||
| 7527 | 92 | 72 | local $YAML::XS::QuoteNumericStrings = 0; | |||
| 7528 | ||||||
| 7529 | # Extract package name for module field | |||||
| 7530 | 92 | 75 | my $package_name = ''; | |||
| 7531 | 92 | 173 | if ($self->{_document}) { | |||
| 7532 | 92 | 130 | my $package_stmt = $self->{_document}->find_first('PPI::Statement::Package'); | |||
| 7533 | 92 | 10216 | $package_name = $package_stmt ? $package_stmt->namespace : ''; | |||
| 7534 | 92 | 1163 | $self->{_package_name} //= $package_name; | |||
| 7535 | } | |||||
| 7536 | ||||||
| 7537 | # Clean up schema for output - use the format expected by App::Test::Generator::Template | |||||
| 7538 | 92 | 284 | my $output = { | |||
| 7539 | function => $method_name, | |||||
| 7540 | module => $package_name, | |||||
| 7541 | config => { | |||||
| 7542 | close_stdin => 0, | |||||
| 7543 | dedup => 1, | |||||
| 7544 | test_nuls => 0, | |||||
| 7545 | test_undef => 0, | |||||
| 7546 | test_empty => 1, | |||||
| 7547 | test_non_ascii => 0, | |||||
| 7548 | test_security => 0 | |||||
| 7549 | } | |||||
| 7550 | }; | |||||
| 7551 | ||||||
| 7552 | # Process input parameters with advanced type handling | |||||
| 7553 | 92 | 113 | if($schema->{'input'}) { | |||
| 7554 | 90 90 | 60 117 | if(scalar(keys %{$schema->{'input'}})) { | |||
| 7555 | 80 | 77 | $output->{'input'} = {}; | |||
| 7556 | ||||||
| 7557 | 80 80 | 48 115 | foreach my $param_name (keys %{$schema->{'input'}}) { | |||
| 7558 | 129 | 94 | my $param = $schema->{'input'}{$param_name}; | |||
| 7559 | 129 | 150 | if($param->{name}) { | |||
| 7560 | 20 | 16 | my $name = delete $param->{name}; | |||
| 7561 | 20 | 18 | if($name ne $param_name) { | |||
| 7562 | # Sanity check | |||||
| 7563 | 0 | 0 | croak("BUG: Parameter name - expected $param_name, got $name"); | |||
| 7564 | } | |||||
| 7565 | } | |||||
| 7566 | 129 | 143 | my $cleaned_param = $self->_serialize_parameter_for_yaml($param); | |||
| 7567 | 129 | 136 | $output->{'input'}{$param_name} = $cleaned_param; | |||
| 7568 | } | |||||
| 7569 | } else { | |||||
| 7570 | 10 | 10 | delete $output->{input}; | |||
| 7571 | } | |||||
| 7572 | } | |||||
| 7573 | ||||||
| 7574 | # Process output | |||||
| 7575 | 92 92 | 152 130 | if($schema->{'output'} && (scalar(keys %{$schema->{'output'}}))) { | |||
| 7576 | 92 92 | 140 167 | if((ref($schema->{output}{_error_handling}) eq 'HASH') && (scalar(keys %{$schema->{output}{_error_handling}}) == 0)) { | |||
| 7577 | 79 | 74 | delete $schema->{output}{_error_handling}; | |||
| 7578 | } | |||||
| 7579 | 92 | 108 | $output->{'output'} = $schema->{'output'}; | |||
| 7580 | } | |||||
| 7581 | ||||||
| 7582 | 92 | 181 | if($schema->{'output'}{'type'} && ($schema->{'output'}{'type'} eq 'scalar')) { | |||
| 7583 | 0 | 0 | $schema->{'output'}{'type'} = 'string'; | |||
| 7584 | 0 | 0 | $schema->{_confidence}{output}->{level} = 'low'; # A guess | |||
| 7585 | } | |||||
| 7586 | ||||||
| 7587 | # Add 'new' field if object instantiation is needed | |||||
| 7588 | 92 | 84 | if ($schema->{new}) { | |||
| 7589 | # TODO: consider allowing parent class packages up the ISA chain | |||||
| 7590 | 71 | 118 | if(ref($schema->{new}) || ($schema->{new} eq $package_name)) { | |||
| 7591 | 71 | 94 | $output->{new} = $schema->{new} eq $package_name ? undef : $schema->{'new'}; | |||
| 7592 | } else { | |||||
| 7593 | 0 | 0 | $self->_log(" NEW: Don't use $schema->{new} for object insantiation"); | |||
| 7594 | 0 | 0 | delete $schema->{new}; | |||
| 7595 | 0 | 0 | delete $output->{new}; | |||
| 7596 | } | |||||
| 7597 | } | |||||
| 7598 | ||||||
| 7599 | 92 | 112 | if(!defined($schema->{_confidence}{input}->{level})) { | |||
| 7600 | 63 | 80 | $schema->{_confidence}{input} = $self->_calculate_input_confidence($schema->{input}); | |||
| 7601 | } | |||||
| 7602 | 92 | 129 | if(!defined($schema->{_confidence}{output}->{level})) { | |||
| 7603 | 0 | 0 | $schema->{_confidence}{output} = $self->_calculate_output_confidence($schema->{output}); | |||
| 7604 | } | |||||
| 7605 | ||||||
| 7606 | # Add relationships if detected | |||||
| 7607 | 92 7 | 104 7 | if ($schema->{relationships} && @{$schema->{relationships}}) { | |||
| 7608 | 7 | 4 | $output->{relationships} = $schema->{relationships}; | |||
| 7609 | } | |||||
| 7610 | ||||||
| 7611 | 92 5 | 99 8 | if($schema->{accessor} && scalar(keys %{$schema->{accessor}})) { | |||
| 7612 | 5 | 6 | $output->{accessor} = $schema->{accessor}; | |||
| 7613 | } | |||||
| 7614 | ||||||
| 7615 | 92 | 149 | open my $fh, '>', $filename; | |||
| 7616 | 92 | 21989 | print $fh YAML::XS::Dump($output); | |||
| 7617 | 92 | 396 | print $fh $self->_generate_schema_comments($schema, $method_name); | |||
| 7618 | 92 | 136 | close $fh; | |||
| 7619 | ||||||
| 7620 | my $rel_info = $schema->{relationships} ? | |||||
| 7621 | 92 7 | 8888 10 | ' [' . scalar(@{$schema->{relationships}}) . ' relationships]' : ''; | |||
| 7622 | $self->_log(" Wrote: $filename (input confidence: $schema->{_confidence}{input}->{level})" . | |||||
| 7623 | 92 | 274 | ($schema->{new} ? " [requires: $schema->{new}]" : '') . $rel_info); | |||
| 7624 | } | |||||
| 7625 | ||||||
| 7626 | # -------------------------------------------------- | |||||
| 7627 | # _generate_schema_comments | |||||
| 7628 | # | |||||
| 7629 | # Purpose: Generate the YAML comment block | |||||
| 7630 | # appended to the end of each written | |||||
| 7631 | # schema file, containing provenance, | |||||
| 7632 | # confidence levels, parameter type | |||||
| 7633 | # notes, relationship summaries, and | |||||
| 7634 | # warnings about types requiring | |||||
| 7635 | # special test setup. | |||||
| 7636 | # | |||||
| 7637 | # Entry: $schema - the schema hashref as | |||||
| 7638 | # built by _analyze_method. | |||||
| 7639 | # $method_name - the method name string, | |||||
| 7640 | # used in the fuzz | |||||
| 7641 | # command hint. | |||||
| 7642 | # | |||||
| 7643 | # Exit: Returns a string of YAML comment lines | |||||
| 7644 | # beginning with a blank line and ending | |||||
| 7645 | # with a trailing newline. | |||||
| 7646 | # | |||||
| 7647 | # Side effects: None. | |||||
| 7648 | # -------------------------------------------------- | |||||
| 7649 | sub _generate_schema_comments { | |||||
| 7650 | 97 | 141 | my ($self, $schema, $method_name) = @_; | |||
| 7651 | ||||||
| 7652 | 97 | 69 | my @comments; | |||
| 7653 | ||||||
| 7654 | 97 | 99 | push @comments, ''; | |||
| 7655 | 97 | 120 | push @comments, '# Generated by ' . ref($self); | |||
| 7656 | 97 | 150 | push @comments, "# Run: fuzz-harness-generator -r $self->{output_dir}/${method_name}.yml"; | |||
| 7657 | 97 | 66 | push @comments, '#'; | |||
| 7658 | 97 | 124 | push @comments, "# Input confidence: $schema->{_confidence}{input}->{level}"; | |||
| 7659 | 97 | 99 | push @comments, "# Output confidence: $schema->{_confidence}{output}->{level}"; | |||
| 7660 | ||||||
| 7661 | # Add notes about parameters | |||||
| 7662 | 97 | 125 | if ($schema->{input}) { | |||
| 7663 | 90 | 60 | my @param_notes; | |||
| 7664 | 90 90 | 63 160 | foreach my $param_name (sort keys %{$schema->{input}}) { | |||
| 7665 | 129 | 103 | my $p = $schema->{input}{$param_name}; | |||
| 7666 | ||||||
| 7667 | 129 | 120 | if ($p->{semantic}) { | |||
| 7668 | 16 | 18 | push @param_notes, "$param_name: $p->{semantic}"; | |||
| 7669 | } | |||||
| 7670 | ||||||
| 7671 | 129 | 115 | if ($p->{enum}) { | |||
| 7672 | 4 4 | 4 5 | push @param_notes, "$param_name: enum with " . scalar(@{$p->{enum}}) . " values"; | |||
| 7673 | } | |||||
| 7674 | ||||||
| 7675 | 129 | 154 | if ($p->{isa}) { | |||
| 7676 | 6 | 9 | push @param_notes, "$param_name: requires $p->{isa} object"; | |||
| 7677 | } | |||||
| 7678 | } | |||||
| 7679 | ||||||
| 7680 | 90 | 100 | if (@param_notes) { | |||
| 7681 | 18 | 17 | push @comments, '#'; | |||
| 7682 | 18 | 11 | push @comments, '# Parameter types detected:'; | |||
| 7683 | 18 | 14 | foreach my $note (@param_notes) { | |||
| 7684 | 26 | 29 | push @comments, "# - $note"; | |||
| 7685 | } | |||||
| 7686 | } | |||||
| 7687 | } | |||||
| 7688 | ||||||
| 7689 | # Add relationship notes | |||||
| 7690 | 97 8 | 120 17 | if ($schema->{relationships} && @{$schema->{relationships}}) { | |||
| 7691 | 8 | 8 | push @comments, ( | |||
| 7692 | '#', | |||||
| 7693 | '# Parameter relationships detected:' | |||||
| 7694 | ); | |||||
| 7695 | 8 8 | 8 10 | foreach my $rel (@{$schema->{relationships}}) { | |||
| 7696 | 14 | 15 | my $desc = $rel->{description} || _format_relationship($rel); | |||
| 7697 | 14 | 15 | push @comments, "# - $desc"; | |||
| 7698 | } | |||||
| 7699 | } | |||||
| 7700 | ||||||
| 7701 | # Add general notes | |||||
| 7702 | 97 97 | 121 146 | if ($schema->{_notes} && scalar(@{$schema->{_notes}})) { | |||
| 7703 | 30 | 23 | push @comments, '#'; | |||
| 7704 | 30 | 26 | push @comments, '# Notes:'; | |||
| 7705 | 30 30 | 15 31 | foreach my $note (@{$schema->{_notes}}) { | |||
| 7706 | 45 | 49 | push @comments, "# - $note"; | |||
| 7707 | } | |||||
| 7708 | } | |||||
| 7709 | ||||||
| 7710 | 97 | 104 | if($schema->{_analysis}) { | |||
| 7711 | 92 | 88 | push @comments, ( | |||
| 7712 | '#', | |||||
| 7713 | '# Analysis:', | |||||
| 7714 | '# TODO:', | |||||
| 7715 | ); | |||||
| 7716 | # confidence_factors: | |||||
| 7717 | # input: | |||||
| 7718 | # - No parameters found | |||||
| 7719 | # output: | |||||
| 7720 | # - 'Return type defined: object (+30)' | |||||
| 7721 | # - 'Total output confidence score: 30' | |||||
| 7722 | # - 'Medium confidence: return type defined' | |||||
| 7723 | # input_confidence: none | |||||
| 7724 | # output_confidence: medium | |||||
| 7725 | # overall_confidence: none | |||||
| 7726 | } | |||||
| 7727 | ||||||
| 7728 | # Add warnings for complex types | |||||
| 7729 | 97 | 65 | my @warnings; | |||
| 7730 | 97 | 94 | if ($schema->{input}) { | |||
| 7731 | 90 90 | 55 90 | foreach my $param_name (keys %{$schema->{input}}) { | |||
| 7732 | 129 | 112 | my $p = $schema->{input}{$param_name}; | |||
| 7733 | ||||||
| 7734 | 129 | 201 | if ($p->{type} && $p->{type} eq 'coderef') { | |||
| 7735 | 3 | 3 | push @warnings, "Parameter '$param_name' is a coderef - you'll need to provide a sub {} in tests"; | |||
| 7736 | } | |||||
| 7737 | ||||||
| 7738 | 129 | 150 | if ($p->{semantic} && $p->{semantic} eq 'filehandle') { | |||
| 7739 | 2 | 2 | push @warnings, "Parameter '$param_name' is a filehandle - consider using IO::String or mock"; | |||
| 7740 | } | |||||
| 7741 | ||||||
| 7742 | 129 | 181 | if ($p->{isa} && $p->{isa} =~ /DateTime/) { | |||
| 7743 | 2 | 3 | push @warnings, "Parameter '$param_name' requires DateTime - ensure DateTime is loaded"; | |||
| 7744 | } | |||||
| 7745 | } | |||||
| 7746 | } | |||||
| 7747 | ||||||
| 7748 | 97 | 96 | if (@warnings) { | |||
| 7749 | 7 | 6 | push @comments, '#'; | |||
| 7750 | 7 | 5 | push @comments, '# WARNINGS - Manual test setup may be required:'; | |||
| 7751 | 7 | 6 | foreach my $warning (@warnings) { | |||
| 7752 | 7 | 6 | push @comments, "# ! $warning"; | |||
| 7753 | } | |||||
| 7754 | } | |||||
| 7755 | ||||||
| 7756 | 97 | 75 | push @comments, ''; | |||
| 7757 | ||||||
| 7758 | 97 | 248 | return join("\n", @comments); | |||
| 7759 | } | |||||
| 7760 | ||||||
| 7761 | # -------------------------------------------------- | |||||
| 7762 | # _serialize_parameter_for_yaml | |||||
| 7763 | # | |||||
| 7764 | # Purpose: Convert a parameter spec hashref into | |||||
| 7765 | # a cleaned, YAML-serialisable form | |||||
| 7766 | # suitable for App::Test::Generator | |||||
| 7767 | # consumption, handling semantic type | |||||
| 7768 | # mappings, enum values, and object | |||||
| 7769 | # class annotations. | |||||
| 7770 | # | |||||
| 7771 | # Entry: $param - parameter spec hashref as | |||||
| 7772 | # produced by the merge and | |||||
| 7773 | # analysis pipeline. | |||||
| 7774 | # | |||||
| 7775 | # Exit: Returns a new hashref containing only | |||||
| 7776 | # the fields App::Test::Generator | |||||
| 7777 | # understands, with internal _ keys | |||||
| 7778 | # and semantic keys removed or converted. | |||||
| 7779 | # | |||||
| 7780 | # Side effects: None. | |||||
| 7781 | # | |||||
| 7782 | # Notes: Semantic types are mapped to | |||||
| 7783 | # appropriate base types with additional | |||||
| 7784 | # constraint and note fields. | |||||
| 7785 | # The original $param hashref is not | |||||
| 7786 | # modified. | |||||
| 7787 | # -------------------------------------------------- | |||||
| 7788 | sub _serialize_parameter_for_yaml { | |||||
| 7789 | 144 | 3086 | my ($self, $param) = @_; | |||
| 7790 | ||||||
| 7791 | 144 | 99 | my %cleaned; | |||
| 7792 | ||||||
| 7793 | # Copy basic fields that App::Test::Generator expects | |||||
| 7794 | 144 | 113 | foreach my $field (qw(type position optional min max matches default)) { | |||
| 7795 | 1008 | 933 | $cleaned{$field} = $param->{$field} if defined $param->{$field}; | |||
| 7796 | } | |||||
| 7797 | ||||||
| 7798 | # Handle advanced type mappings | |||||
| 7799 | 144 | 142 | if(my $semantic = $param->{semantic}) { | |||
| 7800 | 23 | 76 | if ($semantic eq 'datetime_object') { | |||
| 7801 | # DateTime objects: test generator needs to know how to create them | |||||
| 7802 | 2 | 3 | $cleaned{type} = 'object'; | |||
| 7803 | 2 | 3 | $cleaned{isa} = $param->{isa} || 'DateTime'; | |||
| 7804 | 2 | 2 | $cleaned{_note} = 'Requires DateTime object'; | |||
| 7805 | } elsif ($semantic eq 'timepiece_object') { | |||||
| 7806 | 0 | 0 | $cleaned{type} = 'object'; | |||
| 7807 | 0 | 0 | $cleaned{isa} = $param->{isa} || 'Time::Piece'; | |||
| 7808 | 0 | 0 | $cleaned{_note} = 'Requires Time::Piece object'; | |||
| 7809 | } elsif ($semantic eq 'date_string') { | |||||
| 7810 | # Date strings: provide regex pattern | |||||
| 7811 | 1 | 1 | $cleaned{type} = 'string'; | |||
| 7812 | 1 | 3 | $cleaned{matches} ||= '/^\d{4}-\d{2}-\d{2}$/'; | |||
| 7813 | 1 | 2 | $cleaned{_example} = '2024-12-12'; | |||
| 7814 | } elsif ($semantic eq 'iso8601_string') { | |||||
| 7815 | 1 | 1 | $cleaned{type} = 'string'; | |||
| 7816 | 1 | 2 | $cleaned{matches} ||= '/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z?$/'; | |||
| 7817 | 1 | 1 | $cleaned{_example} = '2024-12-12T10:30:00Z'; | |||
| 7818 | } elsif ($semantic eq 'unix_timestamp') { | |||||
| 7819 | 3 | 4 | $cleaned{type} = 'integer'; | |||
| 7820 | 3 | 9 | $cleaned{min} ||= 0; | |||
| 7821 | 3 | 7 | $cleaned{max} ||= $INT32_MAX; # 32-bit max | |||
| 7822 | 3 | 8 | $cleaned{_note} = 'UNIX timestamp'; | |||
| 7823 | } elsif ($semantic eq 'datetime_parseable') { | |||||
| 7824 | 0 | 0 | $cleaned{type} = 'string'; | |||
| 7825 | 0 | 0 | $cleaned{_note} = 'Must be parseable as datetime'; | |||
| 7826 | } elsif ($semantic eq 'filehandle') { | |||||
| 7827 | # File handles: special handling needed | |||||
| 7828 | 2 | 4 | $cleaned{type} = 'object'; | |||
| 7829 | 2 | 3 | $cleaned{isa} = $param->{isa} || 'IO::Handle'; | |||
| 7830 | 2 | 2 | $cleaned{_note} = 'File handle - may need mock in tests'; | |||
| 7831 | } elsif ($semantic eq 'filepath') { | |||||
| 7832 | # File paths: string with path pattern | |||||
| 7833 | 2 | 2 | $cleaned{type} = 'string'; | |||
| 7834 | 2 | 6 | $cleaned{matches} ||= '/^[\\w\\/.\\-_]+$/'; | |||
| 7835 | 2 | 2 | $cleaned{_note} = 'File path'; | |||
| 7836 | } elsif ($semantic eq 'callback') { | |||||
| 7837 | # Coderefs: mark as special type | |||||
| 7838 | 5 | 5 | $cleaned{type} = 'coderef'; | |||
| 7839 | 5 | 8 | $cleaned{_note} = 'CODE reference - provide sub { } in tests'; | |||
| 7840 | } elsif ($semantic eq 'enum') { | |||||
| 7841 | # Enum: keep as string but add valid values | |||||
| 7842 | 4 | 5 | $cleaned{type} = 'string'; | |||
| 7843 | 4 | 55 | if ($param->{enum} && ref($param->{enum}) eq 'ARRAY') { | |||
| 7844 | 4 | 3 | $cleaned{enum} = $param->{enum}; | |||
| 7845 | 4 4 | 4 8 | $cleaned{_note} = 'Must be one of: ' . join(', ', @{$param->{enum}}); | |||
| 7846 | } | |||||
| 7847 | } | |||||
| 7848 | } | |||||
| 7849 | ||||||
| 7850 | # Handle memberof even if not marked with semantic | |||||
| 7851 | 144 | 166 | if($param->{enum} && ref($param->{enum}) eq 'ARRAY') { | |||
| 7852 | 6 | 6 | $cleaned{memberof} = $param->{enum}; | |||
| 7853 | } | |||||
| 7854 | 144 | 144 | if($param->{memberof} && ref($param->{memberof}) eq 'ARRAY') { | |||
| 7855 | 0 | 0 | $cleaned{memberof} = $param->{memberof}; | |||
| 7856 | } | |||||
| 7857 | ||||||
| 7858 | # Handle object class | |||||
| 7859 | 144 | 154 | if ($param->{isa} && !$cleaned{isa}) { | |||
| 7860 | 4 | 5 | $cleaned{isa} = $param->{isa}; | |||
| 7861 | } | |||||
| 7862 | ||||||
| 7863 | # Add format hints where available | |||||
| 7864 | 144 | 126 | if ($param->{format}) { | |||
| 7865 | 1 | 2 | $cleaned{_format} = $param->{format}; | |||
| 7866 | } | |||||
| 7867 | ||||||
| 7868 | # Remove internal fields | |||||
| 7869 | 144 | 103 | delete $cleaned{_source}; | |||
| 7870 | 144 | 99 | delete $cleaned{semantic}; | |||
| 7871 | ||||||
| 7872 | 144 | 136 | return \%cleaned; | |||
| 7873 | } | |||||
| 7874 | ||||||
| 7875 | # -------------------------------------------------- | |||||
| 7876 | # _format_relationship | |||||
| 7877 | # | |||||
| 7878 | # Purpose: Format a relationship hashref as a | |||||
| 7879 | # short human-readable description | |||||
| 7880 | # string for use in YAML comments. | |||||
| 7881 | # | |||||
| 7882 | # Entry: $rel - relationship hashref as | |||||
| 7883 | # produced by the relationship | |||||
| 7884 | # detection methods. | |||||
| 7885 | # | |||||
| 7886 | # Exit: Returns a description string. | |||||
| 7887 | # Returns 'Unknown relationship' for | |||||
| 7888 | # unrecognised types. | |||||
| 7889 | # | |||||
| 7890 | # Side effects: None. | |||||
| 7891 | # -------------------------------------------------- | |||||
| 7892 | sub _format_relationship { | |||||
| 7893 | 14 | 6760 | my $rel = $_[0]; | |||
| 7894 | ||||||
| 7895 | 14 | 32 | if ($rel->{type} eq 'mutually_exclusive') { | |||
| 7896 | 3 3 | 4 8 | return 'Mutually exclusive: ' . join(', ', @{$rel->{params}}); | |||
| 7897 | } elsif ($rel->{type} eq 'required_group') { | |||||
| 7898 | 2 2 | 2 6 | return "Required group (OR): " . join(', ', @{$rel->{params}}); | |||
| 7899 | } elsif ($rel->{type} eq 'conditional_requirement') { | |||||
| 7900 | 2 | 5 | return "If $rel->{if} then $rel->{then_required} required"; | |||
| 7901 | } elsif ($rel->{type} eq 'dependency') { | |||||
| 7902 | 3 | 7 | return "$rel->{param} depends on $rel->{requires}"; | |||
| 7903 | } elsif ($rel->{type} eq 'value_constraint') { | |||||
| 7904 | 2 | 4 | return "If $rel->{if} then $rel->{then} $rel->{operator} $rel->{value}"; | |||
| 7905 | } elsif ($rel->{type} eq 'value_conditional') { | |||||
| 7906 | 1 | 4 | return "If $rel->{if}='$rel->{equals}' then $rel->{then_required} required"; | |||
| 7907 | } | |||||
| 7908 | 1 | 2 | return 'Unknown relationship'; | |||
| 7909 | } | |||||
| 7910 | ||||||
| 7911 | # -------------------------------------------------- | |||||
| 7912 | # _needs_object_instantiation | |||||
| 7913 | # | |||||
| 7914 | # Purpose: Determine whether a method requires | |||||
| 7915 | # an object to be instantiated before | |||||
| 7916 | # it can be called, and if so return | |||||
| 7917 | # the package name to instantiate. | |||||
| 7918 | # | |||||
| 7919 | # Entry: $method_name - name of the method. | |||||
| 7920 | # $method_body - method source string. | |||||
| 7921 | # $method_info - method hashref from | |||||
| 7922 | # _find_methods (optional, | |||||
| 7923 | # for backward compat). | |||||
| 7924 | # | |||||
| 7925 | # Exit: Returns the package name string if | |||||
| 7926 | # object instantiation is required. | |||||
| 7927 | # Returns undef if the method is a | |||||
| 7928 | # constructor, factory, singleton, or | |||||
| 7929 | # pure class method. | |||||
| 7930 | # | |||||
| 7931 | # Side effects: Logs analysis decisions to stdout | |||||
| 7932 | # when verbose is set. | |||||
| 7933 | # | |||||
| 7934 | # Notes: Orchestrates five detection sub-steps: | |||||
| 7935 | # factory detection, singleton detection, | |||||
| 7936 | # instance method detection, inheritance | |||||
| 7937 | # check, and constructor requirements. | |||||
| 7938 | # Instance method detection overrides | |||||
| 7939 | # factory detection when both fire. | |||||
| 7940 | # -------------------------------------------------- | |||||
| 7941 | sub _needs_object_instantiation { | |||||
| 7942 | 271 | 1317 | my ($self, $method_name, $method_body, $method_info) = @_; | |||
| 7943 | ||||||
| 7944 | # Allow method_info to be optional for backward compatibility | |||||
| 7945 | 271 | 285 | $method_info ||= {}; | |||
| 7946 | ||||||
| 7947 | 271 | 254 | my $doc = $self->{_document}; | |||
| 7948 | 271 | 478 | return undef unless $doc; | |||
| 7949 | ||||||
| 7950 | # Get the current package name | |||||
| 7951 | 271 | 427 | my $package_stmt = $doc->find_first('PPI::Statement::Package'); | |||
| 7952 | 271 | 28793 | my $current_package = $package_stmt ? $package_stmt->namespace : 'UNKNOWN'; | |||
| 7953 | 271 | 3980 | $self->{_package_name} //= $current_package; | |||
| 7954 | ||||||
| 7955 | # Initialize result structure | |||||
| 7956 | 271 | 710 | my $result = { | |||
| 7957 | package => $current_package, | |||||
| 7958 | needs_object => 0, | |||||
| 7959 | type => 'unknown', | |||||
| 7960 | details => {}, | |||||
| 7961 | constructor_params => undef, | |||||
| 7962 | }; | |||||
| 7963 | ||||||
| 7964 | # Track whether we should explicitly skip object instantiation | |||||
| 7965 | 271 | 234 | my $skip_object = 0; | |||
| 7966 | ||||||
| 7967 | # Skip constructors and destructors | |||||
| 7968 | 271 | 340 | if ($method_name eq 'new') { | |||
| 7969 | 12 | 23 | $self->_log(" OBJECT: Constructor '$method_name' detected; skipping instantiation analysis"); | |||
| 7970 | 12 | 23 | return undef; | |||
| 7971 | } | |||||
| 7972 | 259 | 501 | if($method_name =~ /^(create|build|construct|init|DESTROY)$/i) { | |||
| 7973 | 0 | 0 | $skip_object = 1; | |||
| 7974 | } | |||||
| 7975 | ||||||
| 7976 | # 1. Check for factory methods that return instances | |||||
| 7977 | 259 | 368 | my $is_factory = $self->_detect_factory_method( | |||
| 7978 | $method_name, $method_body, $current_package, $method_info | |||||
| 7979 | ); | |||||
| 7980 | ||||||
| 7981 | # 2. Check for singleton patterns | |||||
| 7982 | 259 | 400 | my $is_singleton = $self->_detect_singleton_pattern($method_name, $method_body); | |||
| 7983 | 259 | 709 | if ($is_singleton) { | |||
| 7984 | 1 | 1 | $result->{needs_object} = 0; # Singleton methods return the singleton instance | |||
| 7985 | 1 | 1 | $result->{type} = 'singleton_accessor'; | |||
| 7986 | 1 | 1 | $result->{details} = $is_singleton; | |||
| 7987 | 1 | 2 | $self->_log(" OBJECT: Detected singleton accessor '$method_name'"); | |||
| 7988 | # Singleton accessors typically don't need object creation in tests | |||||
| 7989 | # as they're called on the class, not instance | |||||
| 7990 | 1 | 0 | $skip_object = 1; | |||
| 7991 | } | |||||
| 7992 | ||||||
| 7993 | # 3. Check if this is an instance method that needs an object | |||||
| 7994 | 259 | 345 | my $is_instance_method = $self->_detect_instance_method($method_name, $method_body); | |||
| 7995 | 259 | 361 | if ($is_instance_method && | |||
| 7996 | ($is_instance_method->{explicit_self} || | |||||
| 7997 | $is_instance_method->{shift_self} || | |||||
| 7998 | $is_instance_method->{accesses_object_data})) { | |||||
| 7999 | ||||||
| 8000 | # Instance-only methods override factory detection | |||||
| 8001 | 119 | 104 | if ($is_factory) { | |||
| 8002 | 3 | 6 | $self->_log( | |||
| 8003 | " OBJECT: Instance-only method '$method_name' overrides factory detection" | |||||
| 8004 | ); | |||||
| 8005 | } | |||||
| 8006 | ||||||
| 8007 | 119 | 97 | $result->{needs_object} = 1; | |||
| 8008 | 119 | 102 | $result->{type} = 'instance_method'; | |||
| 8009 | 119 | 115 | $result->{details} = $is_instance_method; | |||
| 8010 | ||||||
| 8011 | # 4. Check for inheritance - if parent class constructor should be used | |||||
| 8012 | 119 | 155 | my $inheritance_info = $self->_check_inheritance_for_constructor( | |||
| 8013 | $current_package, $method_body | |||||
| 8014 | ); | |||||
| 8015 | 119 | 156 | if ($inheritance_info && $inheritance_info->{use_parent_constructor}) { | |||
| 8016 | 0 | 0 | $result->{package} = $inheritance_info->{parent_class}; | |||
| 8017 | 0 | 0 | $result->{details}{inheritance} = $inheritance_info; | |||
| 8018 | 0 | 0 | $self->_log( | |||
| 8019 | " OBJECT: Method '$method_name' uses parent class constructor: $inheritance_info->{parent_class}" | |||||
| 8020 | ); | |||||
| 8021 | } | |||||
| 8022 | ||||||
| 8023 | # 5. Check if constructor needs specific parameters | |||||
| 8024 | my $constructor_needs = $self->_detect_constructor_requirements( | |||||
| 8025 | $current_package, $result->{package} | |||||
| 8026 | 119 | 181 | ); | |||
| 8027 | 119 | 125 | if ($constructor_needs) { | |||
| 8028 | 1 | 1 | $result->{constructor_params} = $constructor_needs; | |||
| 8029 | 1 | 1 | $result->{details}{constructor_requirements} = $constructor_needs; | |||
| 8030 | 1 | 2 | $self->_log( | |||
| 8031 | " OBJECT: Constructor for $result->{package} requires parameters" | |||||
| 8032 | ); | |||||
| 8033 | } | |||||
| 8034 | ||||||
| 8035 | # Return the package name (or parent package) that needs instantiation | |||||
| 8036 | 119 | 371 | return $result->{package}; | |||
| 8037 | } | |||||
| 8038 | ||||||
| 8039 | # 6. Check for class methods that might need objects from other classes | |||||
| 8040 | 140 | 237 | my $needs_other_object = $self->_detect_external_object_dependency($method_body); | |||
| 8041 | 140 | 145 | if ($needs_other_object) { | |||
| 8042 | 0 | 0 | $result->{needs_object} = 1; | |||
| 8043 | 0 | 0 | $result->{type} = 'external_dependency'; | |||
| 8044 | $result->{package} = $needs_other_object->{package} | |||||
| 8045 | 0 | 0 | if $needs_other_object->{package}; | |||
| 8046 | 0 | 0 | $result->{details} = $needs_other_object; | |||
| 8047 | ||||||
| 8048 | 0 | 0 | $self->_log( | |||
| 8049 | " OBJECT: Method '$method_name' depends on external object: $needs_other_object->{package}" | |||||
| 8050 | ); | |||||
| 8051 | 0 | 0 | return $result->{package} if $result->{package}; | |||
| 8052 | } | |||||
| 8053 | ||||||
| 8054 | # Factory method only if NOT instance-based | |||||
| 8055 | 140 | 183 | if ($is_factory && !$skip_object) { | |||
| 8056 | 1 | 2 | $result->{needs_object} = 0; | |||
| 8057 | 1 | 0 | $result->{type} = 'factory'; | |||
| 8058 | 1 | 2 | $result->{details} = $is_factory; | |||
| 8059 | $self->_log( | |||||
| 8060 | " OBJECT: Detected factory method '$method_name' returns $is_factory->{returns_class} objects" | |||||
| 8061 | 1 | 5 | ) if $is_factory->{returns_class}; | |||
| 8062 | } | |||||
| 8063 | ||||||
| 8064 | 140 | 281 | return undef; | |||
| 8065 | } | |||||
| 8066 | ||||||
| 8067 | # -------------------------------------------------- | |||||
| 8068 | # _detect_factory_method | |||||
| 8069 | # | |||||
| 8070 | # Purpose: Detect whether a method is a factory | |||||
| 8071 | # that creates and returns object | |||||
| 8072 | # instances rather than operating on | |||||
| 8073 | # an existing instance. | |||||
| 8074 | # | |||||
| 8075 | # Entry: $method_name - method name string. | |||||
| 8076 | # $method_body - method source string. | |||||
| 8077 | # $current_package - current package name. | |||||
| 8078 | # $method_info - method hashref | |||||
| 8079 | # (optional). | |||||
| 8080 | # | |||||
| 8081 | # Exit: Returns a factory_info hashref on | |||||
| 8082 | # detection, or undef if the method | |||||
| 8083 | # is not a factory. | |||||
| 8084 | # The hashref includes: returns_class, | |||||
| 8085 | # confidence, and one of: | |||||
| 8086 | # returns_blessed, returns_new, | |||||
| 8087 | # returns_factory_result, pod_hint. | |||||
| 8088 | # | |||||
| 8089 | # Side effects: None. | |||||
| 8090 | # -------------------------------------------------- | |||||
| 8091 | sub _detect_factory_method { | |||||
| 8092 | 265 | 12064 | my ($self, $method_name, $method_body, $current_package, $method_info) = @_; | |||
| 8093 | ||||||
| 8094 | 265 | 213 | my %factory_info; | |||
| 8095 | ||||||
| 8096 | # Check method name patterns | |||||
| 8097 | 265 | 388 | if ($method_name =~ /^(create_|make_|build_|get_)/i) { | |||
| 8098 | 9 | 13 | $factory_info{name_pattern} = 1; | |||
| 8099 | } | |||||
| 8100 | ||||||
| 8101 | # Look for object creation patterns in the method body | |||||
| 8102 | 265 | 265 | if ($method_body) { | |||
| 8103 | # Pattern 1: Returns a blessed reference | |||||
| 8104 | 265 | 752 | if ($method_body =~ /return\s+bless\s*\{[^}]*\},\s*['"]?(\w+(?:::\w+)*|\$\w+)['"]?/s || | |||
| 8105 | $method_body =~ /bless\s*\{[^}]*\},\s*['"]?(\w+(?:::\w+)*|\$\w+)['"]?.*return/s) { | |||||
| 8106 | 5 | 6 | my $class_name = $1; | |||
| 8107 | ||||||
| 8108 | # Handle variable class names | |||||
| 8109 | 5 | 11 | if ($class_name =~ /^\$(class|self|package)$/) { | |||
| 8110 | 2 | 2 | $factory_info{returns_class} = $current_package; | |||
| 8111 | } elsif ($class_name =~ /^\$/) { | |||||
| 8112 | 0 | 0 | $factory_info{returns_class} = 'VARIABLE'; # Unknown variable | |||
| 8113 | } else { | |||||
| 8114 | 3 | 4 | $factory_info{returns_class} = $class_name; | |||
| 8115 | } | |||||
| 8116 | ||||||
| 8117 | 5 | 4 | $factory_info{returns_blessed} = 1; | |||
| 8118 | 5 | 6 | $factory_info{confidence} = 'high'; | |||
| 8119 | 5 | 6 | return \%factory_info; | |||
| 8120 | } | |||||
| 8121 | ||||||
| 8122 | # Pattern 2: Returns ->new() call on class or $self | |||||
| 8123 | 260 | 990 | if ($method_body =~ /return\s+([\$\w:]+)->new\(/s || | |||
| 8124 | $method_body =~ /([\$\w:]+)->new\(.*return/s) { | |||||
| 8125 | 5 | 6 | my $target = $1; | |||
| 8126 | ||||||
| 8127 | # Determine what class is being instantiated | |||||
| 8128 | 5 | 21 | if ($target eq '$self' || $target eq 'shift' || $target =~ /^\$/) { | |||
| 8129 | 0 | 0 | $factory_info{returns_class} = $current_package; | |||
| 8130 | 0 | 0 | $factory_info{self_new} = 1; | |||
| 8131 | } elsif ($target =~ /::/) { | |||||
| 8132 | 1 | 2 | $factory_info{returns_class} = $target; | |||
| 8133 | 1 | 2 | $factory_info{external_class} = 1; | |||
| 8134 | } else { | |||||
| 8135 | 4 | 4 | $factory_info{returns_class} = $target; | |||
| 8136 | } | |||||
| 8137 | ||||||
| 8138 | 5 | 6 | $factory_info{returns_new} = 1; | |||
| 8139 | 5 | 3 | $factory_info{confidence} = 'medium'; | |||
| 8140 | 5 | 9 | return \%factory_info; | |||
| 8141 | } | |||||
| 8142 | ||||||
| 8143 | # Pattern 3: Returns an object from another factory method | |||||
| 8144 | 255 | 821 | if ($method_body =~ /return\s+([\$\w:]+)->(create_|make_|build_|get_)/i || | |||
| 8145 | $method_body =~ /([\$\w:]+)->(create_|make_|build_|get_).*return/si) { | |||||
| 8146 | 0 | 0 | $factory_info{returns_factory_result} = 1; | |||
| 8147 | 0 | 0 | $factory_info{confidence} = 'low'; | |||
| 8148 | 0 | 0 | return \%factory_info; | |||
| 8149 | } | |||||
| 8150 | } | |||||
| 8151 | ||||||
| 8152 | # Check for return type hints in POD if available | |||||
| 8153 | 255 | 701 | if ($method_info && ref($method_info) eq 'HASH' && $method_info->{pod}) { | |||
| 8154 | 82 | 75 | my $pod = $method_info->{pod}; | |||
| 8155 | 82 | 173 | if ($pod =~ /returns?\s+(?:an?\s+)?(object|instance|new\s+\w+)/i) { | |||
| 8156 | 0 | 0 | $factory_info{pod_hint} = 1; | |||
| 8157 | 0 | 0 | $factory_info{confidence} = 'low'; | |||
| 8158 | 0 | 0 | return \%factory_info; | |||
| 8159 | } | |||||
| 8160 | } | |||||
| 8161 | ||||||
| 8162 | 255 | 326 | return undef; | |||
| 8163 | } | |||||
| 8164 | ||||||
| 8165 | # -------------------------------------------------- | |||||
| 8166 | # _detect_singleton_pattern | |||||
| 8167 | # | |||||
| 8168 | # Purpose: Detect singleton accessor methods | |||||
| 8169 | # that return a shared instance rather | |||||
| 8170 | # than creating a new object, by | |||||
| 8171 | # checking the method name and body | |||||
| 8172 | # for singleton patterns. | |||||
| 8173 | # | |||||
| 8174 | # Entry: $method_name - method name string. | |||||
| 8175 | # $method_body - method source string. | |||||
| 8176 | # | |||||
| 8177 | # Exit: Returns a singleton_info hashref on | |||||
| 8178 | # detection (always contains at least | |||||
| 8179 | # name_pattern => 1), or undef if the | |||||
| 8180 | # method name does not match the | |||||
| 8181 | # singleton accessor pattern. | |||||
| 8182 | # | |||||
| 8183 | # Side effects: None. | |||||
| 8184 | # | |||||
| 8185 | # Notes: Only fires for methods named | |||||
| 8186 | # instance, get_instance, singleton, | |||||
| 8187 | # or shared_instance. Methods not | |||||
| 8188 | # matching these names always return | |||||
| 8189 | # undef regardless of body content. | |||||
| 8190 | # -------------------------------------------------- | |||||
| 8191 | sub _detect_singleton_pattern { | |||||
| 8192 | 268 | 907 | my ($self, $method_name, $method_body) = @_; | |||
| 8193 | ||||||
| 8194 | # Check method name patterns | |||||
| 8195 | 268 | 463 | return undef unless $method_name =~ /^(instance|get_instance|singleton|shared_instance)$/i; | |||
| 8196 | ||||||
| 8197 | 7 | 23 | my %singleton_info = ( | |||
| 8198 | name_pattern => 1, | |||||
| 8199 | ); | |||||
| 8200 | ||||||
| 8201 | # Look for singleton patterns in code | |||||
| 8202 | 7 | 29 | if ($method_body) { | |||
| 8203 | # Pattern 1: Static/state variable holding instance | |||||
| 8204 | 7 | 21 | if ($method_body =~ /(?:my\s+)?(?:our\s+)?\$(?:instance|_instance|singleton)\b/s || | |||
| 8205 | $method_body =~ /state\s+\$(?:instance|_instance|singleton)\b/s) { | |||||
| 8206 | 5 | 5 | $singleton_info{static_variable} = 1; | |||
| 8207 | 5 | 6 | $singleton_info{confidence} = 'high'; | |||
| 8208 | } | |||||
| 8209 | ||||||
| 8210 | # Pattern 2: Returns $instance if defined (with better regex) | |||||
| 8211 | 7 | 30 | if ($method_body =~ /return\s+\$instance\s+if\s+(?:defined\s+)?\$instance/ || | |||
| 8212 | $method_body =~ /unless\s+\$instance.*?=\s*.*?new/) { | |||||
| 8213 | 1 | 1 | $singleton_info{returns_instance} = 1; | |||
| 8214 | 1 | 1 | $singleton_info{confidence} = 'high'; | |||
| 8215 | } | |||||
| 8216 | ||||||
| 8217 | # Pattern 3: ||= new() pattern (with better regex) | |||||
| 8218 | 7 | 20 | if ($method_body =~ /\$instance\s*\|\|=\s*.*?new/ || | |||
| 8219 | $method_body =~ /\$instance\s*=\s*.*?new\s+unless\s+(?:defined\s+)?\$instance/) { | |||||
| 8220 | 3 | 3 | $singleton_info{lazy_initialization} = 1; | |||
| 8221 | 3 | 2 | $singleton_info{confidence} = 'medium'; | |||
| 8222 | } | |||||
| 8223 | ||||||
| 8224 | # Pattern 4: Direct return of $instance variable | |||||
| 8225 | 7 | 11 | if ($method_body =~ /return\s+\$instance;/) { | |||
| 8226 | 4 | 5 | $singleton_info{returns_instance} = 1; | |||
| 8227 | 4 | 6 | $singleton_info{confidence} = 'high' unless $singleton_info{confidence}; | |||
| 8228 | } | |||||
| 8229 | } | |||||
| 8230 | ||||||
| 8231 | 7 | 11 | return \%singleton_info if keys %singleton_info > 0; # Need at least name pattern | |||
| 8232 | ||||||
| 8233 | 0 | 0 | return undef; | |||
| 8234 | } | |||||
| 8235 | ||||||
| 8236 | # -------------------------------------------------- | |||||
| 8237 | # _detect_instance_method | |||||
| 8238 | # | |||||
| 8239 | # Purpose: Detect whether a method is an | |||||
| 8240 | # instance method that requires a | |||||
| 8241 | # blessed object ($self) to be called, | |||||
| 8242 | # through multiple detection patterns | |||||
| 8243 | # of varying confidence. | |||||
| 8244 | # | |||||
| 8245 | # Entry: $method_name - method name string. | |||||
| 8246 | # $method_body - method source string. | |||||
| 8247 | # | |||||
| 8248 | # Exit: Returns an instance_info hashref if | |||||
| 8249 | # any instance method signal is found. | |||||
| 8250 | # Returns undef if no signals are | |||||
| 8251 | # detected. | |||||
| 8252 | # The hashref may contain: explicit_self, | |||||
| 8253 | # shift_self, uses_self, | |||||
| 8254 | # accesses_object_data, | |||||
| 8255 | # calls_instance_methods, | |||||
| 8256 | # private_method, and confidence. | |||||
| 8257 | # | |||||
| 8258 | # Side effects: None. | |||||
| 8259 | # -------------------------------------------------- | |||||
| 8260 | sub _detect_instance_method { | |||||
| 8261 | 267 | 2243 | my ($self, $method_name, $method_body) = @_; | |||
| 8262 | ||||||
| 8263 | 267 | 228 | my %instance_info; | |||
| 8264 | ||||||
| 8265 | # Pattern 1: my ($self, ...) = @_; | |||||
| 8266 | 267 | 659 | if ($method_body =~ /my\s*\(\s*\$self\s*[,)]/) { | |||
| 8267 | 107 | 122 | $instance_info{explicit_self} = 1; | |||
| 8268 | 107 | 99 | $instance_info{confidence} = 'high'; | |||
| 8269 | } | |||||
| 8270 | ||||||
| 8271 | # Pattern 2: my $self = shift; | |||||
| 8272 | elsif ($method_body =~ /my\s+\$self\s*=\s*shift/) { | |||||
| 8273 | 13 | 14 | $instance_info{shift_self} = 1; | |||
| 8274 | 13 | 12 | $instance_info{confidence} = 'high'; | |||
| 8275 | } | |||||
| 8276 | ||||||
| 8277 | # Pattern 3: Uses $self->something (including hash/array access) | |||||
| 8278 | # This catches $self->{value} and $self->[0] as well as $self->method() | |||||
| 8279 | elsif ($method_body =~ /\$self\s*->\s*(\w+|[\{\[])/) { | |||||
| 8280 | 5 | 5 | $instance_info{uses_self} = 1; | |||
| 8281 | 5 | 6 | $instance_info{confidence} = 'medium'; | |||
| 8282 | } | |||||
| 8283 | ||||||
| 8284 | # Pattern 4: Accesses object data: $self->{...}, $self->[...] | |||||
| 8285 | 267 | 377 | if ($method_body =~ /\$self\s*->\s*[\{\[]/) { | |||
| 8286 | 47 | 46 | $instance_info{accesses_object_data} = 1; | |||
| 8287 | 47 | 59 | $instance_info{confidence} = 'high' unless $instance_info{confidence} eq 'high'; | |||
| 8288 | } | |||||
| 8289 | ||||||
| 8290 | # Pattern 5: Calls other instance methods on $self | |||||
| 8291 | 267 | 371 | if ($method_body =~ /\$self\s*->\s*(\w+)\s*\(/s) { | |||
| 8292 | 4 | 4 | $instance_info{calls_instance_methods} = []; | |||
| 8293 | 4 | 9 | while ($method_body =~ /\$self\s*->\s*(\w+)\s*\(/g) { | |||
| 8294 | 4 4 | 3 7 | push @{$instance_info{calls_instance_methods}}, $1; | |||
| 8295 | } | |||||
| 8296 | 4 4 | 3 6 | $instance_info{confidence} = 'high' if @{$instance_info{calls_instance_methods}}; | |||
| 8297 | } | |||||
| 8298 | ||||||
| 8299 | # Pattern 6: Method name suggests instance method (not perfect but helpful) | |||||
| 8300 | 267 | 358 | if ($method_name =~ /^_/ && $method_name !~ /^_new/) { | |||
| 8301 | # Private methods are usually instance methods | |||||
| 8302 | 2 | 2 | $instance_info{private_method} = 1; | |||
| 8303 | 2 | 4 | $instance_info{confidence} = 'low' unless exists $instance_info{confidence}; | |||
| 8304 | } | |||||
| 8305 | ||||||
| 8306 | 267 | 353 | return \%instance_info if keys %instance_info; | |||
| 8307 | 140 | 141 | return undef; | |||
| 8308 | } | |||||
| 8309 | ||||||
| 8310 | # -------------------------------------------------- | |||||
| 8311 | # _check_inheritance_for_constructor | |||||
| 8312 | # | |||||
| 8313 | # Purpose: Determine whether the current package | |||||
| 8314 | # uses an inherited constructor from a | |||||
| 8315 | # parent class, by examining use parent, | |||||
| 8316 | # use base, and @ISA declarations. | |||||
| 8317 | # | |||||
| 8318 | # Entry: $current_package - current package | |||||
| 8319 | # name string. | |||||
| 8320 | # $method_body - method source string | |||||
| 8321 | # (checked for SUPER:: | |||||
| 8322 | # calls). | |||||
| 8323 | # | |||||
| 8324 | # Exit: Returns an inheritance_info hashref | |||||
| 8325 | # if any inheritance information is | |||||
| 8326 | # found, or undef otherwise. | |||||
| 8327 | # The hashref may contain: | |||||
| 8328 | # parent_statements, isa_array, | |||||
| 8329 | # uses_super, calls_super_new, | |||||
| 8330 | # has_own_constructor, | |||||
| 8331 | # use_parent_constructor, parent_class. | |||||
| 8332 | # | |||||
| 8333 | # Side effects: None. | |||||
| 8334 | # -------------------------------------------------- | |||||
| 8335 | sub _check_inheritance_for_constructor { | |||||
| 8336 | 120 | 4764 | my ($self, $current_package, $method_body) = @_; | |||
| 8337 | ||||||
| 8338 | 120 | 94 | my $doc = $self->{_document}; | |||
| 8339 | 120 | 157 | return undef unless $doc; | |||
| 8340 | ||||||
| 8341 | 120 | 94 | my %inheritance_info; | |||
| 8342 | ||||||
| 8343 | # 1. Look for parent/base statements | |||||
| 8344 | my @parent_classes; | |||||
| 8345 | ||||||
| 8346 | # Find all 'use parent' or 'use base' statements | |||||
| 8347 | 120 | 166 | my $includes = $doc->find('PPI::Statement::Include') || []; | |||
| 8348 | 120 | 484435 | foreach my $inc (@$includes) { | |||
| 8349 | 236 | 245 | my $content = $inc->content; | |||
| 8350 | 236 | 2174 | if ($content =~ /use\s+(parent|base)\s+['"]?([\w:]+)['"]?/) { | |||
| 8351 | 3 | 3 | push @parent_classes, $2; | |||
| 8352 | 3 | 4 | $inheritance_info{parent_statements} = \@parent_classes; | |||
| 8353 | } | |||||
| 8354 | # Also check for multiple parents: use parent qw(Class1 Class2) | |||||
| 8355 | 236 | 264 | if ($content =~ /use\s+(parent|base)\s+qw?[\(\[]?(.+?)[\)\]]?;/) { | |||
| 8356 | 0 | 0 | my $parents = $2; | |||
| 8357 | 0 | 0 | my @multi_parents = split /\s+/, $parents; | |||
| 8358 | 0 | 0 | push @parent_classes, @multi_parents; | |||
| 8359 | 0 | 0 | $inheritance_info{parent_statements} = \@parent_classes; | |||
| 8360 | } | |||||
| 8361 | } | |||||
| 8362 | ||||||
| 8363 | # 2. Look for @ISA assignments (with or without 'our') | |||||
| 8364 | 120 | 140 | my $isas = $doc->find('PPI::Statement::Variable') || []; | |||
| 8365 | 120 | 481230 | foreach my $isa (@$isas) { | |||
| 8366 | 777 | 664 | my $content = $isa->content(); | |||
| 8367 | # Match both "our @ISA = qw(...)" and "@ISA = qw(...)" | |||||
| 8368 | 777 | 19326 | if ($content =~ /(?:our\s+)?\@ISA\s*=\s*qw?[\(\[]?(.+?)[\)\]]?/) { | |||
| 8369 | 0 | 0 | my $parents = $1; | |||
| 8370 | 0 | 0 | my @isa_parents = split(/\s+/, $parents); | |||
| 8371 | 0 | 0 | push @parent_classes, @isa_parents; | |||
| 8372 | 0 | 0 | $inheritance_info{isa_array} = \@isa_parents; | |||
| 8373 | } | |||||
| 8374 | } | |||||
| 8375 | ||||||
| 8376 | # Also look for @ISA in regular statements | |||||
| 8377 | 120 | 135 | my $statements = $doc->find('PPI::Statement') || []; | |||
| 8378 | 120 | 476862 | foreach my $stmt (@$statements) { | |||
| 8379 | 4769 | 3681 | my $content = $stmt->content; | |||
| 8380 | 4769 | 116901 | if ($content =~ /\@ISA\s*=\s*qw?[\(\[]?(.+?)[\)\]]?/) { | |||
| 8381 | 1 | 2 | my $parents = $1; | |||
| 8382 | 1 | 1 | my @isa_parents = split(/\s+/, $parents); | |||
| 8383 | 1 | 1 | push @parent_classes, @isa_parents; | |||
| 8384 | 1 | 1 | $inheritance_info{isa_array} = \@isa_parents; | |||
| 8385 | } | |||||
| 8386 | } | |||||
| 8387 | ||||||
| 8388 | # 3. Check if method uses SUPER:: calls | |||||
| 8389 | 120 | 279 | if ($method_body && $method_body =~ /SUPER::/) { | |||
| 8390 | 1 | 1 | $inheritance_info{uses_super} = 1; | |||
| 8391 | 1 | 1 | if ($method_body =~ /SUPER::new/) { | |||
| 8392 | 0 | 0 | $inheritance_info{calls_super_new} = 1; | |||
| 8393 | } | |||||
| 8394 | } | |||||
| 8395 | ||||||
| 8396 | # 4. Check if current package has its own new method | |||||
| 8397 | my $has_own_new = $doc->find(sub { | |||||
| 8398 | 45978 | 209278 | $_[1]->isa('PPI::Statement::Sub') && | |||
| 8399 | $_[1]->name eq 'new' | |||||
| 8400 | 120 | 257 | }); | |||
| 8401 | ||||||
| 8402 | 120 | 769 | if ($has_own_new) { | |||
| 8403 | 29 | 49 | $inheritance_info{has_own_constructor} = 1; | |||
| 8404 | } elsif (@parent_classes) { | |||||
| 8405 | # No own constructor, but has parents - might need parent constructor | |||||
| 8406 | 0 | 0 | $inheritance_info{use_parent_constructor} = 1; | |||
| 8407 | 0 | 0 | $inheritance_info{parent_class} = $parent_classes[0]; # Use first parent | |||
| 8408 | } | |||||
| 8409 | ||||||
| 8410 | 120 | 181 | return \%inheritance_info if keys %inheritance_info; | |||
| 8411 | 91 | 201 | return undef; | |||
| 8412 | } | |||||
| 8413 | ||||||
| 8414 | # -------------------------------------------------- | |||||
| 8415 | # _detect_constructor_requirements | |||||
| 8416 | # | |||||
| 8417 | # Purpose: Analyse the new() method of the | |||||
| 8418 | # current or target package to determine | |||||
| 8419 | # what parameters the constructor | |||||
| 8420 | # requires, including required and | |||||
| 8421 | # optional parameters and their defaults. | |||||
| 8422 | # | |||||
| 8423 | # Entry: $current_package - the package being | |||||
| 8424 | # analysed. | |||||
| 8425 | # $target_package - the package whose | |||||
| 8426 | # constructor will | |||||
| 8427 | # be called (may | |||||
| 8428 | # differ from current | |||||
| 8429 | # for inherited | |||||
| 8430 | # constructors). | |||||
| 8431 | # | |||||
| 8432 | # Exit: Returns a requirements hashref on | |||||
| 8433 | # success, or undef if no new() method | |||||
| 8434 | # is found. For external classes, | |||||
| 8435 | # returns a minimal hashref with | |||||
| 8436 | # external_class => 1. | |||||
| 8437 | # | |||||
| 8438 | # Side effects: None. | |||||
| 8439 | # -------------------------------------------------- | |||||
| 8440 | sub _detect_constructor_requirements { | |||||
| 8441 | 120 | 4306 | my ($self, $current_package, $target_package) = @_; | |||
| 8442 | ||||||
| 8443 | 120 | 99 | my $doc = $self->{_document}; | |||
| 8444 | 120 | 160 | return undef unless $doc; | |||
| 8445 | ||||||
| 8446 | # If target is different from current, we can't analyze it | |||||
| 8447 | # (external class, parent class in different file) | |||||
| 8448 | 120 | 124 | if ($target_package ne $current_package) { | |||
| 8449 | return { | |||||
| 8450 | 0 | 0 | external_class => 1, | |||
| 8451 | package => $target_package, | |||||
| 8452 | note => "Constructor for external class $target_package - parameters unknown" | |||||
| 8453 | }; | |||||
| 8454 | } | |||||
| 8455 | ||||||
| 8456 | # Find the new method in current package | |||||
| 8457 | my $new_method = $doc->find_first(sub { | |||||
| 8458 | 39882 | 179524 | $_[1]->isa('PPI::Statement::Sub') && | |||
| 8459 | $_[1]->name eq 'new' | |||||
| 8460 | 120 | 229 | }); | |||
| 8461 | ||||||
| 8462 | 120 | 1250 | return undef unless $new_method; | |||
| 8463 | ||||||
| 8464 | 29 | 24 | my %requirements; | |||
| 8465 | ||||||
| 8466 | # Get method body | |||||
| 8467 | 29 | 31 | my $body = $new_method->content; | |||
| 8468 | ||||||
| 8469 | # Look for parameter extraction patterns - handle both $self and $class | |||||
| 8470 | 29 | 2529 | if ($body =~ /my\s*\(\s*\$(self|class)\s*,\s*(.+?)\)\s*=\s*\@_/s) { | |||
| 8471 | 28 | 35 | my $params = $2; | |||
| 8472 | 28 | 38 | my @param_names = $params =~ /\$(\w+)/g; | |||
| 8473 | ||||||
| 8474 | 28 | 30 | if (@param_names) { | |||
| 8475 | 2 | 3 | $requirements{parameters} = \@param_names; | |||
| 8476 | 2 | 2 | $requirements{parameter_count} = scalar @param_names; | |||
| 8477 | } | |||||
| 8478 | } | |||||
| 8479 | ||||||
| 8480 | # Look for shift patterns | |||||
| 8481 | 29 | 39 | my @shift_params; | |||
| 8482 | 29 | 47 | while ($body =~ /my\s+\$(\w+)\s*=\s*shift/g) { | |||
| 8483 | 0 | 0 | push @shift_params, $1; | |||
| 8484 | } | |||||
| 8485 | # Remove $self or $class if present | |||||
| 8486 | 29 0 | 26 0 | @shift_params = grep { $_ !~ /^(self|class)$/i } @shift_params; | |||
| 8487 | ||||||
| 8488 | 29 | 32 | if (@shift_params) { | |||
| 8489 | 0 | 0 | $requirements{parameters} = \@shift_params; | |||
| 8490 | 0 | 0 | $requirements{parameter_count} = scalar @shift_params; | |||
| 8491 | 0 | 0 | $requirements{shift_pattern} = 1; | |||
| 8492 | } | |||||
| 8493 | ||||||
| 8494 | # Look for validation of parameters (more flexible pattern) | |||||
| 8495 | 29 | 23 | my @required_params; | |||
| 8496 | 29 | 42 | if ($body =~ /croak.*unless.*(?:defined\s+)?\$(\w+)/g) { | |||
| 8497 | 1 | 1 | push @required_params, $1; | |||
| 8498 | } | |||||
| 8499 | 29 | 40 | if ($body =~ /die.*unless.*(?:defined\s+)?\$(\w+)/g) { | |||
| 8500 | 1 | 1 | push @required_params, $1; | |||
| 8501 | } | |||||
| 8502 | ||||||
| 8503 | 29 | 28 | if (@required_params) { | |||
| 8504 | 2 | 1 | $requirements{required_parameters} = \@required_params; | |||
| 8505 | } | |||||
| 8506 | ||||||
| 8507 | # Look for default values (optional parameters) | |||||
| 8508 | 29 | 19 | my @optional_params; | |||
| 8509 | my %default_values; | |||||
| 8510 | ||||||
| 8511 | # Use the new _extract_default_value method | |||||
| 8512 | # Check for each parameter in the constructor body | |||||
| 8513 | 29 | 37 | if ($requirements{parameters}) { | |||
| 8514 | 2 2 | 0 2 | foreach my $param (@{$requirements{parameters}}) { | |||
| 8515 | 3 | 4 | my $default = $self->_extract_default_value($param, $body); | |||
| 8516 | 3 | 15 | if (defined $default) { | |||
| 8517 | 1 | 1 | push @optional_params, $param; | |||
| 8518 | 1 | 1 | $default_values{$param} = $default; | |||
| 8519 | } | |||||
| 8520 | } | |||||
| 8521 | } | |||||
| 8522 | ||||||
| 8523 | 29 | 30 | if (@optional_params) { | |||
| 8524 | 1 | 1 | $requirements{optional_parameters} = \@optional_params; | |||
| 8525 | 1 | 2 | $requirements{default_values} = \%default_values; | |||
| 8526 | } | |||||
| 8527 | ||||||
| 8528 | 29 | 36 | return \%requirements if keys %requirements; | |||
| 8529 | 27 | 41 | return undef; | |||
| 8530 | } | |||||
| 8531 | ||||||
| 8532 | ||||||
| 8533 | # -------------------------------------------------- | |||||
| 8534 | # _detect_external_object_dependency | |||||
| 8535 | # | |||||
| 8536 | # Purpose: Detect whether a method creates or | |||||
| 8537 | # depends on objects from classes other | |||||
| 8538 | # than the current package, by scanning | |||||
| 8539 | # for ->new() calls on named classes | |||||
| 8540 | # and method calls on typed variables. | |||||
| 8541 | # | |||||
| 8542 | # Entry: $method_body - method source string. | |||||
| 8543 | # May be undef. | |||||
| 8544 | # | |||||
| 8545 | # Exit: Returns a dependency_info hashref if | |||||
| 8546 | # external object usage is found, or | |||||
| 8547 | # undef otherwise. | |||||
| 8548 | # The hashref may contain: | |||||
| 8549 | # creates_objects (arrayref of class | |||||
| 8550 | # names), uses_objects (arrayref of | |||||
| 8551 | # class names), and package (the primary | |||||
| 8552 | # dependency class). | |||||
| 8553 | # | |||||
| 8554 | # Side effects: None. | |||||
| 8555 | # -------------------------------------------------- | |||||
| 8556 | sub _detect_external_object_dependency { | |||||
| 8557 | 141 | 163 | my ($self, $method_body) = @_; | |||
| 8558 | ||||||
| 8559 | 141 | 128 | return undef unless $method_body; | |||
| 8560 | ||||||
| 8561 | 141 | 111 | my %dependency_info; | |||
| 8562 | ||||||
| 8563 | # Pattern 1: Creates objects of other classes with ->new() or ->create() | |||||
| 8564 | # Reset pos for global match | |||||
| 8565 | 141 | 195 | pos($method_body) = 0; | |||
| 8566 | 141 | 273 | while ($method_body =~ /(\w+(?:::\w+)*)->(?:new|create)\(/g) { | |||
| 8567 | 3 | 5 | my $class = $1; | |||
| 8568 | 3 | 10 | next if $class eq 'main' || $class eq '__PACKAGE__' || $class =~ /^\$/; | |||
| 8569 | 2 2 | 1 12 | push @{$dependency_info{creates_objects}}, $class; | |||
| 8570 | } | |||||
| 8571 | ||||||
| 8572 | 141 | 183 | if ($dependency_info{creates_objects}) { | |||
| 8573 | # Remove duplicates | |||||
| 8574 | 1 | 1 | my %seen; | |||
| 8575 | 1 2 1 | 1 3 10 | $dependency_info{creates_objects} = [grep { !$seen{$_}++ } @{$dependency_info{creates_objects}}]; | |||
| 8576 | 1 | 2 | $dependency_info{package} = $dependency_info{creates_objects}[0]; | |||
| 8577 | } | |||||
| 8578 | ||||||
| 8579 | # Pattern 2: Calls methods on objects from other classes | |||||
| 8580 | 141 | 194 | if ($method_body =~ /\$(\w+)->\w+\(/g) { | |||
| 8581 | 1 | 1 | my %object_vars; | |||
| 8582 | 1 | 2 | while ($method_body =~ /\$(\w+)->\w+\(/g) { | |||
| 8583 | 1 | 1 | $object_vars{$1}++; | |||
| 8584 | } | |||||
| 8585 | ||||||
| 8586 | # Try to determine type of object variables | |||||
| 8587 | 1 | 1 | my @object_classes; | |||
| 8588 | 1 | 1 | foreach my $var (keys %object_vars) { | |||
| 8589 | # Look for type declarations or assignments | |||||
| 8590 | 1 | 50 | if ($method_body =~ /my\s+\$$var\s*=\s*(\w+(?:::\w+)+)->(?:new|create)/) { | |||
| 8591 | 1 | 2 | push @object_classes, $1; | |||
| 8592 | } elsif ($method_body =~ /my\s+\$$var\s*=\s*(\w+(?:::\w+)+)->/) { | |||||
| 8593 | 0 | 0 | push @object_classes, $1; | |||
| 8594 | } | |||||
| 8595 | } | |||||
| 8596 | ||||||
| 8597 | 1 | 1 | if (@object_classes) { | |||
| 8598 | 1 | 2 | $dependency_info{uses_objects} = \@object_classes; | |||
| 8599 | 1 | 1 | $dependency_info{package} = $object_classes[0] unless $dependency_info{package}; | |||
| 8600 | } | |||||
| 8601 | } | |||||
| 8602 | ||||||
| 8603 | # Pattern 3: Receives objects as parameters (type hints in comments/POD) | |||||
| 8604 | # This would need integration with parameter analysis | |||||
| 8605 | ||||||
| 8606 | 141 | 161 | return \%dependency_info if keys %dependency_info; | |||
| 8607 | 140 | 153 | return undef; | |||
| 8608 | } | |||||
| 8609 | ||||||
| 8610 | # -------------------------------------------------- | |||||
| 8611 | # _get_parent_class | |||||
| 8612 | # | |||||
| 8613 | # Purpose: Find the first parent class of the | |||||
| 8614 | # current package by searching the | |||||
| 8615 | # PPI document for use parent, use base, | |||||
| 8616 | # or our @ISA declarations. | |||||
| 8617 | # | |||||
| 8618 | # Entry: None (operates on $self->{_document}). | |||||
| 8619 | # | |||||
| 8620 | # Exit: Returns the parent class name string, | |||||
| 8621 | # or undef if no parent is found. | |||||
| 8622 | # | |||||
| 8623 | # Side effects: None. | |||||
| 8624 | # -------------------------------------------------- | |||||
| 8625 | sub _get_parent_class { | |||||
| 8626 | 2 | 3458 | my $self = $_[0]; | |||
| 8627 | ||||||
| 8628 | 2 | 3 | my $doc = $self->{_document}; | |||
| 8629 | 2 | 2 | return unless $doc; | |||
| 8630 | ||||||
| 8631 | # Look for use parent statements | |||||
| 8632 | my $parent_stmt = $doc->find_first(sub { | |||||
| 8633 | 68 | 355 | $_[1]->isa('PPI::Statement::Include') && | |||
| 8634 | $_[1]->type eq 'use' && | |||||
| 8635 | $_[1]->module =~ /^(parent|base)$/ && | |||||
| 8636 | $_[1]->arguments =~ /['"](\w+(?:::\w+)*)['"]/ | |||||
| 8637 | 2 | 5 | }); | |||
| 8638 | 2 | 13 | if ($parent_stmt) { | |||
| 8639 | 0 | 0 | my $parent = $1; | |||
| 8640 | 0 | 0 | return $parent; | |||
| 8641 | } | |||||
| 8642 | ||||||
| 8643 | # Look for @ISA assignment | |||||
| 8644 | my $isa_stmt = $doc->find_first(sub { | |||||
| 8645 | 68 | 414 | $_[1]->isa('PPI::Statement') && | |||
| 8646 | $_[1]->content =~ /our\s+\@ISA\s*=\s*\(\s*['"](\w+(?:::\w+)*)['"]\s*\)/ | |||||
| 8647 | 2 | 5 | }); | |||
| 8648 | 2 | 12 | if ($isa_stmt && $isa_stmt->content =~ /['"](\w+(?:::\w+)*)['"]/) { | |||
| 8649 | 0 | 0 | return $1; | |||
| 8650 | } | |||||
| 8651 | ||||||
| 8652 | 2 | 3 | return; | |||
| 8653 | } | |||||
| 8654 | ||||||
| 8655 | # -------------------------------------------------- | |||||
| 8656 | # _get_class_for_instance_method | |||||
| 8657 | # | |||||
| 8658 | # Purpose: Determine which class should be used | |||||
| 8659 | # for object instantiation when testing | |||||
| 8660 | # an instance method, preferring the | |||||
| 8661 | # current package if it has a new() | |||||
| 8662 | # method, falling back to the parent | |||||
| 8663 | # class otherwise. | |||||
| 8664 | # | |||||
| 8665 | # Entry: None (operates on $self->{_document}). | |||||
| 8666 | # | |||||
| 8667 | # Exit: Returns the package name string to | |||||
| 8668 | # use for instantiation. Returns | |||||
| 8669 | # 'UNKNOWN_PACKAGE' if no package | |||||
| 8670 | # statement is found. | |||||
| 8671 | # | |||||
| 8672 | # Side effects: Stores the package name in | |||||
| 8673 | # $self->{_package_name} if not | |||||
| 8674 | # already set. | |||||
| 8675 | # -------------------------------------------------- | |||||
| 8676 | sub _get_class_for_instance_method { | |||||
| 8677 | 2 | 3208 | my $self = $_[0]; | |||
| 8678 | ||||||
| 8679 | # Get the current package | |||||
| 8680 | 2 | 2 | my $doc = $self->{_document}; | |||
| 8681 | 2 | 3 | my $package_stmt = $doc->find_first('PPI::Statement::Package'); | |||
| 8682 | 2 | 359 | return 'UNKNOWN_PACKAGE' unless $package_stmt; | |||
| 8683 | 1 | 2 | my $package_name = $package_stmt->namespace; | |||
| 8684 | 1 | 14 | $self->{_package_name} //= $package_name; | |||
| 8685 | ||||||
| 8686 | # Check if the current package has a 'new' method | |||||
| 8687 | my $has_new = $doc->find(sub { | |||||
| 8688 | 46 | 249 | $_[1]->isa('PPI::Statement::Sub') && $_[1]->name eq 'new' | |||
| 8689 | 1 | 1 | }); | |||
| 8690 | ||||||
| 8691 | 1 | 6 | if ($has_new) { | |||
| 8692 | 1 | 2 | return $package_name; | |||
| 8693 | } | |||||
| 8694 | ||||||
| 8695 | # Otherwise, try to get the parent class | |||||
| 8696 | 0 | 0 | my $parent = $self->_get_parent_class(); | |||
| 8697 | 0 | 0 | return $parent if $parent; | |||
| 8698 | ||||||
| 8699 | # Fallback to current package | |||||
| 8700 | 0 | 0 | return $package_name; | |||
| 8701 | } | |||||
| 8702 | ||||||
| 8703 | # -------------------------------------------------- | |||||
| 8704 | # _extract_default_value | |||||
| 8705 | # | |||||
| 8706 | # Purpose: Extract a default value for a named | |||||
| 8707 | # parameter from a method body by | |||||
| 8708 | # matching multiple common Perl default | |||||
| 8709 | # assignment idioms. | |||||
| 8710 | # | |||||
| 8711 | # Entry: $param - parameter name string. | |||||
| 8712 | # $code - method body source string. | |||||
| 8713 | # | |||||
| 8714 | # Exit: Returns the cleaned default value | |||||
| 8715 | # scalar on success, or undef if no | |||||
| 8716 | # default assignment pattern is found. | |||||
| 8717 | # | |||||
| 8718 | # Side effects: None. | |||||
| 8719 | # | |||||
| 8720 | # Notes: Eight patterns are tried in order: | |||||
| 8721 | # ||, //=, defined ternary, unless | |||||
| 8722 | # defined, ||=, //, multi-line if | |||||
| 8723 | # !defined, unless defined block. | |||||
| 8724 | # Comment lines are stripped from the | |||||
| 8725 | # code before matching to avoid false | |||||
| 8726 | # positives. Delegates to | |||||
| 8727 | # _clean_default_value for value | |||||
| 8728 | # normalisation. | |||||
| 8729 | # -------------------------------------------------- | |||||
| 8730 | sub _extract_default_value { | |||||
| 8731 | 189 | 8473 | my ($self, $param, $code) = @_; | |||
| 8732 | ||||||
| 8733 | 189 | 308 | return undef unless $param && $code; | |||
| 8734 | ||||||
| 8735 | # Clean up the code for easier pattern matching | |||||
| 8736 | # Remove comments to avoid false positives | |||||
| 8737 | 189 | 143 | my $clean_code = $code; | |||
| 8738 | 189 | 253 | $clean_code =~ s/#.*$//gm; | |||
| 8739 | 189 | 2053 | $clean_code =~ s/^\s+|\s+$//g; | |||
| 8740 | ||||||
| 8741 | # Pattern 1: $param = $param || 'default_value' | |||||
| 8742 | # Also handles: $param = $arg || 'default' | |||||
| 8743 | 189 | 6169 | if ($clean_code =~ /\$$param\s*=\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*\|\|\s*([^;]+)/) { | |||
| 8744 | 10 | 13 | my $default = $1; | |||
| 8745 | 10 | 7 | $default =~ s/\s*;\s*$//; | |||
| 8746 | 10 | 11 | $default = $self->_clean_default_value($default); | |||
| 8747 | 10 | 19 | return $default if defined $default; | |||
| 8748 | } | |||||
| 8749 | ||||||
| 8750 | # Pattern 2: $param //= 'default_value' | |||||
| 8751 | 179 | 2071 | if ($clean_code =~ /\$$param\s*\/\/=\s*([^;]+)/) { | |||
| 8752 | 9 | 13 | my $default = $1; | |||
| 8753 | 9 | 8 | $default =~ s/\s*;\s*$//; | |||
| 8754 | 9 | 14 | $default = $self->_clean_default_value($default); | |||
| 8755 | 9 | 21 | return $default if defined $default; | |||
| 8756 | } | |||||
| 8757 | ||||||
| 8758 | # Pattern 3: $param = defined $param ? $param : 'default' | |||||
| 8759 | # Also handles: $param = defined $arg ? $arg : 'default' | |||||
| 8760 | 171 | 8117 | if ($clean_code =~ /\$$param\s*=\s*defined\s+(?:\$$param|\$[a-zA-Z_]\w*)\s*\?\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*:\s*([^;]+)/) { | |||
| 8761 | 6 | 8 | my $default = $1; | |||
| 8762 | 6 | 5 | $default =~ s/\s*;\s*$//; | |||
| 8763 | 6 | 6 | $default = $self->_clean_default_value($default); | |||
| 8764 | 6 | 15 | return $default if defined $default; | |||
| 8765 | } | |||||
| 8766 | ||||||
| 8767 | # Pattern 4: $param = 'default' unless defined $param; | |||||
| 8768 | 165 | 5137 | if ($clean_code =~ /\$$param\s*=\s*([^;]+?)\s+unless\s+defined\s+(?:\$$param|\$[a-zA-Z_]\w*)/) { | |||
| 8769 | 3 | 3 | my $default = $1; | |||
| 8770 | 3 | 5 | $default = $self->_clean_default_value($default); | |||
| 8771 | 3 | 9 | return $default if defined $default; | |||
| 8772 | } | |||||
| 8773 | ||||||
| 8774 | # Pattern 5: $param ||= 'default' | |||||
| 8775 | 162 | 1900 | if ($clean_code =~ /\$$param\s*\|\|=\s*([^;]+)/) { | |||
| 8776 | 3 | 5 | my $default = $1; | |||
| 8777 | 3 | 1 | $default =~ s/\s*;\s*$//; | |||
| 8778 | 3 | 5 | $default = $self->_clean_default_value($default); | |||
| 8779 | 3 | 8 | return $default if defined $default; | |||
| 8780 | } | |||||
| 8781 | ||||||
| 8782 | # Pattern 6: $param = $arg // 'default' | |||||
| 8783 | 159 | 4257 | if ($clean_code =~ /\$$param\s*=\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*\/\/\s*([^;]+)/) { | |||
| 8784 | 2 | 2 | my $default = $1; | |||
| 8785 | 2 | 2 | $default =~ s/\s*;\s*$//; | |||
| 8786 | 2 | 3 | $default = $self->_clean_default_value($default); | |||
| 8787 | 2 | 4 | return $default if defined $default; | |||
| 8788 | } | |||||
| 8789 | ||||||
| 8790 | # Pattern 7: Multi-line: if (!defined $param) { $param = 'default'; } | |||||
| 8791 | 158 | 4349 | if ($clean_code =~ /if\s*\(\s*!defined\s+\$$param\s*\)\s*\{[^}]*\$$param\s*=\s*([^;]+)/s) { | |||
| 8792 | 1 | 1 | my $default = $1; | |||
| 8793 | 1 | 1 | $default =~ s/\s*;\s*$//; | |||
| 8794 | 1 | 2 | $default = $self->_clean_default_value($default); | |||
| 8795 | 1 | 7 | return $default if defined $default; | |||
| 8796 | } | |||||
| 8797 | ||||||
| 8798 | # Pattern 8: unless (defined $param) { $param = 'default'; } | |||||
| 8799 | 157 | 4190 | if ($clean_code =~ /unless\s*\(\s*defined\s+\$$param\s*\)\s*\{[^}]*\$$param\s*=\s*([^;]+)/s) { | |||
| 8800 | 1 | 2 | my $default = $1; | |||
| 8801 | 1 | 1 | $default =~ s/\s*;\s*$//; | |||
| 8802 | 1 | 1 | $default = $self->_clean_default_value($default); | |||
| 8803 | 1 | 4 | return $default if defined $default; | |||
| 8804 | } | |||||
| 8805 | ||||||
| 8806 | 156 | 387 | return undef; | |||
| 8807 | } | |||||
| 8808 | ||||||
| 8809 | # -------------------------------------------------- | |||||
| 8810 | # _extract_test_hints | |||||
| 8811 | # | |||||
| 8812 | # Purpose: Extract structured test hints from | |||||
| 8813 | # a method's code and schema, including | |||||
| 8814 | # boundary values, invalid inputs, and | |||||
| 8815 | # valid input examples from POD. | |||||
| 8816 | # | |||||
| 8817 | # Entry: $method - method hashref. | |||||
| 8818 | # $schema - schema hashref as built so | |||||
| 8819 | # far by _analyze_method. | |||||
| 8820 | # | |||||
| 8821 | # Exit: Returns a hints hashref with keys: | |||||
| 8822 | # boundary_values, invalid_inputs, | |||||
| 8823 | # equivalence_classes, valid_inputs. | |||||
| 8824 | # Keys with empty arrays are deleted | |||||
| 8825 | # before returning. | |||||
| 8826 | # | |||||
| 8827 | # Side effects: None. | |||||
| 8828 | # -------------------------------------------------- | |||||
| 8829 | sub _extract_test_hints { | |||||
| 8830 | 269 | 269 | my ($self, $method, $schema) = @_; | |||
| 8831 | ||||||
| 8832 | 269 | 542 | my %hints = ( | |||
| 8833 | boundary_values => [], | |||||
| 8834 | invalid_inputs => [], | |||||
| 8835 | equivalence_classes => [], | |||||
| 8836 | valid_inputs => [], | |||||
| 8837 | ); | |||||
| 8838 | ||||||
| 8839 | 269 | 242 | my $code = $method->{body}; | |||
| 8840 | 269 | 276 | return {} unless $code; | |||
| 8841 | ||||||
| 8842 | 268 | 387 | $self->_extract_invalid_input_hints($code, \%hints); | |||
| 8843 | 268 | 391 | $self->_extract_boundary_value_hints($code, \%hints); | |||
| 8844 | ||||||
| 8845 | # prune empties | |||||
| 8846 | 268 | 367 | for my $k (keys %hints) { | |||
| 8847 | 1072 1072 | 664 1144 | delete $hints{$k} unless @{$hints{$k}}; | |||
| 8848 | } | |||||
| 8849 | ||||||
| 8850 | 268 | 322 | return \%hints; | |||
| 8851 | } | |||||
| 8852 | ||||||
| 8853 | # -------------------------------------------------- | |||||
| 8854 | # _extract_invalid_input_hints | |||||
| 8855 | # | |||||
| 8856 | # Purpose: Detect likely invalid input values | |||||
| 8857 | # from a method body by looking for | |||||
| 8858 | # defined checks, empty string checks, | |||||
| 8859 | # and negative number checks. | |||||
| 8860 | # | |||||
| 8861 | # Entry: $code - method body source string. | |||||
| 8862 | # $hints - hints hashref (modified in | |||||
| 8863 | # place via invalid_inputs key). | |||||
| 8864 | # | |||||
| 8865 | # Exit: Returns nothing. Appends to | |||||
| 8866 | # $hints->{invalid_inputs}. | |||||
| 8867 | # | |||||
| 8868 | # Side effects: None. | |||||
| 8869 | # -------------------------------------------------- | |||||
| 8870 | sub _extract_invalid_input_hints { | |||||
| 8871 | 275 | 818 | my ($self, $code, $hints) = @_; | |||
| 8872 | ||||||
| 8873 | # undef invalid | |||||
| 8874 | 275 | 394 | if ($code =~ /defined\s*\(\s*\$/) { | |||
| 8875 | 5 5 | 4 7 | push @{ $hints->{invalid_inputs} }, 'undef'; | |||
| 8876 | } | |||||
| 8877 | ||||||
| 8878 | # empty string invalid | |||||
| 8879 | 275 | 503 | if ($code =~ /\beq\s*''/ || $code =~ /\blength\s*\(/) { | |||
| 8880 | 5 5 | 5 7 | push @{ $hints->{invalid_inputs} }, ''; | |||
| 8881 | } | |||||
| 8882 | ||||||
| 8883 | # negative number invalid | |||||
| 8884 | 275 | 375 | if ($code =~ /\$\w+\s*<\s*0/) { | |||
| 8885 | 7 7 | 4 10 | push @{ $hints->{invalid_inputs} }, -1; | |||
| 8886 | } | |||||
| 8887 | } | |||||
| 8888 | ||||||
| 8889 | # -------------------------------------------------- | |||||
| 8890 | # _extract_boundary_value_hints | |||||
| 8891 | # | |||||
| 8892 | # Purpose: Extract numeric boundary values from | |||||
| 8893 | # comparison operators in a method body, | |||||
| 8894 | # adding both the boundary value and | |||||
| 8895 | # the value one step either side. | |||||
| 8896 | # | |||||
| 8897 | # Entry: $code - method body source string. | |||||
| 8898 | # $hints - hints hashref (modified in | |||||
| 8899 | # place via boundary_values key). | |||||
| 8900 | # | |||||
| 8901 | # Exit: Returns nothing. Appends to and | |||||
| 8902 | # deduplicates $hints->{boundary_values}. | |||||
| 8903 | # | |||||
| 8904 | # Side effects: None. | |||||
| 8905 | # -------------------------------------------------- | |||||
| 8906 | sub _extract_boundary_value_hints { | |||||
| 8907 | 273 | 727 | my ($self, $code, $hints) = @_; | |||
| 8908 | ||||||
| 8909 | 273 | 673 | while ($code =~ /\$\w+\s*(<=|<|>=|>)\s*(\d+)/g) { | |||
| 8910 | 23 | 41 | my ($op, $n) = ($1, $2); | |||
| 8911 | ||||||
| 8912 | 23 | 40 | if ($op eq '<') { | |||
| 8913 | 11 11 | 9 26 | push @{ $hints->{boundary_values} }, $n, $n+1; | |||
| 8914 | } elsif ($op eq '<=') { | |||||
| 8915 | 2 2 | 1 5 | push @{ $hints->{boundary_values} }, $n, $n+1; | |||
| 8916 | } elsif ($op eq '>') { | |||||
| 8917 | 8 8 | 8 22 | push @{ $hints->{boundary_values} }, $n, $n-1; | |||
| 8918 | } elsif ($op eq '>=') { | |||||
| 8919 | 2 2 | 2 4 | push @{ $hints->{boundary_values} }, $n, $n-1; | |||
| 8920 | } | |||||
| 8921 | } | |||||
| 8922 | ||||||
| 8923 | # Remove duplicates | |||||
| 8924 | 273 | 219 | my %seen; | |||
| 8925 | 273 46 273 | 195 83 501 | $hints->{boundary_values} = [ grep { !$seen{$_}++ } @{ $hints->{boundary_values} } ]; | |||
| 8926 | } | |||||
| 8927 | ||||||
| 8928 | # -------------------------------------------------- | |||||
| 8929 | # _extract_pod_examples | |||||
| 8930 | # | |||||
| 8931 | # Purpose: Extract example method call patterns | |||||
| 8932 | # from a method's SYNOPSIS POD section | |||||
| 8933 | # and add them as valid_inputs hints. | |||||
| 8934 | # | |||||
| 8935 | # Entry: $pod - POD string for the method. | |||||
| 8936 | # May be undef. | |||||
| 8937 | # $hints - hints hashref (modified in | |||||
| 8938 | # place via valid_inputs key). | |||||
| 8939 | # | |||||
| 8940 | # Exit: Returns $hints. Appends to | |||||
| 8941 | # $hints->{valid_inputs}. | |||||
| 8942 | # | |||||
| 8943 | # Side effects: Logs the number of examples found | |||||
| 8944 | # to stdout when verbose is set. | |||||
| 8945 | # -------------------------------------------------- | |||||
| 8946 | sub _extract_pod_examples { | |||||
| 8947 | 269 | 288 | my ($self, $pod, $hints) = @_; | |||
| 8948 | ||||||
| 8949 | 269 | 294 | return $hints unless $pod; | |||
| 8950 | ||||||
| 8951 | 93 | 58 | my @examples; | |||
| 8952 | ||||||
| 8953 | # Extract SYNOPSIS | |||||
| 8954 | 93 | 143 | return $hints unless $pod =~ /=head2\s+SYNOPSIS\s*(.+?)(?=\n=head|\z)/s; | |||
| 8955 | 4 | 7 | my $synopsis = $1; | |||
| 8956 | ||||||
| 8957 | # Constructor examples: ->wilma(foo => 'bar', count => 5) | |||||
| 8958 | 4 | 15 | while ($synopsis =~ /->([a-z_0-9A-Z]+)\s*\(\s*(.*?)\s*\)/sg) { | |||
| 8959 | 4 | 5 | my ($method, $args) = ($1, $2); | |||
| 8960 | 4 | 4 | my %kv; | |||
| 8961 | ||||||
| 8962 | 4 | 9 | while ($args =~ /(\w+)\s*=>\s*(?:'([^']*)'|"([^"]*)"|(\d+))/g) { | |||
| 8963 | 7 | 7 | my $key = $1; | |||
| 8964 | 7 | 26 | my $val = defined $2 ? $2 : defined $3 ? $3 : $4; | |||
| 8965 | 7 | 11 | $kv{$key} = $val; | |||
| 8966 | } | |||||
| 8967 | ||||||
| 8968 | 4 | 13 | push @examples, { | |||
| 8969 | style => 'named', | |||||
| 8970 | source => 'pod', | |||||
| 8971 | args => \%kv, | |||||
| 8972 | function => $method, # TODO: add a sanity check this is what we expect | |||||
| 8973 | } if %kv; | |||||
| 8974 | } | |||||
| 8975 | ||||||
| 8976 | 4 | 4 | unless(scalar(@examples)) { | |||
| 8977 | # Positional calls: func($a, $b) | |||||
| 8978 | 1 | 5 | while ($synopsis =~ /\b(\w+)\s*\(\s*(.*?)\s*\)/sg) { | |||
| 8979 | 1 | 1 | my ($func, $argstr) = ($1, $2); | |||
| 8980 | ||||||
| 8981 | # next if $func eq 'new'; # already handled | |||||
| 8982 | ||||||
| 8983 | 1 2 | 3 5 | my @args = map { s/^\s+|\s+$//gr } split /\s*,\s*/, $argstr; | |||
| 8984 | ||||||
| 8985 | 1 | 1 | next unless @args; | |||
| 8986 | ||||||
| 8987 | 1 | 4 | push @examples, { | |||
| 8988 | style => 'positional', | |||||
| 8989 | source => 'pod', | |||||
| 8990 | function => $func, | |||||
| 8991 | args => \@args, | |||||
| 8992 | }; | |||||
| 8993 | } | |||||
| 8994 | } | |||||
| 8995 | ||||||
| 8996 | 4 | 4 | if (scalar(@examples)) { | |||
| 8997 | 4 | 9 | $hints->{valid_inputs} ||= []; | |||
| 8998 | 4 4 | 3 4 | push @{ $hints->{valid_inputs} }, @examples; | |||
| 8999 | ||||||
| 9000 | 4 | 6 | $self->_log(" POD: extracted " . scalar(@examples) . " example call(s)"); | |||
| 9001 | } | |||||
| 9002 | ||||||
| 9003 | 4 | 6 | for my $k (qw(boundary_values invalid_inputs valid_inputs equivalence_classes)) { | |||
| 9004 | 16 | 18 | $hints->{$k} //= []; | |||
| 9005 | } | |||||
| 9006 | ||||||
| 9007 | 4 | 4 | return $hints; | |||
| 9008 | } | |||||
| 9009 | ||||||
| 9010 | # -------------------------------------------------- | |||||
| 9011 | # _clean_default_value | |||||
| 9012 | # | |||||
| 9013 | # Purpose: Normalise a raw default value string | |||||
| 9014 | # extracted from code or POD into a | |||||
| 9015 | # clean Perl scalar, handling quoted | |||||
| 9016 | # strings, numeric literals, boolean | |||||
| 9017 | # keywords, empty containers, and | |||||
| 9018 | # undef. | |||||
| 9019 | # | |||||
| 9020 | # Entry: $value - raw value string. | |||||
| 9021 | # May be undef. | |||||
| 9022 | # $from_code - true if the value was | |||||
| 9023 | # extracted from source | |||||
| 9024 | # code (affects escape | |||||
| 9025 | # sequence handling). | |||||
| 9026 | # | |||||
| 9027 | # Exit: Returns the cleaned value: | |||||
| 9028 | # undef for undef or unparseable | |||||
| 9029 | # {} for empty hashrefs | |||||
| 9030 | # [] for empty arrayrefs | |||||
| 9031 | # integer for whole numbers | |||||
| 9032 | # float for decimal numbers | |||||
| 9033 | # 1 or 0 for boolean keywords | |||||
| 9034 | # string for everything else | |||||
| 9035 | # | |||||
| 9036 | # Side effects: None. | |||||
| 9037 | # -------------------------------------------------- | |||||
| 9038 | sub _clean_default_value { | |||||
| 9039 | 154 | 167 | my ($self, $value, $from_code) = @_; | |||
| 9040 | ||||||
| 9041 | 154 | 181 | return unless defined $value; | |||
| 9042 | ||||||
| 9043 | # Remove leading/trailing whitespace | |||||
| 9044 | 152 | 271 | $value =~ s/^\s+|\s+$//g; | |||
| 9045 | ||||||
| 9046 | # Remove parenthetical notes like "(no password)" only if there's content before them | |||||
| 9047 | 152 | 121 | $value =~ s/(\S+)\s*\([^)]+\)\s*$/$1/; | |||
| 9048 | 152 | 192 | $value =~ s/^\s+|\s+$//g; | |||
| 9049 | ||||||
| 9050 | # Handle chained || or // operators - extract the rightmost value | |||||
| 9051 | 152 | 323 | if ($value =~ /\|\||\/{2}/) { | |||
| 9052 | 7 | 25 | my @parts = split(/\s*(?:\|\||\/{2})\s*/, $value); | |||
| 9053 | 7 | 7 | $value = $parts[-1]; | |||
| 9054 | 7 | 12 | $value =~ s/^\s+|\s+$//g; | |||
| 9055 | } | |||||
| 9056 | ||||||
| 9057 | # Remove trailing semicolon if present | |||||
| 9058 | 152 | 119 | $value =~ s/;\s*$//; | |||
| 9059 | ||||||
| 9060 | # Handle q{}, qq{}, qw{} quotes | |||||
| 9061 | 152 | 241 | if ($value =~ /^qq?\{(.*?)\}$/s) { | |||
| 9062 | 3 | 2 | $value = $1; | |||
| 9063 | } elsif ($value =~ /^qw\{(.*?)\}$/s) { | |||||
| 9064 | 0 | 0 | $value = $1; | |||
| 9065 | } elsif ($value =~ /^q[qwx]?\s*([^a-zA-Z0-9\{\[])(.*?)\1$/s) { | |||||
| 9066 | 0 | 0 | $value = $2; | |||
| 9067 | } | |||||
| 9068 | ||||||
| 9069 | # Handle quoted strings | |||||
| 9070 | 152 | 190 | if ($value =~ /^(['"])(.*)\1$/s) { | |||
| 9071 | 43 | 41 | $value = $2; | |||
| 9072 | ||||||
| 9073 | 43 | 39 | if ($from_code) { | |||
| 9074 | # In regex captures from source code, escape sequences are doubled | |||||
| 9075 | # \\n in capture needs to become \n for the test | |||||
| 9076 | 14 | 10 | $value =~ s/\\\\/\\/g; | |||
| 9077 | } | |||||
| 9078 | ||||||
| 9079 | # Only unescape the quote characters themselves | |||||
| 9080 | 43 | 45 | $value =~ s/\\"/"/g; | |||
| 9081 | 43 | 31 | $value =~ s/\\'/'/g; | |||
| 9082 | ||||||
| 9083 | # If NOT from code (i.e., from POD), interpret escape sequences | |||||
| 9084 | 43 | 37 | unless ($from_code) { | |||
| 9085 | 29 | 20 | $value =~ s/\\n/\n/g; | |||
| 9086 | 29 | 25 | $value =~ s/\\r/\r/g; | |||
| 9087 | 29 | 17 | $value =~ s/\\t/\t/g; | |||
| 9088 | 29 | 23 | $value =~ s/\\\\/\\/g; | |||
| 9089 | } | |||||
| 9090 | } | |||||
| 9091 | ||||||
| 9092 | # Sometimes trailing ) is left on | |||||
| 9093 | 152 | 150 | if($value !~ /^\(/) { | |||
| 9094 | 150 | 132 | $value =~ s/\)$//; | |||
| 9095 | } | |||||
| 9096 | ||||||
| 9097 | # Handle Perl empty hash (must be before numeric/boolean checks) | |||||
| 9098 | 152 | 122 | if ($value =~ /^\{\s*\}$/) { | |||
| 9099 | 6 | 8 | return {}; | |||
| 9100 | } | |||||
| 9101 | ||||||
| 9102 | # Handle Perl empty list/array | |||||
| 9103 | 146 | 139 | if ($value =~ /^\[\s*\]$/) { | |||
| 9104 | 6 | 10 | return []; | |||
| 9105 | } | |||||
| 9106 | ||||||
| 9107 | # Handle numeric values | |||||
| 9108 | 140 | 206 | if ($value =~ /^-?\d+(?:\.\d+)?$/) { | |||
| 9109 | 59 | 55 | if ($value =~ /\./) { | |||
| 9110 | 11 | 28 | return $value + 0; | |||
| 9111 | } else { | |||||
| 9112 | 48 | 85 | return int($value); | |||
| 9113 | } | |||||
| 9114 | } | |||||
| 9115 | ||||||
| 9116 | # Handle boolean keywords | |||||
| 9117 | 81 | 106 | if ($value =~ /^(true|false)$/i) { | |||
| 9118 | 9 | 38 | return lc($1) eq 'true' ? 1 : 0; | |||
| 9119 | } | |||||
| 9120 | ||||||
| 9121 | # Handle Perl boolean constants | |||||
| 9122 | 72 | 82 | if ($value eq '1') { | |||
| 9123 | 0 | 0 | return 1; | |||
| 9124 | } elsif ($value eq '0') { | |||||
| 9125 | 0 | 0 | return 0; | |||
| 9126 | } | |||||
| 9127 | ||||||
| 9128 | # Handle undef | |||||
| 9129 | 72 | 65 | if ($value eq 'undef') { | |||
| 9130 | 10 | 16 | return undef; | |||
| 9131 | } | |||||
| 9132 | ||||||
| 9133 | # Handle __PACKAGE__ and similar constants | |||||
| 9134 | 62 | 49 | if ($value eq '__PACKAGE__') { | |||
| 9135 | 1 | 2 | return '__PACKAGE__'; | |||
| 9136 | } | |||||
| 9137 | ||||||
| 9138 | # Remove surrounding parentheses | |||||
| 9139 | 61 | 50 | $value =~ s/^\((.+)\)$/$1/; | |||
| 9140 | ||||||
| 9141 | # Handle expressions we can't evaluate | |||||
| 9142 | 61 | 96 | if ($value =~ /^\$[a-zA-Z_]/ || $value =~ /\(.*\)/) { | |||
| 9143 | 5 | 14 | return if($value =~ /^\$|\@|\%/); # The default is a value, so who knows its type? | |||
| 9144 | # return $value; | |||||
| 9145 | } | |||||
| 9146 | ||||||
| 9147 | 58 | 92 | return $value; | |||
| 9148 | } | |||||
| 9149 | ||||||
| 9150 | # -------------------------------------------------- | |||||
| 9151 | # _validate_pod_code_agreement | |||||
| 9152 | # | |||||
| 9153 | # Purpose: Compare POD parameter documentation | |||||
| 9154 | # against code-inferred parameters and | |||||
| 9155 | # return a list of disagreements when | |||||
| 9156 | # strict_pod mode is enabled. | |||||
| 9157 | # | |||||
| 9158 | # Entry: $pod_params - hashref of parameters | |||||
| 9159 | # from POD analysis. | |||||
| 9160 | # $code_params - hashref of parameters | |||||
| 9161 | # from code analysis. | |||||
| 9162 | # $method_name - method name string, | |||||
| 9163 | # used for context in | |||||
| 9164 | # error messages. | |||||
| 9165 | # | |||||
| 9166 | # Exit: Returns a list of disagreement | |||||
| 9167 | # strings. Returns an empty list if | |||||
| 9168 | # all parameters agree. | |||||
| 9169 | # | |||||
| 9170 | # Side effects: None. | |||||
| 9171 | # | |||||
| 9172 | # Notes: Type mismatches are classified as | |||||
| 9173 | # either 'compatible' (e.g. integer vs | |||||
| 9174 | # number) or 'incompatible' via | |||||
| 9175 | # _types_are_compatible. $self and | |||||
| 9176 | # $class are excluded from undocumented | |||||
| 9177 | # parameter warnings in appropriate | |||||
| 9178 | # context. | |||||
| 9179 | # -------------------------------------------------- | |||||
| 9180 | sub _validate_pod_code_agreement { | |||||
| 9181 | 13 | 43 | my ($self, $pod_params, $code_params, $method_name) = @_; | |||
| 9182 | ||||||
| 9183 | 13 | 11 | my @errors; | |||
| 9184 | ||||||
| 9185 | # Get all parameter names from both sources | |||||
| 9186 | 13 19 | 25 29 | my %all_params = map { $_ => 1 } (keys %$pod_params, keys %$code_params); | |||
| 9187 | ||||||
| 9188 | 13 | 23 | foreach my $param (sort keys %all_params) { | |||
| 9189 | 15 | 26 | my $pod = $pod_params->{$param} || {}; | |||
| 9190 | 15 | 19 | my $code = $code_params->{$param} || {}; | |||
| 9191 | ||||||
| 9192 | # Check if parameter exists in both | |||||
| 9193 | 15 | 29 | if (exists $pod_params->{$param} && !exists $code_params->{$param}) { | |||
| 9194 | 1 | 2 | push @errors, "Parameter '\$$param' documented in POD but not found in code signature"; | |||
| 9195 | 1 | 1 | next; | |||
| 9196 | } | |||||
| 9197 | ||||||
| 9198 | 14 | 29 | if(!exists $pod_params->{$param} && exists $code_params->{$param}) { | |||
| 9199 | 10 | 14 | if(($method_name eq 'new') && ($param eq 'class')) { | |||
| 9200 | # $class is usually not documented in new() | |||||
| 9201 | 1 | 1 | next; | |||
| 9202 | } | |||||
| 9203 | 9 | 21 | if(($method_name ne 'new') && ($param eq 'self')) { | |||
| 9204 | # $self is usually not documented in a method | |||||
| 9205 | 1 | 2 | next; | |||
| 9206 | } | |||||
| 9207 | 8 | 10 | push @errors, "Parameter '\$$param' found in code but not documented in POD"; | |||
| 9208 | 8 | 9 | next; | |||
| 9209 | } | |||||
| 9210 | ||||||
| 9211 | # Compare types if both exist | |||||
| 9212 | 4 | 10 | if ($pod->{type} && $code->{type} && $pod->{type} ne $code->{type}) { | |||
| 9213 | 2 | 4 | if (!$self->_types_are_compatible($pod->{type}, $code->{type})) { | |||
| 9214 | 1 | 2 | push @errors, "Type mismatch for '\$$param': POD says '$pod->{type}', code suggests '$code->{type}' (incompatible)"; | |||
| 9215 | } else { | |||||
| 9216 | 1 | 2 | push @errors, "Type difference for '\$$param': POD says '$pod->{type}', code suggests '$code->{type}' (compatible)"; | |||
| 9217 | } | |||||
| 9218 | } | |||||
| 9219 | ||||||
| 9220 | # Compare optional status if both exist | |||||
| 9221 | 4 | 8 | if (exists $pod->{optional} && exists $code->{optional} && | |||
| 9222 | $pod->{optional} != $code->{optional}) { | |||||
| 9223 | 0 | 0 | my $pod_status = $pod->{optional} ? 'optional' : 'required'; | |||
| 9224 | 0 | 0 | my $code_status = $code->{optional} ? 'optional' : 'required'; | |||
| 9225 | 0 | 0 | push @errors, "Optional status mismatch for '\$$param': POD says '$pod_status', code suggests '$code_status'"; | |||
| 9226 | } | |||||
| 9227 | ||||||
| 9228 | # Check constraints (min/max) | |||||
| 9229 | 4 | 7 | if (defined $pod->{min} && defined $code->{min} && $pod->{min} != $code->{min}) { | |||
| 9230 | 0 | 0 | push @errors, "Min constraint mismatch for '\$$param': POD says '$pod->{min}', code suggests '$code->{min}'"; | |||
| 9231 | } | |||||
| 9232 | ||||||
| 9233 | 4 | 6 | if (defined $pod->{max} && defined $code->{max} && $pod->{max} != $code->{max}) { | |||
| 9234 | 0 | 0 | push @errors, "Max constraint mismatch for '\$$param': POD says '$pod->{max}', code suggests '$code->{max}'"; | |||
| 9235 | } | |||||
| 9236 | ||||||
| 9237 | # Check regex patterns | |||||
| 9238 | 4 | 7 | if ($pod->{matches} && $code->{matches} && $pod->{matches} ne $code->{matches}) { | |||
| 9239 | 0 | 0 | push @errors, "Pattern mismatch for '\$$param': POD says '$pod->{matches}', code suggests '$code->{matches}'"; | |||
| 9240 | } | |||||
| 9241 | } | |||||
| 9242 | ||||||
| 9243 | # Return errors (empty array if no errors) | |||||
| 9244 | 13 | 28 | return @errors; | |||
| 9245 | } | |||||
| 9246 | ||||||
| 9247 | # -------------------------------------------------- | |||||
| 9248 | # _validate_strictness_level | |||||
| 9249 | # | |||||
| 9250 | # Purpose: Validate and normalise the strict_pod | |||||
| 9251 | # option value accepted by new() into | |||||
| 9252 | # an integer level: 0 (off), 1 (warn), | |||||
| 9253 | # or 2 (fatal). | |||||
| 9254 | # | |||||
| 9255 | # Entry: $val - the raw value passed to | |||||
| 9256 | # strict_pod in new(). May be | |||||
| 9257 | # undef, a number, or a string. | |||||
| 9258 | # | |||||
| 9259 | # Exit: Returns 0, 1, or 2. | |||||
| 9260 | # Croaks if the value is not recognised. | |||||
| 9261 | # | |||||
| 9262 | # Side effects: None. | |||||
| 9263 | # -------------------------------------------------- | |||||
| 9264 | sub _validate_strictness_level { | |||||
| 9265 | 440 | 11908 | my $val = $_[0]; | |||
| 9266 | ||||||
| 9267 | 440 | 911 | return 0 unless defined $val; | |||
| 9268 | ||||||
| 9269 | # Numeric | |||||
| 9270 | 29 | 81 | return 0 if $val =~ /^(0|off|none)$/i; | |||
| 9271 | 23 | 56 | return 1 if $val =~ /^(1|warn|warning)$/i; | |||
| 9272 | 11 | 35 | return 2 if $val =~ /^(2|fatal|die|error)$/i; | |||
| 9273 | ||||||
| 9274 | 2 | 10 | croak("Invalid value for --strict-pod: '$val' (use off|warn|fatal)"); | |||
| 9275 | } | |||||
| 9276 | ||||||
| 9277 | # -------------------------------------------------- | |||||
| 9278 | # _types_are_compatible | |||||
| 9279 | # | |||||
| 9280 | # Purpose: Determine whether two type strings | |||||
| 9281 | # are compatible for POD/code agreement | |||||
| 9282 | # checking, allowing semantically | |||||
| 9283 | # equivalent types (e.g. 'integer' and | |||||
| 9284 | # 'number') to coexist without | |||||
| 9285 | # triggering a strict POD warning. | |||||
| 9286 | # | |||||
| 9287 | # Entry: $pod_type - type string from POD. | |||||
| 9288 | # $code_type - type string from code. | |||||
| 9289 | # | |||||
| 9290 | # Exit: Returns 1 if compatible, 0 otherwise. | |||||
| 9291 | # | |||||
| 9292 | # Side effects: None. | |||||
| 9293 | # -------------------------------------------------- | |||||
| 9294 | sub _types_are_compatible { | |||||
| 9295 | 20 | 27 | my ($self, $pod_type, $code_type) = @_; | |||
| 9296 | ||||||
| 9297 | # Exact match is always compatible | |||||
| 9298 | 20 | 29 | return 1 if $pod_type eq $code_type; | |||
| 9299 | ||||||
| 9300 | # Define compatibility matrix | |||||
| 9301 | 15 | 37 | my %compatible_types = ( | |||
| 9302 | 'integer' => ['number', 'scalar'], | |||||
| 9303 | 'number' => ['scalar'], | |||||
| 9304 | 'string' => ['scalar'], | |||||
| 9305 | 'scalar' => ['string', 'integer', 'number'], | |||||
| 9306 | 'arrayref' => ['array'], | |||||
| 9307 | 'hashref' => ['hash'], | |||||
| 9308 | ); | |||||
| 9309 | ||||||
| 9310 | # Check if code_type is compatible with pod_type | |||||
| 9311 | 15 | 16 | if (my $allowed = $compatible_types{$pod_type}) { | |||
| 9312 | 13 17 | 13 38 | return grep { $_ eq $code_type } @$allowed; | |||
| 9313 | } | |||||
| 9314 | ||||||
| 9315 | # Check if pod_type is compatible with code_type | |||||
| 9316 | 2 | 4 | if (my $allowed = $compatible_types{$code_type}) { | |||
| 9317 | 2 2 | 2 8 | return grep { $_ eq $pod_type } @$allowed; | |||
| 9318 | } | |||||
| 9319 | ||||||
| 9320 | 0 | 0 | return 0; # Not compatible | |||
| 9321 | } | |||||
| 9322 | ||||||
| 9323 - 9372 | =head2 generate_pod_validation_report
Generate a human-readable report of all POD/code disagreements found
across a set of extracted schemas.
my $schemas = $extractor->extract_all(no_write => 1);
my $report = $extractor->generate_pod_validation_report($schemas);
print $report;
=head3 Arguments
=over 4
=item * C<$schemas>
A hashref of method name to schema hashref as returned by
C<extract_all>. Required.
=back
=head3 Returns
A string containing the full validation report, or a single line
confirming all methods passed if no disagreements were found.
=head3 Side effects
None.
=head3 Notes
Only methods whose schemas contain a C<_pod_validation_errors> key
(populated when C<strict_pod> is 1 or 2) appear in the report. If
C<strict_pod> was 0 when C<extract_all> was called, this method will
always return the all-passed message.
=head3 API specification
=head4 input
{
self => { type => OBJECT, isa => 'App::Test::Generator::SchemaExtractor' },
schemas => { type => HASHREF },
}
=head4 output
{ type => SCALAR }
=cut | |||||
| 9373 | ||||||
| 9374 | sub generate_pod_validation_report { | |||||
| 9375 | 14 | 987 | my ($self, $schemas) = @_; | |||
| 9376 | ||||||
| 9377 | 14 | 10 | my @reports; | |||
| 9378 | 14 | 29 | foreach my $method_name (sort keys %$schemas) { | |||
| 9379 | 22 | 13 | my $schema = $schemas->{$method_name}; | |||
| 9380 | ||||||
| 9381 | 22 | 31 | if (my $errors = $schema->{_pod_validation_errors}) { | |||
| 9382 | 12 | 17 | push @reports, "Method: $method_name"; | |||
| 9383 | 12 | 17 | push @reports, " Severity: " . ($schema->{_pod_disagreement} ? 'warning' : 'fatal'); | |||
| 9384 | 12 | 11 | push @reports, " Errors:"; | |||
| 9385 | 12 12 | 12 15 | push @reports, map { " - $_" } @$errors; | |||
| 9386 | 12 | 13 | push @reports, ''; | |||
| 9387 | } | |||||
| 9388 | } | |||||
| 9389 | ||||||
| 9390 | 14 | 19 | if (@reports) { | |||
| 9391 | 8 | 22 | return join("\n", "POD/Code Validation Report:", '=' x 40, '', @reports); | |||
| 9392 | } else { | |||||
| 9393 | 6 | 8 | return 'POD/Code Validation: All methods passed consistency checks.'; | |||
| 9394 | } | |||||
| 9395 | } | |||||
| 9396 | ||||||
| 9397 - 9401 | =head2 _log Log a message if verbose mode is on. =cut | |||||
| 9402 | ||||||
| 9403 | sub _log { | |||||
| 9404 | 3850 | 3037 | my($self, $msg) = @_; | |||
| 9405 | ||||||
| 9406 | 3850 | 4621 | print "$msg\n" if $self->{verbose}; | |||
| 9407 | } | |||||
| 9408 | ||||||
| 9409 - 9447 | =head1 NOTES This is pre-pre-alpha proof of concept code. Nevertheless, it is useful for creating a template which you can modify to create a working schema to pass into L<App::Test::Generator>. =head1 TODO Parse =head4 Input / =head4 Output POD blocks (in L<Params::Validate::Strict> schema format) as a high-confidence input source, falling back to runtime introspection only when POD is absent. =head1 SEE ALSO =over 4 =item * L<App::Test::Generator> - Generate fuzz and corpus-driven test harnesses Output from this module serves as input to that module. So with well-documented code, you can automatically create your tests. =item * L<App::Test::Generator::Template> - Template of the file of tests created by C<App::Test::Generator> =back =head1 AUTHOR Nigel Horne, C<< <njh at nigelhorne.com> >> =head1 LICENCE AND COPYRIGHT Copyright 2025-2026 Nigel Horne. Usage is subject to GPL2 licence terms. If you use it, please let me know. =cut | |||||
| 9448 | ||||||
| 9449 | 1; | |||||