lib/Text/Names/Abbreviate.pm

Structural Coverage (Approximate)

TER1 (Statement): 100.00%
TER2 (Branch): 100.00%
TER3 (LCSAJ): 100.0% (13/13)
Approximate LCSAJ segments: 47

LCSAJ Legend

Covered — this LCSAJ path was executed during testing.

Not covered — this LCSAJ path was never executed. These are the paths to focus on.

Multiple dots on a line indicate that multiple control-flow paths begin at that line. Hovering over any dot shows:

        start → end → jump
        

Uncovered paths show [NOT COVERED] in the tooltip.

Mutant Testing Legend

Survived (tests missed this) Killed (tests detected this) No mutation
    1: package Text::Names::Abbreviate;
    2: 
    3: use strict;
    4: use warnings;
    5: use autodie qw(:all);
    6: use utf8;
    7: 
    8: use Carp;
    9: use Exporter 'import';
   10: use Params::Get 0.13;
   11: use Params::Validate::Strict 0.13;
   12: use Readonly;
   13: 
   14: our @EXPORT_OK = qw(abbreviate);
   15: 
   16: =head1 NAME
   17: 
   18: Text::Names::Abbreviate - Create abbreviated name formats from full names
   19: 
   20: =head2 VERSION
   21: 
   22: Version 0.03
   23: 
   24: =cut
   25: 
   26: our $VERSION = '0.03';
   27: 
   28: # ---------------------------------------------------------------------------
   29: # Named constants -- eliminate magic strings throughout the logic
   30: # ---------------------------------------------------------------------------
   31: Readonly my $FMT_DEFAULT   => 'default';
   32: Readonly my $FMT_INITIALS  => 'initials';
   33: Readonly my $FMT_COMPACT   => 'compact';
   34: Readonly my $FMT_SHORTLAST => 'shortlast';
   35: Readonly my $STY_FIRST     => 'first_last';
   36: Readonly my $STY_LAST      => 'last_first';
   37: Readonly my $DEFAULT_SEP   => '.';
   38: 
   39: # Single source of truth for parameter validation; also reflected in POD below.
   40: Readonly my %PARAM_SCHEMA => (
   41: 	name => {
   42: 		type     => 'string',
   43: 		min      => 1,
   44: 		optional => 0,
   45: 	},
   46: 	format => {
   47: 		type     => 'string',
   48: 		memberof => [ $FMT_DEFAULT, $FMT_INITIALS, $FMT_COMPACT, $FMT_SHORTLAST ],
   49: 		optional => 1,
   50: 	},
   51: 	style => {
   52: 		type     => 'string',
   53: 		memberof => [ $STY_FIRST, $STY_LAST ],
   54: 		optional => 1,
   55: 	},
   56: 	separator => {
   57: 		type     => 'string',
   58: 		optional => 1,
   59: 	},
   60: );
   61: 
   62: =head1 SYNOPSIS
   63: 
   64:   use Text::Names::Abbreviate qw(abbreviate);
   65: 
   66:   say abbreviate('John Quincy Adams');                            # J. Q. Adams
   67:   say abbreviate('Adams, John Quincy');                          # J. Q. Adams
   68:   say abbreviate('George R R Martin', { format => 'initials' }); # G.R.R.M.
   69: 
   70: =head1 DESCRIPTION
   71: 
   72: This module provides simple abbreviation logic for full personal names with
   73: multiple formatting options and styles.  Input is expected to be a personal
   74: name consisting of one or more whitespace-separated components interpreted as:
   75: 
   76:   First [Middle ...] Last
   77: 
   78: Names consisting of a single component are returned unchanged.
   79: 
   80: =head1 SUBROUTINES/METHODS
   81: 
   82: =head2 abbreviate
   83: 
   84: Produce an abbreviated form of a personal name.
   85: 
   86: =head3 Purpose
   87: 
   88: Accept a full name in either C<First Middle Last> or C<Last, First Middle>
   89: form and return a formatted abbreviated string according to the requested
   90: C<format>, C<style>, and C<separator>.
   91: 
   92: =head3 Args
   93: 
   94: =over 4
   95: 
   96: =item name (required)
   97: 
   98: Non-empty string.  Accepted in two forms:
   99: 
  100: =over 4
  101: 
  102: =item C<First [Middle ...] Last>
  103: 
  104: =item C<Last, First [Middle ...]>
  105: 
  106: =back
  107: 
  108: A leading comma (C<", John">) signals that no last name is present; only
  109: initials are produced.
  110: 
  111: =item format (optional, default C<default>)
  112: 
  113: One of C<default>, C<initials>, C<compact>, C<shortlast>.
  114: 
  115: =over 4
  116: 
  117: =item C<default>   -- C<J. Q. Adams>
  118: 
  119: =item C<initials>  -- C<J.Q.A.>
  120: 
  121: =item C<compact>   -- C<JQA>
  122: 
  123: =item C<shortlast> -- initials then full last name; honours C<last_first> style
  124: (e.g. C<Adams, J. Q.>).
  125: 
  126: =back
  127: 
  128: =item style (optional, default C<first_last>)
  129: 
  130: One of C<first_last>, C<last_first>.  All formats honour this option.
  131: 
  132: =item separator (optional, default C<.>)
  133: 
  134: String appended after each initial.  Empty string removes all punctuation.
  135: 
  136: =back
  137: 
  138: =head3 Returns
  139: 
  140: A plain string.  Returns C<''> for inputs that normalise to nothing (e.g. a
  141: bare comma).
  142: 
  143: =head3 Side Effects
  144: 
  145: None.  The function is purely functional with no persistent state.
  146: 
  147: =head3 Usage
  148: 
  149:   # Positional
  150:   my $abbrev = abbreviate('John Quincy Adams');
  151: 
  152:   # Options hashref
  153:   my $abbrev = abbreviate('John Quincy Adams', {
  154:       format    => 'initials',
  155:       style     => 'last_first',
  156:       separator => '-',
  157:   });
  158: 
  159: =head3 API SPECIFICATION
  160: 
  161:   INPUT
  162:   {
  163:     name      => { type => 'string', min => 1, optional => 0 },
  164:     format    => { type => 'string',
  165:                    memberof => [qw(default initials compact shortlast)],
  166:                    optional => 1 },
  167:     style     => { type => 'string',
  168:                    memberof => [qw(first_last last_first)],
  169:                    optional => 1 },
  170:     separator => { type => 'string', optional => 1 },
  171:   }
  172: 
  173:   OUTPUT
  174:   { type => 'string' }    # croaks on argument error
  175: 
  176: =head3 MESSAGES
  177: 
  178:   Error                                    Meaning / Resolution
  179:   ---------------------------------------  -----------------------------------------------
  180:   name parameter missing or undefined      Called without a name argument; supply one.
  181:   name must be a non-empty string          Passed '' or undef; supply a non-empty string.
  182:   format must be one of: ...               Invalid format constant; see API SPECIFICATION.
  183:   style must be one of: ...               Invalid style constant; see API SPECIFICATION.
  184: 
  185: =head3 PSEUDOCODE
  186: 
  187:   FUNCTION abbreviate(name, options):
  188:      Validate parameters via %PARAM_SCHEMA       (croak on violation)
  189:      Assign defaults: format=default, style=first_last, sep="."
  190:      _normalize_name(name):
  191:          - collapse consecutive commas
  192:          - detect and reorder "Last, First" form
  193:          - track $had_leading_comma (input had no last-name component)
  194:          - collapse internal whitespace; trim
  195:      Return '' if normalized name is empty
  196:      _extract_parts(name, had_leading_comma, format, style):
  197:          - tokenize on whitespace
  198:          - pop last token as $last_name (unless leading-comma form)
  199:          - build @initials from remaining tokens (first char each)
  200:          - if style=last_first and format!=default: unshift last initial, clear $last_name
  201:          - filter empty initials
  202:      Format result:
  203:          compact   -> join('', @initials, first($last_name))
  204:          initials  -> join($sep, @all_letters) . $sep
  205:          shortlast -> join(' ', map {"$_$sep"} @initials) . " $last_name"
  206:          default   -> joined initials; prepend/append $last_name per $style
  207: 
  208: =cut
  209: 
  210: # ---------------------------------------------------------------------------
  211: # Private helpers
  212: # ---------------------------------------------------------------------------
  213: 
  214: # Purpose:      Resolve "Last, First" and leading-comma forms into a canonical
  215: #               "First ... Last" string, collapsing all internal whitespace.
  216: # Entry Criteria: $raw is a defined, non-empty string (validated by the caller).
  217: # Exit Status:  Returns ($normalized, $had_leading_comma).  $normalized is
  218: #               whitespace-collapsed and trimmed.  $had_leading_comma is 1 when
  219: #               the original input began with a comma (no last-name component).
  220: # Side Effects: None.
  221: sub _normalize_name {
222 → 228 → 246222 → 228 → 0  222: 	my ($raw) = @_;
  223: 
  224: 	$raw =~ s/,+/,/g;    # collapse any run of commas to one before splitting
  225: 
  226: 	my $had_leading_comma = 0;
  227: 
  228: 	if ($raw =~ /,/) {
  229: 		my ($last, $rest) = map { s/^\s+|\s+$//gr } split /\s*,\s*/, $raw, 2;
  230: 		$rest //= q{};
  231: 		$last //= q{};
  232: 
  233: 		$had_leading_comma = 1 if !length($last) && length($rest);
  234: 
  235: 		if (length($last) && length($rest)) {
  236: 			$raw = "$rest $last";
  237: 		} elsif (length $rest) {
  238: 			$raw = $rest;
  239: 		} elsif (length $last) {
  240: 			$raw = $last;
  241: 		} else {
  242: 			return (q{}, 0);
  243: 		}
  244: 	}
  245: 
246 → 249 → 0  246: 	$raw =~ s/^\s+|\s+$//g;
  247: 	$raw =~ s/\s+/ /g;
  248: 
  249: 	return ($raw, $had_leading_comma);
  250: }
  251: 
  252: # Purpose:      Derive the ordered list of initials and the preserved last name
  253: #               from a normalized name string, honouring format and style.
  254: # Entry Criteria: $name is output of _normalize_name (trimmed, single-spaced).
  255: #                 $had_leading_comma is the boolean from _normalize_name.
  256: #                 $format and $style are validated constants (FMT_*/STY_*).
  257: # Exit Status:  Returns ($initials_ref, $last_name).  $initials_ref is an
  258: #               arrayref of single-character strings with empty entries removed.
  259: #               $last_name is '' when consumed by style/format reordering.
  260: # Side Effects: None.
  261: sub _extract_parts {
262 → 269 → 284262 → 269 → 0  262: 	my ($name, $had_leading_comma, $format, $style) = @_;
  263: 
  264: 	my @parts = split /\s+/, $name;
  265: 	return ([], q{}) unless @parts;
  266: 
  267: 	my ($last_name, @initials);
  268: 

Mutants (Total: 1, Killed: 1, Survived: 0)

269: if ($had_leading_comma) { 270: $last_name = q{}; 271: @initials = map { substr $_, 0, 1 } @parts; 272: } else { 273: $last_name = pop @parts; 274: @initials = map { substr $_, 0, 1 } @parts; 275:

Mutants (Total: 1, Killed: 1, Survived: 0)

276: # last_first on non-default formats (except shortlast, which keeps the full last name): 277: # move the last-name initial to the front and discard the full last name 278: if ($style eq $STY_LAST && $format ne $FMT_DEFAULT && $format ne $FMT_SHORTLAST && length $last_name) { 279: unshift @initials, substr $last_name, 0, 1; 280: $last_name = q{}; 281: } 282: } 283: 284 → 286 → 0 284: @initials = grep { length $_ } @initials; 285: 286: return (\@initials, $last_name); 287: } 288: 289: # --------------------------------------------------------------------------- 290: # Public API 291: # --------------------------------------------------------------------------- 292: 293: sub abbreviate { 294 → 311 → 316294 → 311 → 0 294: my $params = Params::Validate::Strict::validate_strict({ 295: args => Params::Get::get_params('name', @_), 296: schema => \%PARAM_SCHEMA, 297: }); 298: 299: Carp::croak(__PACKAGE__ . '::abbreviate: name parameter is required and must be defined') 300: unless defined $params->{name}; 301: 302: my $format = $params->{format} // $FMT_DEFAULT; 303: my $style = $params->{style} // $STY_FIRST; 304: my $sep = $params->{separator} // $DEFAULT_SEP; 305: 306: my ($name, $had_leading_comma) = _normalize_name($params->{name}); 307: return q{} unless length $name; 308: 309: my ($initials, $last_name) = _extract_parts($name, $had_leading_comma, $format, $style);

Mutants (Total: 1, Killed: 1, Survived: 0)

310: 311: if ($format eq $FMT_COMPACT) { 312: return join q{}, @{$initials}, 313: (length $last_name ? (substr $last_name, 0, 1) : ()); 314: } 315: 316 → 316 → 322316 → 316 → 0 316: if ($format eq $FMT_INITIALS) { 317: my @letters = @{$initials}; 318: push @letters, substr($last_name, 0, 1) if length $last_name;

Mutants (Total: 1, Killed: 1, Survived: 0)

319: return join($sep, @letters) . $sep; 320: } 321: 322 → 322 → 333322 → 322 → 0 322: if ($format eq $FMT_SHORTLAST) { 323: my $joined = @{$initials} ? join(' ', map { $_ . $sep } @{$initials}) : q{}; 324: if ($style eq $STY_LAST && length $last_name) { 325: return length($joined) ? "$last_name, $joined" : $last_name; 326: } 327: return length($joined) 328: ? (length($last_name) ? "$joined $last_name" : $joined) 329: : $last_name; 330: } 331: 332: # default format 333 → 335 → 0 333: return $last_name unless @{$initials}; 334: my $joined = join ' ', map { $_ . $sep } @{$initials}; 335: return ($style eq $STY_LAST && length $last_name) 336: ? "$last_name, $joined" 337: : (length $last_name ? "$joined $last_name" : $joined); 338: } 339: 340: 1; 341: 342: __END__ 343: 344: =head1 LIMITATIONS 345: 346: =over 4 347:

Mutants (Total: 2, Killed: 2, Survived: 0)

348: =item * 349: 350: Honorifics (C<Dr.>, C<Prof.>) and suffixes (C<Jr.>, C<III>) are not 351: detected or stripped; they are treated as name components.

Mutants (Total: 1, Killed: 1, Survived: 0)

352: 353: =item * 354: 355: Initials are taken verbatim from the first character of each token. 356: Non-alphabetic leading characters (digits, punctuation) are included as-is.

Mutants (Total: 1, Killed: 1, Survived: 0)

357: 358: =item * 359: 360: Multiple consecutive commas collapse to a single comma before parsing. 361: Names with two legitimate comma-separated clauses are not supported. 362:

Mutants (Total: 1, Killed: 0, Survived: 1)
363: =item * 364:

Mutants (Total: 1, Killed: 1, Survived: 0)

365: C<compact> and C<initials> formats are lossy: passing their output back into 366: C<abbreviate> does not reproduce the original result. 367: 368: =back 369: 370: =head1 AUTHOR 371: 372: Nigel Horne, C<< <njh at nigelhorne.com> >> 373:

Mutants (Total: 2, Killed: 2, Survived: 0)

374: =head1 BUGS 375: 376: Please report bugs to C<bug-text-names-abbreviate at rt.cpan.org> or via 377: L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Names-Abbreviate>. 378: 379: =head1 REPOSITORY 380: 381: L<https://github.com/nigelhorne/Text-Names-Abbreviate> 382: 383: =head1 SEE ALSO 384: 385: =over 4 386: 387: =item * L<Test Dashboard|https://nigelhorne.github.io/Text-Names-Abbreviate/coverage/> 388: 389: =back 390: 391: =head1 SUPPORT 392: 393: This module is provided as-is without any warranty. 394: 395: perldoc Text::Names::Abbreviate 396: 397: =over 4 398: 399: =item * MetaCPAN: L<https://metacpan.org/dist/Text-Names-Abbreviate> 400: 401: =item * RT tracker: L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Text-Names-Abbreviate> 402: 403: =item * CPAN Testers: L<http://matrix.cpantesters.org/?dist=Text-Names-Abbreviate> 404: 405: =back 406: 407: =head1 FORMAL SPECIFICATION 408: 409: =head2 abbreviate 410: 411: Let Sigma* denote the set of all Unicode strings. 412: Let epsilon denote the empty string. 413: 414: Sigma+ = Sigma* \ {epsilon} 415: Format = {default, initials, compact, shortlast} 416: Style = {first_last, last_first} 417: 418: collapse(s) -- replace runs of whitespace with a single space, then trim 419: 420: normalize : Sigma+ -> Sigma* x Bool 421: normalize(n) = 422: let n1 = gsub(n, ",+", ",") 423: if "," not-in n1 then (collapse(n1), false) 424: else 425: let (L, R) = split(n1, ",", 2) each trimmed 426: case 427: L = epsilon ^ R != epsilon -> (collapse(R), true) 428: L != epsilon ^ R != epsilon -> (collapse(R ++ " " ++ L), false) 429: L != epsilon ^ R = epsilon -> (collapse(L), false) 430: L = epsilon ^ R = epsilon -> (epsilon, false) 431: end 432: 433: extract : Sigma* x Bool x Format x Style -> (seq Sigma) x Sigma* 434: extract(n, leading, fmt, sty) = 435: let ps = tokenize(n) -- split on whitespace 436: if ps = [] then ([], epsilon) 437: else if leading then 438: ([ first(p) | p <- ps ], epsilon) 439: else 440: let last = ps[#ps] 441: inits = [ first(p) | p <- ps[1..#ps-1] ] 442: if sty = last_first ^ fmt != default ^ last != epsilon 443: then ([first(last)] ++ inits, epsilon) 444: else (inits, last) 445: 446: abbreviate : Sigma+ x Format x Style x Sigma* -> Sigma* 447: abbreviate = format_result . extract . normalize 448: 449: =head1 LICENCE AND COPYRIGHT 450: 451: Copyright 2025-2026 Nigel Horne. 452: 453: Usage is subject to the terms of GPL2. 454: If you use it, 455: please let me know. 456: 457: =cut