| File: | blib/lib/Text/Names/Abbreviate.pm |
| Coverage: | 96.7% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package Text::Names::Abbreviate; | |||||
| 2 | ||||||
| 3 | 9 9 9 | 777512 8 164 | use strict; | |||
| 4 | 9 9 9 | 17 6 221 | use warnings; | |||
| 5 | 9 9 9 | 1791 53008 17 | use autodie qw(:all); | |||
| 6 | 9 9 9 | 67784 687 27 | use utf8; | |||
| 7 | ||||||
| 8 | 9 9 9 | 104 7 214 | use Carp; | |||
| 9 | 9 9 9 | 16 10 104 | use Exporter 'import'; | |||
| 10 | 9 9 9 | 370 9242 157 | use Params::Get 0.13; | |||
| 11 | 9 9 9 | 634 35818 123 | use Params::Validate::Strict 0.13; | |||
| 12 | 9 9 9 | 22 7 5113 | use Readonly; | |||
| 13 | ||||||
| 14 | our @EXPORT_OK = qw(abbreviate); | |||||
| 15 | ||||||
| 16 - 24 | =head1 NAME Text::Names::Abbreviate - Create abbreviated name formats from full names =head2 VERSION Version 0.03 =cut | |||||
| 25 | ||||||
| 26 | our $VERSION = '0.03'; | |||||
| 27 | ||||||
| 28 | # --------------------------------------------------------------------------- | |||||
| 29 | # Named constants -- eliminate magic strings throughout the logic | |||||
| 30 | # --------------------------------------------------------------------------- | |||||
| 31 | Readonly my $FMT_DEFAULT => 'default'; | |||||
| 32 | Readonly my $FMT_INITIALS => 'initials'; | |||||
| 33 | Readonly my $FMT_COMPACT => 'compact'; | |||||
| 34 | Readonly my $FMT_SHORTLAST => 'shortlast'; | |||||
| 35 | Readonly my $STY_FIRST => 'first_last'; | |||||
| 36 | Readonly my $STY_LAST => 'last_first'; | |||||
| 37 | Readonly my $DEFAULT_SEP => '.'; | |||||
| 38 | ||||||
| 39 | # Single source of truth for parameter validation; also reflected in POD below. | |||||
| 40 | Readonly my %PARAM_SCHEMA => ( | |||||
| 41 | name => { | |||||
| 42 | type => 'string', | |||||
| 43 | min => 1, | |||||
| 44 | optional => 0, | |||||
| 45 | }, | |||||
| 46 | format => { | |||||
| 47 | type => 'string', | |||||
| 48 | memberof => [ $FMT_DEFAULT, $FMT_INITIALS, $FMT_COMPACT, $FMT_SHORTLAST ], | |||||
| 49 | optional => 1, | |||||
| 50 | }, | |||||
| 51 | style => { | |||||
| 52 | type => 'string', | |||||
| 53 | memberof => [ $STY_FIRST, $STY_LAST ], | |||||
| 54 | optional => 1, | |||||
| 55 | }, | |||||
| 56 | separator => { | |||||
| 57 | type => 'string', | |||||
| 58 | optional => 1, | |||||
| 59 | }, | |||||
| 60 | ); | |||||
| 61 | ||||||
| 62 - 208 | =head1 SYNOPSIS
use Text::Names::Abbreviate qw(abbreviate);
say abbreviate('John Quincy Adams'); # J. Q. Adams
say abbreviate('Adams, John Quincy'); # J. Q. Adams
say abbreviate('George R R Martin', { format => 'initials' }); # G.R.R.M.
=head1 DESCRIPTION
This module provides simple abbreviation logic for full personal names with
multiple formatting options and styles. Input is expected to be a personal
name consisting of one or more whitespace-separated components interpreted as:
First [Middle ...] Last
Names consisting of a single component are returned unchanged.
=head1 SUBROUTINES/METHODS
=head2 abbreviate
Produce an abbreviated form of a personal name.
=head3 Purpose
Accept a full name in either C<First Middle Last> or C<Last, First Middle>
form and return a formatted abbreviated string according to the requested
C<format>, C<style>, and C<separator>.
=head3 Args
=over 4
=item name (required)
Non-empty string. Accepted in two forms:
=over 4
=item C<First [Middle ...] Last>
=item C<Last, First [Middle ...]>
=back
A leading comma (C<", John">) signals that no last name is present; only
initials are produced.
=item format (optional, default C<default>)
One of C<default>, C<initials>, C<compact>, C<shortlast>.
=over 4
=item C<default> -- C<J. Q. Adams>
=item C<initials> -- C<J.Q.A.>
=item C<compact> -- C<JQA>
=item C<shortlast> -- initials then full last name; honours C<last_first> style
(e.g. C<Adams, J. Q.>).
=back
=item style (optional, default C<first_last>)
One of C<first_last>, C<last_first>. All formats honour this option.
=item separator (optional, default C<.>)
String appended after each initial. Empty string removes all punctuation.
=back
=head3 Returns
A plain string. Returns C<''> for inputs that normalise to nothing (e.g. a
bare comma).
=head3 Side Effects
None. The function is purely functional with no persistent state.
=head3 Usage
# Positional
my $abbrev = abbreviate('John Quincy Adams');
# Options hashref
my $abbrev = abbreviate('John Quincy Adams', {
format => 'initials',
style => 'last_first',
separator => '-',
});
=head3 API SPECIFICATION
INPUT
{
name => { type => 'string', min => 1, optional => 0 },
format => { type => 'string',
memberof => [qw(default initials compact shortlast)],
optional => 1 },
style => { type => 'string',
memberof => [qw(first_last last_first)],
optional => 1 },
separator => { type => 'string', optional => 1 },
}
OUTPUT
{ type => 'string' } # croaks on argument error
=head3 MESSAGES
Error Meaning / Resolution
--------------------------------------- -----------------------------------------------
name parameter missing or undefined Called without a name argument; supply one.
name must be a non-empty string Passed '' or undef; supply a non-empty string.
format must be one of: ... Invalid format constant; see API SPECIFICATION.
style must be one of: ... Invalid style constant; see API SPECIFICATION.
=head3 PSEUDOCODE
FUNCTION abbreviate(name, options):
Validate parameters via %PARAM_SCHEMA (croak on violation)
Assign defaults: format=default, style=first_last, sep="."
_normalize_name(name):
- collapse consecutive commas
- detect and reorder "Last, First" form
- track $had_leading_comma (input had no last-name component)
- collapse internal whitespace; trim
Return '' if normalized name is empty
_extract_parts(name, had_leading_comma, format, style):
- tokenize on whitespace
- pop last token as $last_name (unless leading-comma form)
- build @initials from remaining tokens (first char each)
- if style=last_first and format!=default: unshift last initial, clear $last_name
- filter empty initials
Format result:
compact -> join('', @initials, first($last_name))
initials -> join($sep, @all_letters) . $sep
shortlast -> join(' ', map {"$_$sep"} @initials) . " $last_name"
default -> joined initials; prepend/append $last_name per $style
=cut | |||||
| 209 | ||||||
| 210 | # --------------------------------------------------------------------------- | |||||
| 211 | # Private helpers | |||||
| 212 | # --------------------------------------------------------------------------- | |||||
| 213 | ||||||
| 214 | # Purpose: Resolve "Last, First" and leading-comma forms into a canonical | |||||
| 215 | # "First ... Last" string, collapsing all internal whitespace. | |||||
| 216 | # Entry Criteria: $raw is a defined, non-empty string (validated by the caller). | |||||
| 217 | # Exit Status: Returns ($normalized, $had_leading_comma). $normalized is | |||||
| 218 | # whitespace-collapsed and trimmed. $had_leading_comma is 1 when | |||||
| 219 | # the original input began with a comma (no last-name component). | |||||
| 220 | # Side Effects: None. | |||||
| 221 | sub _normalize_name { | |||||
| 222 | 2225 | 95171 | my ($raw) = @_; | |||
| 223 | ||||||
| 224 | 2225 | 2504 | $raw =~ s/,+/,/g; # collapse any run of commas to one before splitting | |||
| 225 | ||||||
| 226 | 2225 | 1690 | my $had_leading_comma = 0; | |||
| 227 | ||||||
| 228 | 2225 | 2750 | if ($raw =~ /,/) { | |||
| 229 | 153 306 | 327 554 | my ($last, $rest) = map { s/^\s+|\s+$//gr } split /\s*,\s*/, $raw, 2; | |||
| 230 | 153 | 182 | $rest //= q{}; | |||
| 231 | 153 | 136 | $last //= q{}; | |||
| 232 | ||||||
| 233 | 153 | 219 | $had_leading_comma = 1 if !length($last) && length($rest); | |||
| 234 | ||||||
| 235 | 153 | 238 | if (length($last) && length($rest)) { | |||
| 236 | 64 | 85 | $raw = "$rest $last"; | |||
| 237 | } elsif (length $rest) { | |||||
| 238 | 74 | 59 | $raw = $rest; | |||
| 239 | } elsif (length $last) { | |||||
| 240 | 4 | 4 | $raw = $last; | |||
| 241 | } else { | |||||
| 242 | 11 | 17 | return (q{}, 0); | |||
| 243 | } | |||||
| 244 | } | |||||
| 245 | ||||||
| 246 | 2214 | 11649 | $raw =~ s/^\s+|\s+$//g; | |||
| 247 | 2214 | 2980 | $raw =~ s/\s+/ /g; | |||
| 248 | ||||||
| 249 | 2214 | 2857 | return ($raw, $had_leading_comma); | |||
| 250 | } | |||||
| 251 | ||||||
| 252 | # Purpose: Derive the ordered list of initials and the preserved last name | |||||
| 253 | # from a normalized name string, honouring format and style. | |||||
| 254 | # Entry Criteria: $name is output of _normalize_name (trimmed, single-spaced). | |||||
| 255 | # $had_leading_comma is the boolean from _normalize_name. | |||||
| 256 | # $format and $style are validated constants (FMT_*/STY_*). | |||||
| 257 | # Exit Status: Returns ($initials_ref, $last_name). $initials_ref is an | |||||
| 258 | # arrayref of single-character strings with empty entries removed. | |||||
| 259 | # $last_name is '' when consumed by style/format reordering. | |||||
| 260 | # Side Effects: None. | |||||
| 261 | sub _extract_parts { | |||||
| 262 | 1966 | 23883 | my ($name, $had_leading_comma, $format, $style) = @_; | |||
| 263 | ||||||
| 264 | 1966 | 2444 | my @parts = split /\s+/, $name; | |||
| 265 | 1966 | 2115 | return ([], q{}) unless @parts; | |||
| 266 | ||||||
| 267 | 1965 | 1357 | my ($last_name, @initials); | |||
| 268 | ||||||
| 269 | 1965 | 1708 | if ($had_leading_comma) { | |||
| 270 | 73 | 52 | $last_name = q{}; | |||
| 271 | 73 111 | 55 134 | @initials = map { substr $_, 0, 1 } @parts; | |||
| 272 | } else { | |||||
| 273 | 1892 | 1736 | $last_name = pop @parts; | |||
| 274 | 1892 1961 | 1982 1878 | @initials = map { substr $_, 0, 1 } @parts; | |||
| 275 | ||||||
| 276 | # last_first on non-default formats (except shortlast, which keeps the full last name): | |||||
| 277 | # move the last-name initial to the front and discard the full last name | |||||
| 278 | 1892 | 2082 | if ($style eq $STY_LAST && $format ne $FMT_DEFAULT && $format ne $FMT_SHORTLAST && length $last_name) { | |||
| 279 | 60 | 417 | unshift @initials, substr $last_name, 0, 1; | |||
| 280 | 60 | 43 | $last_name = q{}; | |||
| 281 | } | |||||
| 282 | } | |||||
| 283 | ||||||
| 284 | 1965 2132 | 5100 1841 | @initials = grep { length $_ } @initials; | |||
| 285 | ||||||
| 286 | 1965 | 2708 | return (\@initials, $last_name); | |||
| 287 | } | |||||
| 288 | ||||||
| 289 | # --------------------------------------------------------------------------- | |||||
| 290 | # Public API | |||||
| 291 | # --------------------------------------------------------------------------- | |||||
| 292 | ||||||
| 293 | sub abbreviate { | |||||
| 294 | 3081 | 901736 | my $params = Params::Validate::Strict::validate_strict({ | |||
| 295 | args => Params::Get::get_params('name', @_), | |||||
| 296 | schema => \%PARAM_SCHEMA, | |||||
| 297 | }); | |||||
| 298 | ||||||
| 299 | Carp::croak(__PACKAGE__ . '::abbreviate: name parameter is required and must be defined') | |||||
| 300 | 2238 | 635188 | unless defined $params->{name}; | |||
| 301 | ||||||
| 302 | 2219 | 4498 | my $format = $params->{format} // $FMT_DEFAULT; | |||
| 303 | 2219 | 8758 | my $style = $params->{style} // $STY_FIRST; | |||
| 304 | 2219 | 7570 | my $sep = $params->{separator} // $DEFAULT_SEP; | |||
| 305 | ||||||
| 306 | 2219 | 6071 | my ($name, $had_leading_comma) = _normalize_name($params->{name}); | |||
| 307 | 2219 | 3020 | return q{} unless length $name; | |||
| 308 | ||||||
| 309 | 1956 | 2154 | my ($initials, $last_name) = _extract_parts($name, $had_leading_comma, $format, $style); | |||
| 310 | ||||||
| 311 | 1956 | 2098 | if ($format eq $FMT_COMPACT) { | |||
| 312 | 82 82 | 163 246 | return join q{}, @{$initials}, | |||
| 313 | (length $last_name ? (substr $last_name, 0, 1) : ()); | |||||
| 314 | } | |||||
| 315 | ||||||
| 316 | 1874 | 4489 | if ($format eq $FMT_INITIALS) { | |||
| 317 | 98 98 | 185 103 | my @letters = @{$initials}; | |||
| 318 | 97 | 118 | push @letters, substr($last_name, 0, 1) if length $last_name; | |||
| 319 | 97 | 260 | return join($sep, @letters) . $sep; | |||
| 320 | } | |||||
| 321 | ||||||
| 322 | 1776 | 4089 | if ($format eq $FMT_SHORTLAST) { | |||
| 323 | 155 155 174 109 | 301 160 189 78 | my $joined = @{$initials} ? join(' ', map { $_ . $sep } @{$initials}) : q{}; | |||
| 324 | 155 | 135 | if ($style eq $STY_LAST && length $last_name) { | |||
| 325 | 63 | 274 | return length($joined) ? "$last_name, $joined" : $last_name; | |||
| 326 | } | |||||
| 327 | 92 | 379 | return length($joined) | |||
| 328 | ? (length($last_name) ? "$joined $last_name" : $joined) | |||||
| 329 | : $last_name; | |||||
| 330 | } | |||||
| 331 | ||||||
| 332 | # default format | |||||
| 333 | 1621 1621 | 3215 4218 | return $last_name unless @{$initials}; | |||
| 334 | 393 1565 393 | 379 1453 393 | my $joined = join ' ', map { $_ . $sep } @{$initials}; | |||
| 335 | 393 | 435 | return ($style eq $STY_LAST && length $last_name) | |||
| 336 | ? "$last_name, $joined" | |||||
| 337 | : (length $last_name ? "$joined $last_name" : $joined); | |||||
| 338 | } | |||||
| 339 | ||||||
| 340 | 1; | |||||
| 341 | ||||||