lib/Genealogy/Military/Branch.pm

Structural Coverage (Approximate)

TER1 (Statement): 100.00%
TER2 (Branch): 100.00%
TER3 (LCSAJ): 100.0% (15/15)
Approximate LCSAJ segments: 21

LCSAJ Legend

Covered — this LCSAJ path was executed during testing.

Not covered — this LCSAJ path was never executed. These are the paths to focus on.

Multiple dots on a line indicate that multiple control-flow paths begin at that line. Hovering over any dot shows:

        start → end → jump
        

Uncovered paths show [NOT COVERED] in the tooltip.

Mutant Testing Legend

Survived (tests missed this) Killed (tests detected this) No mutation
    1: package Genealogy::Military::Branch;
    2: 
    3: use strict;
    4: use warnings;
    5: use 5.014;
    6: 
    7: use Carp qw(croak);
    8: use I18N::LangTags::Detect;
    9: use Params::Get;
   10: use Object::Configure 0.19;
   11: use Readonly;
   12: use Params::Validate::Strict qw(validate_strict);
   13: use Return::Set qw(set_return);
   14: 
   15: our $VERSION = '0.01';
   16: 
   17: # Schema for new() arguments
   18: Readonly my $NEW_SCHEMA => {
   19: 	language => {
   20: 		type     => 'string',
   21: 		optional => 1,
   22: 	},
   23: 	warn_on_error => {
   24: 		type     => 'boolean',
   25: 		optional => 1,
   26: 		default  => 0,
   27: 	},
   28: };
   29: 
   30: # Schema for detect() arguments
   31: Readonly my $DETECT_SCHEMA => {
   32: 	text => {
   33: 		type => 'string',
   34: 	},
   35: };
   36: 
   37: # Branch detectors tried in specificity order; first match wins.
   38: # More-specific patterns must appear before patterns that would also
   39: # match them: Merchant Navy before Navy, RAF before Air Force.
   40: Readonly my @DETECTORS => (
   41: 	{ pattern => qr/\bMerchant\s+Navy\b/i,                                key => 'Merchant Navy'       },
   42: 	{ pattern => qr/\bRoyal\s+Flying\s+Corps\b|\bRFC\b/i,                 key => 'Royal Flying Corps'  },
   43: 	{ pattern => qr/\bRoyal\s+Engineers\b/i,                              key => 'Royal Engineers'     },
   44: 	{ pattern => qr/\bRoyal\s+Artillery\b/i,                              key => 'Royal Artillery'     },
   45: 	{ pattern => qr/\bRAF\b|\bRoyal\s+Air\s+Force\b/i,                   key => 'RAF'                 },
   46: 	{ pattern => qr/\bAir\s+Force\b/i,                                    key => 'air force'           },
   47: 	{ pattern => qr/\bRoyal\s+Marines\b|\bMarine\s+Corps\b|\bMarines\b/i, key => 'marines'             },
   48: 	{ pattern => qr/\bNavy\b/i,                                           key => 'navy'                },
   49: 	{ pattern => qr/\bCoast\s+Guard\b/i,                                  key => 'Coast Guard'         },
   50: 	{ pattern => qr/\bNational\s+Guard\b/i,                               key => 'National Guard'      },
   51: 	{ pattern => qr/\b(?:Army|Regiment|Soldier|Infantry|Cavalry)\b/i,     key => 'army'                },
   52: );
   53: 
   54: # Localised branch names keyed by BCP-47 primary subtag, then internal
   55: # English key.  Falls back to English if a key has no entry for the
   56: # current language.
   57: Readonly my %TRANSLATIONS => (
   58: 	'en' => {
   59: 		'navy'               => 'navy',
   60: 		'RAF'                => 'RAF',
   61: 		'army'               => 'army',
   62: 		'military'           => 'military',
   63: 		'marines'            => 'marines',
   64: 		'Royal Engineers'    => 'Royal Engineers',
   65: 		'Royal Artillery'    => 'Royal Artillery',
   66: 		'Royal Flying Corps' => 'Royal Flying Corps',
   67: 		'Merchant Navy'      => 'Merchant Navy',
   68: 		'Coast Guard'        => 'Coast Guard',
   69: 		'National Guard'     => 'National Guard',
   70: 		'air force'          => 'air force',
   71: 	},
   72: 	# French translations - subset of English keys
   73: 	'fr' => {
   74: 		'navy'      => 'marine',
   75: 		'army'      => "arm\x{e9}e",
   76: 		'RAF'       => 'RAF',
   77: 		'military'  => 'militaire',
   78: 		'marines'   => 'marines',
   79: 		'air force' => "arm\x{e9}e de l'air",
   80: 	},
   81: 	# German translations - subset of English keys
   82: 	'de' => {
   83: 		'navy'      => 'Marine',
   84: 		'army'      => 'Armee',
   85: 		'RAF'       => 'RAF',
   86: 		'military'  => "Milit\x{e4}r",
   87: 		'air force' => 'Luftwaffe',
   88: 	},
   89: );
   90: 
   91: =head1 NAME
   92: 
   93: Genealogy::Military::Branch - Extract military branch from free-text genealogy notes
   94: 
   95: =head1 VERSION
   96: 
   97: Version 0.01
   98: 
   99: =head1 SYNOPSIS
  100: 
  101:     use Genealogy::Military::Branch;
  102: 
  103:     my $detector = Genealogy::Military::Branch->new();
  104: 
  105:     my $branch = $detector->detect(
  106:         text => 'He served in the Royal Navy from 1914 to 1918',
  107:     );
  108:     # Returns 'navy'
  109: 
  110:     my $branch = $detector->detect(
  111:         text => 'Served with the RAF in Bomber Command',
  112:     );
  113:     # Returns 'RAF'
  114: 
  115:     my $branch = $detector->detect(
  116:         text => 'Some unrelated text',
  117:     );
  118:     # Returns 'military'
  119: 
  120: =head1 DESCRIPTION
  121: 
  122: Scans free-text military service notes from genealogy records and returns
  123: the name of the military branch mentioned.  Returns C<'military'> (localised)
  124: when no specific branch is recognised.
  125: 
  126: Designed to replace the C<service()> helper in the C<gedcom> and C<ged2site>
  127: distributions, which contain duplicate implementations of the same logic.
  128: 
  129: Detection patterns cover British, US and Commonwealth branches.  The returned
  130: string is localised to the system locale, which is detected from the
  131: environment at construction time.
  132: 
  133: =head1 METHODS
  134: 
  135: =head2 new
  136: 
  137: =head3 Purpose
  138: 
  139: Constructs a new branch detector object.
  140: 
  141: =head3 API Specification
  142: 
  143: =head4 Input
  144: 
  145:     {
  146:         language => {
  147:             type     => 'string',
  148:             optional => 1,
  149:         },
  150:         warn_on_error => {
  151:             type     => 'boolean',
  152:             optional => 1,
  153:             default  => 0,
  154:         },
  155:     }
  156: 
  157: =head4 Output
  158: 
  159:     { type => 'object', isa => 'Genealogy::Military::Branch' }
  160: 
  161: =head3 Arguments
  162: 
  163: =over 4
  164: 
  165: =item * C<language> - BCP-47 primary subtag e.g. C<'en'>, C<'fr'>, C<'de'>.
  166: If not given, the language is detected from the environment using
  167: C<I18N::LangTags::Detect> and the standard locale environment variables,
  168: falling back to C<'en'>.  Optional.
  169: 
  170: =item * C<warn_on_error> - If true, C<carp> is called when C<detect()> is
  171: called and no branch is identified in the supplied text.  Optional, defaults
  172: to 0.
  173: 
  174: =back
  175: 
  176: =head3 Returns
  177: 
  178: A blessed C<Genealogy::Military::Branch> object.
  179: 
  180: =head3 Notes
  181: 
  182: The language is detected and cached once at construction time.
  183: 
  184: =head3 Example
  185: 
  186:     my $detector = Genealogy::Military::Branch->new({
  187:         language      => 'fr',
  188:         warn_on_error => 1,
  189:     });
  190: 
  191: =cut
  192: 
  193: sub new {
  194: 	my $class = shift;
  195: 
  196: 	# Accept both hashref and flat list; all constructor arguments are optional
  197: 	my $args = Params::Get::get_params(undef, \@_) // {};
  198: 
  199: 	# Validate constructor arguments against schema
  200: 	$args = validate_strict({
  201: 		description => 'Genealogy::Military::Branch::new',
  202: 		input       => $args,
  203: 		schema      => $NEW_SCHEMA,
  204: 	});
  205: 
  206: 	# Use caller-supplied language or detect from environment
  207: 	my $language = $args->{'language'} // _get_language() // 'en';
  208: 
  209: 	# Load the configuration from a config file, if provided
  210: 	$args = Object::Configure::configure($class, $args);
  211: 
  212: 	# Bless and return the detector object
  213: 	return bless {
  214: 		language      => $language,
  215: 		warn_on_error => $args->{'warn_on_error'} // 0,
  216: 	}, $class;
  217: }
  218: 
  219: =head2 detect
  220: 
  221: Scans a free-text string for references to military branches and returns
  222: the localised branch name.
  223: 
  224: =head3 API Specification
  225: 
  226: =head4 Input
  227: 
  228:     {
  229:         text => {
  230:             type => 'string',
  231:         },
  232:     }
  233: 
  234: =head4 Output
  235: 
  236:     { type => 'string' }
  237: 
  238: =head3 Arguments
  239: 
  240: =over 4
  241: 
  242: =item * C<text> - The free-text string to scan.  Required.  May be passed
  243: positionally as a single string.
  244: 
  245: =back
  246: 
  247: =head3 Returns
  248: 
  249: A string containing the detected branch name, localised to the language
  250: supplied at construction.  Returns C<'military'> (or its localised
  251: equivalent) when no branch is detected.  Never returns C<undef>.
  252: 
  253: =head3 Side Effects
  254: 
  255: If C<warn_on_error> was set true at construction and no branch is detected,
  256: emits a warning via C<carp>.
  257: 
  258: =head3 Notes
  259: 
  260: Detection patterns are tried in order of specificity.  The first pattern
  261: to match wins, so C<'Merchant Navy'> is correctly identified as
  262: C<'Merchant Navy'> rather than C<'navy'>.
  263: 
  264: =head3 Example
  265: 
  266:     # Named argument form
  267:     my $branch = $detector->detect(
  268:         text => 'He served in the Royal Engineers during the Great War',
  269:     );
  270:     # Returns 'Royal Engineers'
  271: 
  272:     # Positional form
  273:     my $branch = $detector->detect('Private in the Infantry');
  274:     # Returns 'army'
  275: 
  276: =cut
  277: 
  278: sub detect {
279 → 296 → 305279 → 296 → 0  279: 	my $self = shift;
  280: 
  281: 	# Normalise parameters: accept positional text string, hash or hashref
  282: 	my $params = Params::Get::get_params('text', \@_);
  283: 
  284: 	# Validate that text is a required string; validate_strict croaks on failure
  285: 	my $validated = validate_strict({
  286: 		description => 'Genealogy::Military::Branch::detect',
  287: 		input       => $params,
  288: 		schema      => $DETECT_SCHEMA,
  289: 	});
  290: 	my $text = $validated->{'text'};
  291: 	croak 'Genealogy::Military::Branch::detect: text is required'
  292: 		unless defined $text;
  293: 
  294: 	# Walk each detector in specificity order; the first match wins
  295: 	my $branch;
  296: 	for my $d (@DETECTORS) {
  297: 		# Each entry has a compiled pattern and an English key for translation
  298: 		if($text =~ $d->{'pattern'}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

299: $branch = $self->_translate($d->{'key'}); 300: last; 301: } 302: } 303: 304: # Fall back to the default 'military' key when nothing matched 305 → 305 → 313305 → 305 → 0 305: unless(defined $branch) {

Mutants (Total: 1, Killed: 1, Survived: 0)

306: # Optionally alert the caller that no specific branch was identified 307: Carp::carp 'Genealogy::Military::Branch: no military branch detected' 308: if $self->{'warn_on_error'}; 309: $branch = $self->_translate('military'); 310: } 311: 312: # Return the validated branch string; guaranteed to be a defined string 313 → 313 → 0 313: return set_return($branch, { type => 'string' }); 314: } 315: 316: # _translate 317: # 318: # Purpose: 319: # Returns the localised string for an internal branch key, falling back 320: # through language-specific -> English -> bare key. 321: # 322: # Entry criteria: 323: # $self - a blessed Genealogy::Military::Branch object 324: # $key - a string matching one of the keys in %TRANSLATIONS{'en'} 325: # 326: # Exit status: 327: # Returns the localised branch name string. Never returns undef: 328: # if no translation or English fallback exists, returns $key itself. 329: # 330: # Side effects: 331: # None. 332: # 333: # Notes: 334: # The 'en' table is the canonical fallback for all languages. 335: # Keys not present in the language-specific table fall through to 336: # English, allowing partial translation tables (e.g. 'fr' only 337: # translates the most common branches). 338: 339: sub _translate { 340 → 346 → 351340 → 346 → 0 340: my ($self, $key) = @_; 341: 342: # Get the cached language code from the object 343: my $lang = $self->{'language'} // 'en'; 344: 345: # Try the language-specific translation first 346: if(exists $TRANSLATIONS{$lang} && exists $TRANSLATIONS{$lang}{$key}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

347: return $TRANSLATIONS{$lang}{$key}; 348: } 349: 350: # Fall back to English, then the bare key as a last resort 351 → 351 → 0 351: return $TRANSLATIONS{'en'}{$key} // $key; 352: } 353: 354: # _get_language 355: # 356: # Purpose: 357: # Determines the system's default language using environment variables. 358: # 359: # Entry criteria: 360: # None. Reads environment variables directly. 361: # 362: # Exit status: 363: # Returns a two-letter language code string e.g. 'en', 'fr', 'de', 364: # or undef if no language can be determined. 365: # 366: # Side effects: 367: # None. 368: # 369: # Notes: 370: # Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL, 371: # LC_MESSAGES, LANG environment variables. 372: # Returns 'en' for C locale. 373: # See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html 374: 375: sub _get_language { 376: # Try I18N::LangTags::Detect first for most accurate detection 377 → 377 → 384377 → 377 → 0 377: for my $tag (I18N::LangTags::Detect::detect()) { 378: if($tag =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

379: return lc($1); 380: } 381: } 382: 383: # Fall back to checking environment variables in priority order 384 → 384 → 387384 → 384 → 0 384: if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

385: return lc($1); 386: } 387 → 387 → 397387 → 387 → 0 387: foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') { 388: my $val = $ENV{$variable}; 389: next unless defined($val); 390: # Extract the two-letter primary language subtag 391: if($val =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

392: return lc($1); 393: } 394: } 395: 396: # Handle C locale explicitly - treat as English 397 → 399 → 0 397: return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/);

Mutants (Total: 2, Killed: 2, Survived: 0)

398: 399: return; 400: } 401: 402: =head1 AUTHOR 403: 404: Nigel Horne C<< <njh@nigelhorne.com> >> 405: 406: =head1 BUGS 407: 408: Please report bugs via the GitHub issue tracker: 409: L<https://github.com/nigelhorne/Genealogy-Military-Branch/issues> 410: 411: =head1 TODO 412: 413: =over 4 414: 415: =item * Add Australian, Canadian and other Commonwealth branch patterns 416: 417: =item * Add more US-specific patterns (Space Force etc) 418: 419: =item * Consider a companion C<Genealogy::Military::Rank> module 420: 421: =back 422: 423: =head1 SEE ALSO 424: 425: =over 4 426: 427: =item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Military-Branch/coverage/> 428: 429: =item * L<Genealogy::Occupation> 430: 431: =item * L<Params::Get> 432: 433: =item * L<Params::Validate::Strict> 434: 435: =item * L<Return::Set> 436: 437: =back 438: 439: =head1 LICENSE AND COPYRIGHT 440: 441: Copyright 2026 Nigel Horne. 442: 443: This program is released under the following licence: GPL2 444: If you use it, please let me know. 445: 446: =cut 447: 448: 1;