| File: | blib/lib/Genealogy/Military/Branch.pm |
| Coverage: | 96.7% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package Genealogy::Military::Branch; | |||||
| 2 | ||||||
| 3 | 6 6 6 | 816473 6 79 | use strict; | |||
| 4 | 6 6 6 | 8 6 104 | use warnings; | |||
| 5 | 6 6 | 42 12 | use 5.014; | |||
| 6 | ||||||
| 7 | 6 6 6 | 9 6 130 | use Carp qw(croak); | |||
| 8 | 6 6 6 | 1064 12233 75 | use I18N::LangTags::Detect; | |||
| 9 | 6 6 6 | 1036 26003 126 | use Params::Get; | |||
| 10 | 6 6 6 | 1545 254138 75 | use Object::Configure 0.19; | |||
| 11 | 6 6 6 | 18 3 117 | use Readonly; | |||
| 12 | 6 6 6 | 10 6 84 | use Params::Validate::Strict qw(validate_strict); | |||
| 13 | 6 6 6 | 6 6 2808 | use Return::Set qw(set_return); | |||
| 14 | ||||||
| 15 | our $VERSION = '0.01'; | |||||
| 16 | ||||||
| 17 | # Schema for new() arguments | |||||
| 18 | Readonly my $NEW_SCHEMA => { | |||||
| 19 | language => { | |||||
| 20 | type => 'string', | |||||
| 21 | optional => 1, | |||||
| 22 | }, | |||||
| 23 | warn_on_error => { | |||||
| 24 | type => 'boolean', | |||||
| 25 | optional => 1, | |||||
| 26 | default => 0, | |||||
| 27 | }, | |||||
| 28 | }; | |||||
| 29 | ||||||
| 30 | # Schema for detect() arguments | |||||
| 31 | Readonly my $DETECT_SCHEMA => { | |||||
| 32 | text => { | |||||
| 33 | type => 'string', | |||||
| 34 | }, | |||||
| 35 | }; | |||||
| 36 | ||||||
| 37 | # Branch detectors tried in specificity order; first match wins. | |||||
| 38 | # More-specific patterns must appear before patterns that would also | |||||
| 39 | # match them: Merchant Navy before Navy, RAF before Air Force. | |||||
| 40 | Readonly my @DETECTORS => ( | |||||
| 41 | { pattern => qr/\bMerchant\s+Navy\b/i, key => 'Merchant Navy' }, | |||||
| 42 | { pattern => qr/\bRoyal\s+Flying\s+Corps\b|\bRFC\b/i, key => 'Royal Flying Corps' }, | |||||
| 43 | { pattern => qr/\bRoyal\s+Engineers\b/i, key => 'Royal Engineers' }, | |||||
| 44 | { pattern => qr/\bRoyal\s+Artillery\b/i, key => 'Royal Artillery' }, | |||||
| 45 | { pattern => qr/\bRAF\b|\bRoyal\s+Air\s+Force\b/i, key => 'RAF' }, | |||||
| 46 | { pattern => qr/\bAir\s+Force\b/i, key => 'air force' }, | |||||
| 47 | { pattern => qr/\bRoyal\s+Marines\b|\bMarine\s+Corps\b|\bMarines\b/i, key => 'marines' }, | |||||
| 48 | { pattern => qr/\bNavy\b/i, key => 'navy' }, | |||||
| 49 | { pattern => qr/\bCoast\s+Guard\b/i, key => 'Coast Guard' }, | |||||
| 50 | { pattern => qr/\bNational\s+Guard\b/i, key => 'National Guard' }, | |||||
| 51 | { pattern => qr/\b(?:Army|Regiment|Soldier|Infantry|Cavalry)\b/i, key => 'army' }, | |||||
| 52 | ); | |||||
| 53 | ||||||
| 54 | # Localised branch names keyed by BCP-47 primary subtag, then internal | |||||
| 55 | # English key. Falls back to English if a key has no entry for the | |||||
| 56 | # current language. | |||||
| 57 | Readonly my %TRANSLATIONS => ( | |||||
| 58 | 'en' => { | |||||
| 59 | 'navy' => 'navy', | |||||
| 60 | 'RAF' => 'RAF', | |||||
| 61 | 'army' => 'army', | |||||
| 62 | 'military' => 'military', | |||||
| 63 | 'marines' => 'marines', | |||||
| 64 | 'Royal Engineers' => 'Royal Engineers', | |||||
| 65 | 'Royal Artillery' => 'Royal Artillery', | |||||
| 66 | 'Royal Flying Corps' => 'Royal Flying Corps', | |||||
| 67 | 'Merchant Navy' => 'Merchant Navy', | |||||
| 68 | 'Coast Guard' => 'Coast Guard', | |||||
| 69 | 'National Guard' => 'National Guard', | |||||
| 70 | 'air force' => 'air force', | |||||
| 71 | }, | |||||
| 72 | # French translations - subset of English keys | |||||
| 73 | 'fr' => { | |||||
| 74 | 'navy' => 'marine', | |||||
| 75 | 'army' => "arm\x{e9}e", | |||||
| 76 | 'RAF' => 'RAF', | |||||
| 77 | 'military' => 'militaire', | |||||
| 78 | 'marines' => 'marines', | |||||
| 79 | 'air force' => "arm\x{e9}e de l'air", | |||||
| 80 | }, | |||||
| 81 | # German translations - subset of English keys | |||||
| 82 | 'de' => { | |||||
| 83 | 'navy' => 'Marine', | |||||
| 84 | 'army' => 'Armee', | |||||
| 85 | 'RAF' => 'RAF', | |||||
| 86 | 'military' => "Milit\x{e4}r", | |||||
| 87 | 'air force' => 'Luftwaffe', | |||||
| 88 | }, | |||||
| 89 | ); | |||||
| 90 | ||||||
| 91 - 191 | =head1 NAME
Genealogy::Military::Branch - Extract military branch from free-text genealogy notes
=head1 VERSION
Version 0.01
=head1 SYNOPSIS
use Genealogy::Military::Branch;
my $detector = Genealogy::Military::Branch->new();
my $branch = $detector->detect(
text => 'He served in the Royal Navy from 1914 to 1918',
);
# Returns 'navy'
my $branch = $detector->detect(
text => 'Served with the RAF in Bomber Command',
);
# Returns 'RAF'
my $branch = $detector->detect(
text => 'Some unrelated text',
);
# Returns 'military'
=head1 DESCRIPTION
Scans free-text military service notes from genealogy records and returns
the name of the military branch mentioned. Returns C<'military'> (localised)
when no specific branch is recognised.
Designed to replace the C<service()> helper in the C<gedcom> and C<ged2site>
distributions, which contain duplicate implementations of the same logic.
Detection patterns cover British, US and Commonwealth branches. The returned
string is localised to the system locale, which is detected from the
environment at construction time.
=head1 METHODS
=head2 new
=head3 Purpose
Constructs a new branch detector object.
=head3 API Specification
=head4 Input
{
language => {
type => 'string',
optional => 1,
},
warn_on_error => {
type => 'boolean',
optional => 1,
default => 0,
},
}
=head4 Output
{ type => 'object', isa => 'Genealogy::Military::Branch' }
=head3 Arguments
=over 4
=item * C<language> - BCP-47 primary subtag e.g. C<'en'>, C<'fr'>, C<'de'>.
If not given, the language is detected from the environment using
C<I18N::LangTags::Detect> and the standard locale environment variables,
falling back to C<'en'>. Optional.
=item * C<warn_on_error> - If true, C<carp> is called when C<detect()> is
called and no branch is identified in the supplied text. Optional, defaults
to 0.
=back
=head3 Returns
A blessed C<Genealogy::Military::Branch> object.
=head3 Notes
The language is detected and cached once at construction time.
=head3 Example
my $detector = Genealogy::Military::Branch->new({
language => 'fr',
warn_on_error => 1,
});
=cut | |||||
| 192 | ||||||
| 193 | sub new { | |||||
| 194 | 154 | 232313 | my $class = shift; | |||
| 195 | ||||||
| 196 | # Accept both hashref and flat list; all constructor arguments are optional | |||||
| 197 | 154 | 244 | my $args = Params::Get::get_params(undef, \@_) // {}; | |||
| 198 | ||||||
| 199 | # Validate constructor arguments against schema | |||||
| 200 | 154 | 1766 | $args = validate_strict({ | |||
| 201 | description => 'Genealogy::Military::Branch::new', | |||||
| 202 | input => $args, | |||||
| 203 | schema => $NEW_SCHEMA, | |||||
| 204 | }); | |||||
| 205 | ||||||
| 206 | # Use caller-supplied language or detect from environment | |||||
| 207 | 151 | 9950 | my $language = $args->{'language'} // _get_language() // 'en'; | |||
| 208 | ||||||
| 209 | # Load the configuration from a config file, if provided | |||||
| 210 | 151 | 200 | $args = Object::Configure::configure($class, $args); | |||
| 211 | ||||||
| 212 | # Bless and return the detector object | |||||
| 213 | return bless { | |||||
| 214 | language => $language, | |||||
| 215 | 151 | 348646 | warn_on_error => $args->{'warn_on_error'} // 0, | |||
| 216 | }, $class; | |||||
| 217 | } | |||||
| 218 | ||||||
| 219 - 276 | =head2 detect
Scans a free-text string for references to military branches and returns
the localised branch name.
=head3 API Specification
=head4 Input
{
text => {
type => 'string',
},
}
=head4 Output
{ type => 'string' }
=head3 Arguments
=over 4
=item * C<text> - The free-text string to scan. Required. May be passed
positionally as a single string.
=back
=head3 Returns
A string containing the detected branch name, localised to the language
supplied at construction. Returns C<'military'> (or its localised
equivalent) when no branch is detected. Never returns C<undef>.
=head3 Side Effects
If C<warn_on_error> was set true at construction and no branch is detected,
emits a warning via C<carp>.
=head3 Notes
Detection patterns are tried in order of specificity. The first pattern
to match wins, so C<'Merchant Navy'> is correctly identified as
C<'Merchant Navy'> rather than C<'navy'>.
=head3 Example
# Named argument form
my $branch = $detector->detect(
text => 'He served in the Royal Engineers during the Great War',
);
# Returns 'Royal Engineers'
# Positional form
my $branch = $detector->detect('Private in the Infantry');
# Returns 'army'
=cut | |||||
| 277 | ||||||
| 278 | sub detect { | |||||
| 279 | 255 | 47135 | my $self = shift; | |||
| 280 | ||||||
| 281 | # Normalise parameters: accept positional text string, hash or hashref | |||||
| 282 | 255 | 347 | my $params = Params::Get::get_params('text', \@_); | |||
| 283 | ||||||
| 284 | # Validate that text is a required string; validate_strict croaks on failure | |||||
| 285 | 253 | 2587 | my $validated = validate_strict({ | |||
| 286 | description => 'Genealogy::Military::Branch::detect', | |||||
| 287 | input => $params, | |||||
| 288 | schema => $DETECT_SCHEMA, | |||||
| 289 | }); | |||||
| 290 | 253 | 13261 | my $text = $validated->{'text'}; | |||
| 291 | 253 | 298 | croak 'Genealogy::Military::Branch::detect: text is required' | |||
| 292 | unless defined $text; | |||||
| 293 | ||||||
| 294 | # Walk each detector in specificity order; the first match wins | |||||
| 295 | 252 | 183 | my $branch; | |||
| 296 | 252 | 341 | for my $d (@DETECTORS) { | |||
| 297 | # Each entry has a compiled pattern and an English key for translation | |||||
| 298 | 1950 | 15908 | if($text =~ $d->{'pattern'}) { | |||
| 299 | 200 | 1248 | $branch = $self->_translate($d->{'key'}); | |||
| 300 | 200 | 1145 | last; | |||
| 301 | } | |||||
| 302 | } | |||||
| 303 | ||||||
| 304 | # Fall back to the default 'military' key when nothing matched | |||||
| 305 | 252 | 560 | unless(defined $branch) { | |||
| 306 | # Optionally alert the caller that no specific branch was identified | |||||
| 307 | Carp::carp 'Genealogy::Military::Branch: no military branch detected' | |||||
| 308 | 52 | 65 | if $self->{'warn_on_error'}; | |||
| 309 | 52 | 48 | $branch = $self->_translate('military'); | |||
| 310 | } | |||||
| 311 | ||||||
| 312 | # Return the validated branch string; guaranteed to be a defined string | |||||
| 313 | 252 | 594 | return set_return($branch, { type => 'string' }); | |||
| 314 | } | |||||
| 315 | ||||||
| 316 | # _translate | |||||
| 317 | # | |||||
| 318 | # Purpose: | |||||
| 319 | # Returns the localised string for an internal branch key, falling back | |||||
| 320 | # through language-specific -> English -> bare key. | |||||
| 321 | # | |||||
| 322 | # Entry criteria: | |||||
| 323 | # $self - a blessed Genealogy::Military::Branch object | |||||
| 324 | # $key - a string matching one of the keys in %TRANSLATIONS{'en'} | |||||
| 325 | # | |||||
| 326 | # Exit status: | |||||
| 327 | # Returns the localised branch name string. Never returns undef: | |||||
| 328 | # if no translation or English fallback exists, returns $key itself. | |||||
| 329 | # | |||||
| 330 | # Side effects: | |||||
| 331 | # None. | |||||
| 332 | # | |||||
| 333 | # Notes: | |||||
| 334 | # The 'en' table is the canonical fallback for all languages. | |||||
| 335 | # Keys not present in the language-specific table fall through to | |||||
| 336 | # English, allowing partial translation tables (e.g. 'fr' only | |||||
| 337 | # translates the most common branches). | |||||
| 338 | ||||||
| 339 | sub _translate { | |||||
| 340 | 255 | 1947 | my ($self, $key) = @_; | |||
| 341 | ||||||
| 342 | # Get the cached language code from the object | |||||
| 343 | 255 | 690 | my $lang = $self->{'language'} // 'en'; | |||
| 344 | ||||||
| 345 | # Try the language-specific translation first | |||||
| 346 | 255 | 332 | if(exists $TRANSLATIONS{$lang} && exists $TRANSLATIONS{$lang}{$key}) { | |||
| 347 | 230 | 1842 | return $TRANSLATIONS{$lang}{$key}; | |||
| 348 | } | |||||
| 349 | ||||||
| 350 | # Fall back to English, then the bare key as a last resort | |||||
| 351 | 25 | 243 | return $TRANSLATIONS{'en'}{$key} // $key; | |||
| 352 | } | |||||
| 353 | ||||||
| 354 | # _get_language | |||||
| 355 | # | |||||
| 356 | # Purpose: | |||||
| 357 | # Determines the system's default language using environment variables. | |||||
| 358 | # | |||||
| 359 | # Entry criteria: | |||||
| 360 | # None. Reads environment variables directly. | |||||
| 361 | # | |||||
| 362 | # Exit status: | |||||
| 363 | # Returns a two-letter language code string e.g. 'en', 'fr', 'de', | |||||
| 364 | # or undef if no language can be determined. | |||||
| 365 | # | |||||
| 366 | # Side effects: | |||||
| 367 | # None. | |||||
| 368 | # | |||||
| 369 | # Notes: | |||||
| 370 | # Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL, | |||||
| 371 | # LC_MESSAGES, LANG environment variables. | |||||
| 372 | # Returns 'en' for C locale. | |||||
| 373 | # See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html | |||||
| 374 | ||||||
| 375 | sub _get_language { | |||||
| 376 | # Try I18N::LangTags::Detect first for most accurate detection | |||||
| 377 | 148 | 3759 | for my $tag (I18N::LangTags::Detect::detect()) { | |||
| 378 | 80 | 2068 | if($tag =~ /^([a-z]{2})/i) { | |||
| 379 | 79 | 169 | return lc($1); | |||
| 380 | } | |||||
| 381 | } | |||||
| 382 | ||||||
| 383 | # Fall back to checking environment variables in priority order | |||||
| 384 | 69 | 543 | if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) { | |||
| 385 | 1 | 7 | return lc($1); | |||
| 386 | } | |||||
| 387 | 68 | 61 | foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') { | |||
| 388 | 201 | 161 | my $val = $ENV{$variable}; | |||
| 389 | 201 | 170 | next unless defined($val); | |||
| 390 | # Extract the two-letter primary language subtag | |||||
| 391 | 65 | 98 | if($val =~ /^([a-z]{2})/i) { | |||
| 392 | 58 | 141 | return lc($1); | |||
| 393 | } | |||||
| 394 | } | |||||
| 395 | ||||||
| 396 | # Handle C locale explicitly - treat as English | |||||
| 397 | 10 | 32 | return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/); | |||
| 398 | ||||||
| 399 | 3 | 7 | return; | |||
| 400 | } | |||||
| 401 | ||||||
| 402 - 446 | =head1 AUTHOR Nigel Horne C<< <njh@nigelhorne.com> >> =head1 BUGS Please report bugs via the GitHub issue tracker: L<https://github.com/nigelhorne/Genealogy-Military-Branch/issues> =head1 TODO =over 4 =item * Add Australian, Canadian and other Commonwealth branch patterns =item * Add more US-specific patterns (Space Force etc) =item * Consider a companion C<Genealogy::Military::Rank> module =back =head1 SEE ALSO =over 4 =item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Military-Branch/coverage/> =item * L<Genealogy::Occupation> =item * L<Params::Get> =item * L<Params::Validate::Strict> =item * L<Return::Set> =back =head1 LICENSE AND COPYRIGHT Copyright 2026 Nigel Horne. This program is released under the following licence: GPL2 If you use it, please let me know. =cut | |||||
| 447 | ||||||
| 448 | 1; | |||||