File Coverage

File:blib/lib/Genealogy/Military/Branch.pm
Coverage:96.7%

linestmtbrancondsubtimecode
1package Genealogy::Military::Branch;
2
3
6
6
6
816473
6
79
use strict;
4
6
6
6
8
6
104
use warnings;
5
6
6
42
12
use 5.014;
6
7
6
6
6
9
6
130
use Carp qw(croak);
8
6
6
6
1064
12233
75
use I18N::LangTags::Detect;
9
6
6
6
1036
26003
126
use Params::Get;
10
6
6
6
1545
254138
75
use Object::Configure 0.19;
11
6
6
6
18
3
117
use Readonly;
12
6
6
6
10
6
84
use Params::Validate::Strict qw(validate_strict);
13
6
6
6
6
6
2808
use Return::Set qw(set_return);
14
15our $VERSION = '0.01';
16
17# Schema for new() arguments
18Readonly my $NEW_SCHEMA => {
19        language => {
20                type     => 'string',
21                optional => 1,
22        },
23        warn_on_error => {
24                type     => 'boolean',
25                optional => 1,
26                default  => 0,
27        },
28};
29
30# Schema for detect() arguments
31Readonly my $DETECT_SCHEMA => {
32        text => {
33                type => 'string',
34        },
35};
36
37# Branch detectors tried in specificity order; first match wins.
38# More-specific patterns must appear before patterns that would also
39# match them: Merchant Navy before Navy, RAF before Air Force.
40Readonly my @DETECTORS => (
41        { pattern => qr/\bMerchant\s+Navy\b/i,                                key => 'Merchant Navy'       },
42        { pattern => qr/\bRoyal\s+Flying\s+Corps\b|\bRFC\b/i,                 key => 'Royal Flying Corps'  },
43        { pattern => qr/\bRoyal\s+Engineers\b/i,                              key => 'Royal Engineers'     },
44        { pattern => qr/\bRoyal\s+Artillery\b/i,                              key => 'Royal Artillery'     },
45        { pattern => qr/\bRAF\b|\bRoyal\s+Air\s+Force\b/i,                   key => 'RAF'                 },
46        { pattern => qr/\bAir\s+Force\b/i,                                    key => 'air force'           },
47        { pattern => qr/\bRoyal\s+Marines\b|\bMarine\s+Corps\b|\bMarines\b/i, key => 'marines'             },
48        { pattern => qr/\bNavy\b/i,                                           key => 'navy'                },
49        { pattern => qr/\bCoast\s+Guard\b/i,                                  key => 'Coast Guard'         },
50        { pattern => qr/\bNational\s+Guard\b/i,                               key => 'National Guard'      },
51        { pattern => qr/\b(?:Army|Regiment|Soldier|Infantry|Cavalry)\b/i,     key => 'army'                },
52);
53
54# Localised branch names keyed by BCP-47 primary subtag, then internal
55# English key.  Falls back to English if a key has no entry for the
56# current language.
57Readonly my %TRANSLATIONS => (
58        'en' => {
59                'navy'               => 'navy',
60                'RAF'                => 'RAF',
61                'army'               => 'army',
62                'military'           => 'military',
63                'marines'            => 'marines',
64                'Royal Engineers'    => 'Royal Engineers',
65                'Royal Artillery'    => 'Royal Artillery',
66                'Royal Flying Corps' => 'Royal Flying Corps',
67                'Merchant Navy'      => 'Merchant Navy',
68                'Coast Guard'        => 'Coast Guard',
69                'National Guard'     => 'National Guard',
70                'air force'          => 'air force',
71        },
72        # French translations - subset of English keys
73        'fr' => {
74                'navy'      => 'marine',
75                'army'      => "arm\x{e9}e",
76                'RAF'       => 'RAF',
77                'military'  => 'militaire',
78                'marines'   => 'marines',
79                'air force' => "arm\x{e9}e de l'air",
80        },
81        # German translations - subset of English keys
82        'de' => {
83                'navy'      => 'Marine',
84                'army'      => 'Armee',
85                'RAF'       => 'RAF',
86                'military'  => "Milit\x{e4}r",
87                'air force' => 'Luftwaffe',
88        },
89);
90
91 - 191
=head1 NAME

Genealogy::Military::Branch - Extract military branch from free-text genealogy notes

=head1 VERSION

Version 0.01

=head1 SYNOPSIS

    use Genealogy::Military::Branch;

    my $detector = Genealogy::Military::Branch->new();

    my $branch = $detector->detect(
        text => 'He served in the Royal Navy from 1914 to 1918',
    );
    # Returns 'navy'

    my $branch = $detector->detect(
        text => 'Served with the RAF in Bomber Command',
    );
    # Returns 'RAF'

    my $branch = $detector->detect(
        text => 'Some unrelated text',
    );
    # Returns 'military'

=head1 DESCRIPTION

Scans free-text military service notes from genealogy records and returns
the name of the military branch mentioned.  Returns C<'military'> (localised)
when no specific branch is recognised.

Designed to replace the C<service()> helper in the C<gedcom> and C<ged2site>
distributions, which contain duplicate implementations of the same logic.

Detection patterns cover British, US and Commonwealth branches.  The returned
string is localised to the system locale, which is detected from the
environment at construction time.

=head1 METHODS

=head2 new

=head3 Purpose

Constructs a new branch detector object.

=head3 API Specification

=head4 Input

    {
        language => {
            type     => 'string',
            optional => 1,
        },
        warn_on_error => {
            type     => 'boolean',
            optional => 1,
            default  => 0,
        },
    }

=head4 Output

    { type => 'object', isa => 'Genealogy::Military::Branch' }

=head3 Arguments

=over 4

=item * C<language> - BCP-47 primary subtag e.g. C<'en'>, C<'fr'>, C<'de'>.
If not given, the language is detected from the environment using
C<I18N::LangTags::Detect> and the standard locale environment variables,
falling back to C<'en'>.  Optional.

=item * C<warn_on_error> - If true, C<carp> is called when C<detect()> is
called and no branch is identified in the supplied text.  Optional, defaults
to 0.

=back

=head3 Returns

A blessed C<Genealogy::Military::Branch> object.

=head3 Notes

The language is detected and cached once at construction time.

=head3 Example

    my $detector = Genealogy::Military::Branch->new({
        language      => 'fr',
        warn_on_error => 1,
    });

=cut
192
193sub new {
194
154
232313
        my $class = shift;
195
196        # Accept both hashref and flat list; all constructor arguments are optional
197
154
244
        my $args = Params::Get::get_params(undef, \@_) // {};
198
199        # Validate constructor arguments against schema
200
154
1766
        $args = validate_strict({
201                description => 'Genealogy::Military::Branch::new',
202                input       => $args,
203                schema      => $NEW_SCHEMA,
204        });
205
206        # Use caller-supplied language or detect from environment
207
151
9950
        my $language = $args->{'language'} // _get_language() // 'en';
208
209        # Load the configuration from a config file, if provided
210
151
200
        $args = Object::Configure::configure($class, $args);
211
212        # Bless and return the detector object
213        return bless {
214                language      => $language,
215
151
348646
                warn_on_error => $args->{'warn_on_error'} // 0,
216        }, $class;
217}
218
219 - 276
=head2 detect

Scans a free-text string for references to military branches and returns
the localised branch name.

=head3 API Specification

=head4 Input

    {
        text => {
            type => 'string',
        },
    }

=head4 Output

    { type => 'string' }

=head3 Arguments

=over 4

=item * C<text> - The free-text string to scan.  Required.  May be passed
positionally as a single string.

=back

=head3 Returns

A string containing the detected branch name, localised to the language
supplied at construction.  Returns C<'military'> (or its localised
equivalent) when no branch is detected.  Never returns C<undef>.

=head3 Side Effects

If C<warn_on_error> was set true at construction and no branch is detected,
emits a warning via C<carp>.

=head3 Notes

Detection patterns are tried in order of specificity.  The first pattern
to match wins, so C<'Merchant Navy'> is correctly identified as
C<'Merchant Navy'> rather than C<'navy'>.

=head3 Example

    # Named argument form
    my $branch = $detector->detect(
        text => 'He served in the Royal Engineers during the Great War',
    );
    # Returns 'Royal Engineers'

    # Positional form
    my $branch = $detector->detect('Private in the Infantry');
    # Returns 'army'

=cut
277
278sub detect {
279
255
47135
        my $self = shift;
280
281        # Normalise parameters: accept positional text string, hash or hashref
282
255
347
        my $params = Params::Get::get_params('text', \@_);
283
284        # Validate that text is a required string; validate_strict croaks on failure
285
253
2587
        my $validated = validate_strict({
286                description => 'Genealogy::Military::Branch::detect',
287                input       => $params,
288                schema      => $DETECT_SCHEMA,
289        });
290
253
13261
        my $text = $validated->{'text'};
291
253
298
        croak 'Genealogy::Military::Branch::detect: text is required'
292                unless defined $text;
293
294        # Walk each detector in specificity order; the first match wins
295
252
183
        my $branch;
296
252
341
        for my $d (@DETECTORS) {
297                # Each entry has a compiled pattern and an English key for translation
298
1950
15908
                if($text =~ $d->{'pattern'}) {
299
200
1248
                        $branch = $self->_translate($d->{'key'});
300
200
1145
                        last;
301                }
302        }
303
304        # Fall back to the default 'military' key when nothing matched
305
252
560
        unless(defined $branch) {
306                # Optionally alert the caller that no specific branch was identified
307                Carp::carp 'Genealogy::Military::Branch: no military branch detected'
308
52
65
                        if $self->{'warn_on_error'};
309
52
48
                $branch = $self->_translate('military');
310        }
311
312        # Return the validated branch string; guaranteed to be a defined string
313
252
594
        return set_return($branch, { type => 'string' });
314}
315
316# _translate
317#
318# Purpose:
319#   Returns the localised string for an internal branch key, falling back
320#   through language-specific -> English -> bare key.
321#
322# Entry criteria:
323#   $self - a blessed Genealogy::Military::Branch object
324#   $key  - a string matching one of the keys in %TRANSLATIONS{'en'}
325#
326# Exit status:
327#   Returns the localised branch name string.  Never returns undef:
328#   if no translation or English fallback exists, returns $key itself.
329#
330# Side effects:
331#   None.
332#
333# Notes:
334#   The 'en' table is the canonical fallback for all languages.
335#   Keys not present in the language-specific table fall through to
336#   English, allowing partial translation tables (e.g. 'fr' only
337#   translates the most common branches).
338
339sub _translate {
340
255
1947
        my ($self, $key) = @_;
341
342        # Get the cached language code from the object
343
255
690
        my $lang = $self->{'language'} // 'en';
344
345        # Try the language-specific translation first
346
255
332
        if(exists $TRANSLATIONS{$lang} && exists $TRANSLATIONS{$lang}{$key}) {
347
230
1842
                return $TRANSLATIONS{$lang}{$key};
348        }
349
350        # Fall back to English, then the bare key as a last resort
351
25
243
        return $TRANSLATIONS{'en'}{$key} // $key;
352}
353
354# _get_language
355#
356# Purpose:
357#   Determines the system's default language using environment variables.
358#
359# Entry criteria:
360#   None.  Reads environment variables directly.
361#
362# Exit status:
363#   Returns a two-letter language code string e.g. 'en', 'fr', 'de',
364#   or undef if no language can be determined.
365#
366# Side effects:
367#   None.
368#
369# Notes:
370#   Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL,
371#   LC_MESSAGES, LANG environment variables.
372#   Returns 'en' for C locale.
373#   See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html
374
375sub _get_language {
376        # Try I18N::LangTags::Detect first for most accurate detection
377
148
3759
        for my $tag (I18N::LangTags::Detect::detect()) {
378
80
2068
                if($tag =~ /^([a-z]{2})/i) {
379
79
169
                        return lc($1);
380                }
381        }
382
383        # Fall back to checking environment variables in priority order
384
69
543
        if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) {
385
1
7
                return lc($1);
386        }
387
68
61
        foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') {
388
201
161
                my $val = $ENV{$variable};
389
201
170
                next unless defined($val);
390                # Extract the two-letter primary language subtag
391
65
98
                if($val =~ /^([a-z]{2})/i) {
392
58
141
                        return lc($1);
393                }
394        }
395
396        # Handle C locale explicitly - treat as English
397
10
32
        return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/);
398
399
3
7
        return;
400}
401
402 - 446
=head1 AUTHOR

Nigel Horne C<< <njh@nigelhorne.com> >>

=head1 BUGS

Please report bugs via the GitHub issue tracker:
L<https://github.com/nigelhorne/Genealogy-Military-Branch/issues>

=head1 TODO

=over 4

=item * Add Australian, Canadian and other Commonwealth branch patterns

=item * Add more US-specific patterns (Space Force etc)

=item * Consider a companion C<Genealogy::Military::Rank> module

=back

=head1 SEE ALSO

=over 4

=item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Military-Branch/coverage/>

=item * L<Genealogy::Occupation>

=item * L<Params::Get>

=item * L<Params::Validate::Strict>

=item * L<Return::Set>

=back

=head1 LICENSE AND COPYRIGHT

Copyright 2026 Nigel Horne.

This program is released under the following licence: GPL2
If you use it, please let me know.

=cut
447
4481;