File Coverage

File:blib/lib/Genealogy/Occupation.pm
Coverage:98.1%

linestmtbrancondsubtimecode
1package Genealogy::Occupation;
2
3# TODO: railway/railroad = le chemin de fer
4
5
6
6
6
814860
5
78
use strict;
6
6
6
6
10
4
115
use warnings;
7
6
6
39
9
use 5.014;
8
9
6
6
6
8
6
166
use Carp qw(croak);
10
6
6
6
1133
12324
77
use I18N::LangTags::Detect;
11
6
6
6
1084
87319
390
use Lingua::EN::ABC;
12
6
6
6
1022
25900
114
use Params::Get;
13
6
6
6
1212
9967
152
use Readonly;
14
6
6
6
1692
70623
164
use Params::Validate::Strict qw(validate_strict);
15
6
6
6
945
1534
6226
use Return::Set qw(set_return);
16
17our $VERSION = '0.02';
18
19# Schema for new() arguments
20Readonly my $NEW_SCHEMA => {
21        warn_on_error => {
22                type     => 'boolean',
23                optional => 1,
24                default  => 0,
25        },
26};
27
28# Schema for normalise() arguments.
29# Note: occupation is extracted and normalised to an arrayref BEFORE
30# validate_strict is called, because Params::Validate::Strict does not
31# yet support union types.  Only the remaining named arguments (sex)
32# are validated here.
33Readonly my $NORMALISE_SCHEMA => {
34        sex => {
35                type     => 'string',
36                optional => 1,
37                memberof => ['M', 'F'],
38        },
39};
40
41# Occupations to filter out entirely - these are not real occupations
42# but rather descriptions of status or domestic roles
43Readonly my %FILTER => map { lc($_) => 1 } qw(
44        unemployed
45        retired
46);
47
48# Filter patterns - matched case-insensitively against the occupation
49Readonly my @FILTER_PATTERNS => (
50        qr/^scho(?:ol|lar)/i,
51        qr/wife$/i,
52        qr/seeking work/i,
53        qr/domestic duties/i,
54        qr/home duties/i,
55        qr/house\s?hold duties/i,
56        qr/^at school$/i,
57);
58
59# Direct lookup table for exact normalisation matches.
60# Keyed on lowercase occupation string.
61Readonly my %DIRECT => (
62        'ag lab'                          => 'Agricultural Labourer',
63        'ag labourer'                     => 'Agricultural Labourer',
64        'ag labourer pauper'              => 'Agricultural Labourer',
65        'agric labourer'                  => 'Agricultural Labourer',
66        'ag lab pauper'                   => 'Agricultural Labourer',
67        'farm labourer'                   => 'Agricultural Labourer',
68        'agricultural farm labourer'      => 'Agricultural Labourer',
69        'ordinary agricultural labourer'  => 'Agricultural Labourer',
70        'work on farm'                    => 'Agricultural Labourer',
71        'agricultural lab'                => 'Agricultural Labourer',
72        'agril labourer'                  => 'Agricultural Labourer',
73        'labourer ag'                     => 'Agricultural Labourer',
74        'labourer (ag)'                   => 'Agricultural Labourer',
75        'platelayer railway'              => 'Railway Platelayer',
76        'general servant domestic'        => 'Domestic Servant',
77        'domestic servant'                => 'Domestic Servant',
78        'lorry driver heavy worker'       => 'Lorry Driver',
79        'laundry man'                     => 'Laundryman',
80        "brewer's labourer"               => 'Brewery Labourer',
81        'labourer builders'               => "Builder's Labourer",
82        'gardener domestic'               => 'Gardener and Domestic',
83        'gardner and domestic servant'    => 'Gardener and Domestic',  # sic
84        'under gardener domestic'         => 'Domestic Gardener',
85        'domestic under gardner'          => 'Domestic Gardener',      # sic
86        'market gardener'                 => 'Market Gardener',
87        'plate glass cutter'              => 'Plate Glass Cutter',
88        'pfc us army'                     => 'Private First Class',
89        'labourer gas stoker'             => 'Gas Stoker',
90);
91
92# Pattern that matches "general serv*dom*" variants
93Readonly my $GENERAL_SERVANT_RE => qr/^general serv.+dom/i;
94
95# French translations keyed on lowercase English occupation.
96# Values are either a plain string or a hashref with M/F keys
97# for gendered translations.
98Readonly my %FRENCH => (
99        'postman'   => { M => 'Facteur',      F => 'Factrice' },
100        'farmer'    => { M => 'Agriculteur',  F => 'Agricultrice' },
101        'teacher'   => 'Professeur',
102        'nurse'     => { M => 'Infirmier',    F => 'Infirmière' },
103);
104
105# German translations keyed on lowercase English occupation.
106# Values are either a plain string or a hashref with M/F keys.
107Readonly my %GERMAN => (
108    'teacher'    => { M => 'Lehrer',     F => 'Lehrerin' },
109    'farmer'     => { M => 'Bauer',      F => 'Bauerin' },
110    'bus driver' => { M => 'Busfahrer',  F => 'Busfahrerin' },
111    'doctor'     => 'Arzt',   # simplified non-gendered form for fallback
112);
113
114 - 220
=head1 NAME

Genealogy::Occupation - Normalise and translate genealogical occupation strings

=head1 VERSION

Version 0.02

=head1 SYNOPSIS

    use Genealogy::Occupation;

    my $normaliser = Genealogy::Occupation->new();

    my @occupations = $normaliser->normalise(
        occupation => 'Ag Lab',
        sex        => 'M',
    );
    # Returns ('Agricultural Labourer')

    # Or pass an arrayref
    my @more = $normaliser->normalise(
        occupation => ['Ag Lab', 'Ag Lab', 'Retired'],
        sex        => 'M',
    );
    # Returns ('Agricultural Labourer') - deduplicated and filtered

=head1 DESCRIPTION

Normalises occupation strings found in genealogical records, handling
common abbreviations, malformed entries, locale-specific spellings and
translations into French and German.

Designed to handle poor-quality data from genealogy software imports
where occupation strings may be abbreviated, inconsistent or use
archaic terminology.

Processing steps applied in order:

=over 4

=item 1. Filter out non-occupations (Scholar, Retired, Domestic Duties etc)

=item 2. Normalise abbreviations and malformed entries to canonical forms

=item 3. Deduplicate consecutive identical or equivalent entries (compared on pre-translation normalised forms)

=item 4. Apply locale-specific spellings via C<Lingua::EN::ABC>

=item 5. Translate to French or German if system locale requires it

=back

=head1 METHODS

=head2 new

=head3 Purpose

Constructs a new normaliser object.

=head3 API Specification

=head4 Input

    {
        warn_on_error => {
            type     => 'boolean',
            optional => 1,
            default  => 0,
        },
    }

=head4 Output

    { type => 'object', isa => 'Genealogy::Occupation' }

=head3 Arguments

=over 4

=item * C<warn_on_error> - If true, unknown occupations that cannot be
translated will emit a warning via C<carp> rather than silently falling
back to English. Optional, defaults to 0.

=back

=head3 Returns

A blessed C<Genealogy::Occupation> object.

=head3 Side Effects

None.

=head3 Notes

The system locale is detected once at construction time and cached for
the lifetime of the object.

=head3 Example

    my $normaliser = Genealogy::Occupation->new({
        warn_on_error => 1,
    });

=cut
221
222sub new {
223
141
211216
        my $class = shift;
224
225        # Accept both hashref and flat list, defaulting to empty hashref
226        # since all constructor arguments are optional
227
141
209
        my $args = Params::Get::get_params(undef, @_) // {};
228
229        # Validate constructor arguments
230
141
1466
        validate_strict({
231                description => 'Genealogy::Occupation::new',
232                input       => $args,
233                schema      => $NEW_SCHEMA,
234        });
235
236        # Detect and cache the system language at construction time
237
138
7122
        my $language = _get_language();
238
239        return bless {
240
138
398
                warn_on_error => $args->{warn_on_error} // 0,
241                language      => $language,
242        }, $class;
243}
244
245 - 324
=head2 normalise

=head3 Purpose

Normalises one or more occupation strings, applying filtering,
deduplication, abbreviation expansion, locale spelling and
translation in order.

=head3 API Specification

=head4 Input

    {
        occupation => {
            type => ['string', 'arrayref'],
        },
        sex => {
            type     => 'string',
            optional => 1,
            memberof => ['M', 'F'],
        },
    }

=head4 Output

    {
        type         => 'arrayref',
        element_type => 'string',
    }

=head3 Arguments

=over 4

=item * C<occupation> - A single occupation string or an arrayref of
occupation strings. Required.

=item * C<sex> - The sex of the person, C<'M'> or C<'F'>. Optional
but required for correct gendered translations in French and German.
Defaults to C<'M'> if not provided when a gendered translation is
needed.

=back

=head3 Returns

An arrayref of normalised occupation strings. May be empty if all
occupations were filtered out.

=head3 Side Effects

If C<warn_on_error> was set at construction and an occupation cannot
be translated, emits a warning via C<carp>.

=head3 Notes

Deduplication operates across the full list of occupations passed in.
Processing a single occupation at a time will not deduplicate across
multiple calls.

Deduplication compares the pre-translation normalised English forms, not
the translated output.  This means two consecutive identical English
occupations correctly collapse to one entry even in French or German
locales, where the translated results stored in the output array would
otherwise never match the incoming English string.

=head3 Example

    my $result = $normaliser->normalise(
        occupation => ['Ag Lab', 'Ag Lab', 'Retired'],
        sex        => 'M',
    );
    # Returns ['Agricultural Labourer']

    my $result = $normaliser->normalise(
        occupation => 'Platelayer Railway',
    );
    # Returns ['Railway Platelayer']

=cut
325
326sub normalise {
327
233
33966
        my $self = shift;
328
329        # Accept both hashref and flat list
330
233
269
        my $args = Params::Get::get_params(undef, @_);
331
332        # Extract and normalise occupation to an arrayref BEFORE calling
333        # validate_strict.  Params::Validate::Strict does not yet support
334        # union types, so passing an arrayref against a 'string' schema
335        # would die.  We handle the type check here explicitly instead.
336
233
2439
        my $raw = delete $args->{occupation};
337
233
245
        croak 'Genealogy::Occupation::normalise: occupation is required'
338                unless defined $raw;
339
229
249
        my $occupations = ref($raw) eq 'ARRAY' ? $raw : [ $raw ];
340
341        # Validate remaining named arguments (sex) strictly
342
229
392
        validate_strict({
343                description => 'Genealogy::Occupation::normalise',
344                input       => $args,
345                schema      => $NORMALISE_SCHEMA,
346        });
347
348        # Default sex to M if not provided, needed for gendered translations
349
227
12416
        my $sex = $args->{sex} // 'M';
350
351
227
223
        my $language = $self->{language} // 'en';
352
227
164
        my @result;
353        # Track the last normalised English form for deduplication.  We cannot
354        # use $result[-1] for this because @result stores the translated output;
355        # comparing a translated value against a pre-translation English string
356        # means consecutive identical occupations are never deduplicated in
357        # French or German locales.
358
227
144
        my $last_normalised = '';
359
360
227
227
116
187
        foreach my $occupation (@{$occupations}) {
361                # Clean up whitespace and punctuation artifacts
362
1866
1566
                $occupation =~ tr/\r\n/ /;
363
1866
1161
                $occupation =~ s/\.+$//;
364
1866
1224
                $occupation =~ s/[\(\)]//g;
365
1866
1245
                $occupation =~ s/\s\s+/ /g;
366
1866
1266
                $occupation =~ s/\s+$//;
367
1866
1081
                $occupation =~ s/\./;/g;
368
369                # Step 1: filter out non-occupations
370
1866
1963
                next if $FILTER{lc($occupation)};
371
1652
3952
                my $filtered = 0;
372
1652
1338
                foreach my $pattern (@FILTER_PATTERNS) {
373
9670
34159
                        if($occupation =~ $pattern) {
374
423
1064
                                $filtered = 1;
375
423
258
                                last;
376                        }
377                }
378
1652
4807
                next if $filtered;
379
380                # Step 2: normalise the occupation string
381
1229
801
                $occupation = _normalise_single($occupation);
382
1229
996
                next unless length($occupation);
383
384                # Step 3: deduplicate against the previous normalised (pre-translation)
385                # entry, not against $result[-1] which holds the translated form
386
1221
1023
                next if lc($last_normalised) eq lc($occupation);
387
213
130
                $last_normalised = $occupation;
388
389                # Step 4: apply locale-specific spellings for English variants
390
213
174
                if($language eq 'en') {
391
141
101
                        $occupation = _apply_locale($occupation);
392                }
393
394                # Step 5: translate to target language if not English
395
213
3142
                if($language eq 'fr') {
396                        $occupation = _translate_french($occupation, $sex,
397
41
43
                                $self->{warn_on_error});
398                } elsif($language eq 'de') {
399                        $occupation = _translate_german($occupation, $sex,
400
31
38
                                $self->{warn_on_error});
401                }
402
403
213
379
                push @result, ucfirst($occupation);
404        }
405
406
227
409
        return set_return(\@result, { type => 'arrayref', element_type => 'string' });
407}
408
409# _normalise_single
410#
411# Purpose:
412#   Normalises a single occupation string by expanding abbreviations,
413#   fixing malformed entries and applying pattern-based corrections.
414#
415# Entry criteria:
416#   $occupation - a non-empty string, already cleaned of whitespace
417#
418# Exit status:
419#   Returns the normalised occupation string, or empty string if the
420#   entry should be discarded after normalisation.
421#
422# Side effects:
423#   None.
424#
425# Notes:
426#   Checks the direct lookup table first for exact matches,
427#   then applies pattern-based rules for more complex cases.
428#   The "sic" comments mark known data quality issues in real
429#   genealogy records that we intentionally handle.
430
431sub _normalise_single {
432
1254
2157
        my $occupation = shift;
433
434        # Check direct lookup table first for exact matches
435
1254
1308
        if(my $match = $DIRECT{lc($occupation)}) {
436
1078
2813
                return $match;
437        }
438
439        # Handle "general servant domestic" pattern variants
440
176
566
        if($occupation =~ $GENERAL_SERVANT_RE) {
441
3
14
                return 'Domestic Servant';
442        }
443
444        # Remove common suffixes that add no occupational meaning
445
173
433
        $occupation =~ s/\s+own account$//i;
446
173
112
        $occupation =~ s/^formerly //i;
447
173
149
        $occupation =~ s/\s+retired$//i;
448
173
111
        $occupation =~ s/\s+heavy worker$//i;
449
173
113
        $occupation =~ s/\s+own business$//i;
450
173
137
        $occupation =~ s/Labor/Labour/ig;
451
452        # Reorder "X domestic" and "X dom" patterns to "Domestic X"
453
173
393
        if($occupation =~ /^(.+)\s(?:domestic|dom)$/i) {
454
5
11
                return "Domestic $1";
455        }
456
457        # Convert "works on/for X" to "X worker"
458
168
140
        if($occupation =~ /works?\s+(?:on|for)\s+(.+)/i) {
459
4
9
                return "$1 worker";
460        }
461
462        # Convert "Cleaner X" prefix form to "X cleaner"
463
164
149
        if($occupation =~ /^Cleaner\s+(.+)/i) {
464
2
4
                return "$1 cleaner";
465        }
466
467        # Reorder clerk, salesman, foreman, manager patterns
468
162
136
        if($occupation =~ /^Clerk\s+(.+)/i) {
469
2
6
                return "$1 Clerk";
470        }
471
160
126
        if($occupation =~ /^Salesman\s+(.*)/i) {
472
3
6
                return "$1 Salesman";
473        }
474
157
112
        if($occupation =~ /^Foreman\s+(.*)/i) {
475
4
6
                my $of = $1;
476
4
4
                $of =~ s/^of the //i;
477
4
7
                return "$of Foreman";
478        }
479
153
171
        if($occupation =~ /^Manager\s+(.*)/i
480                && $occupation !~ /^Manager of /i
481                && $occupation !~ /Manager & /i) {
482
2
5
                return "$1 Manager";
483        }
484
485        # Convert "Shop Assistant X" to "X's Shop Assistant"
486
151
125
        if($occupation =~ /^Shop Assistant\s+(.+)/i) {
487
2
3
                return "$1's Shop Assistant";
488        }
489
490        # Convert "X Assistant" to "X's Assistant" for known trade forms
491
149
722
        if($occupation =~ /^(.+)\s+Assistant$/i) {
492
7
8
                my $trade = $1;
493
7
8
                if(lc($trade) eq 'bakers') {
494
2
4
                        return "Baker's Assistant";
495                }
496
5
7
                if(lc($trade) eq 'butchers') {
497
2
4
                        return "Butcher's Assistant";
498                }
499
3
6
                unless($trade =~ /'s$/ || lc($trade) eq 'shop') {
500
1
2
                        return "${trade}'s Assistant";
501                }
502        }
503
504        # Convert police* to "police officer"
505
144
132
        if($occupation =~ /police$/i) {
506
3
4
                return "$occupation officer";
507        }
508
509        # Convert pluralised trade forms e.g. "Builders Labourer"
510        # to possessive "Builder's Labourer".
511        #
512        # The regex captures:
513        #   $base ($1) - the word stem without the trailing 's'
514        #   $last ($2) - the final character of $base (used to reconstruct
515        #                the stem; not used in the guard comparison)
516        #   $role ($3) - the following word
517        #
518        # Guard against false positives: "Bus Driver" ($base eq 'Bu') and
519        # "Harness Maker" ($base eq 'Harnes') must not be rewritten.
520        # Compare against $base directly - using "$base$last" is wrong
521        # because it appends the final character a second time.
522
141
542
        if($occupation !~ /gas works/i
523                && $occupation =~ /^(.+([a-z]))s\s+([a-z]+)$/i) {
524
10
18
                my ($base, $last, $role) = ($1, $2, $3);
525
10
19
                unless(lc($base) eq 'bu' || lc($base) eq 'harnes') {
526
2
2
                        return "${base}'s $role";
527                }
528        }
529
530        # Handle "on farm" pattern
531
139
156
        if($occupation =~ /^(.+)\s+on farm$/i) {
532
2
3
                return "$1 on a farm";
533        }
534
535
137
144
        return $occupation;
536}
537
538# _apply_locale
539#
540# Purpose:
541#   Applies locale-specific English spelling variants using Lingua::EN::ABC.
542#   Handles en_US (labour->labor), en_CA, and en_GB (default) variants.
543#
544# Entry criteria:
545#   $occupation - a normalised English occupation string (may be title-cased)
546#
547# Exit status:
548#   Returns the occupation string with locale-appropriate spellings.
549#   Original capitalisation is preserved; Lingua::EN::ABC performs
550#   case-insensitive substitutions and does not require lowercased input.
551#
552# Side effects:
553#   None.
554#
555# Notes:
556#   Reads $ENV{'LANG'} to determine the locale.
557#   Defaults to British English if no locale is detected.
558#   Do NOT pass lc($occupation) here - doing so strips title case that
559#   cannot be fully recovered by ucfirst() alone.
560
561sub _apply_locale {
562
144
2748
        my $occupation = shift;
563
564        # Apply American English spelling variants
565
144
251
        if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_US/)) {
566
2
3
                $occupation = Lingua::EN::ABC::b2a($occupation);
567
2
296
                $occupation =~ s/labour/labor/ig;
568
2
3
                return $occupation;
569        }
570
571        # Apply Canadian English spelling variants
572
142
191
        if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_CA/)) {
573
2
5
                return Lingua::EN::ABC::b2c($occupation);
574        }
575
576        # Default to British English spelling
577
140
151
        return Lingua::EN::ABC::a2b($occupation);
578}
579
580# _translate_french
581#
582# Purpose:
583#   Translates a normalised English occupation string to French,
584#   applying gendered forms where appropriate.
585#
586# Entry criteria:
587#   $occupation    - normalised English occupation string
588#   $sex           - 'M' or 'F'
589#   $warn_on_error - boolean, if true carp on unknown occupation
590#
591# Exit status:
592#   Returns the French occupation string, or the original English
593#   string if no translation is available and warn_on_error is false.
594#
595# Side effects:
596#   Carps if warn_on_error is true and no translation is found.
597#
598# Notes:
599#   Only a subset of occupations have French translations.
600#   The retired/teaching special cases are handled via regex
601#   rather than the lookup table since they modify rather than
602#   replace the occupation string.
603
604sub _translate_french {
605
50
1519
        my ($occupation, $sex, $warn_on_error) = @_;
606
607        # Handle teaching as a special regex case
608
50
54
        if($occupation =~ /teaching/i) {
609
3
4
                return 'professeur';
610        }
611
612        # Handle retired as a suffix replacement
613
47
42
        $occupation =~ s/retired/\x{00E0} la retraite/i;
614
615        # Handle X Farmer pattern
616
47
55
        if($occupation =~ /^(.+)\sFarmer$/i) {
617
6
7
                my $type = $1;
618
6
12
                return $sex eq 'F'
619                        ? "Agricultrice de $type"
620                        : "Agriculteur de $type";
621        }
622
623        # Check the French translation lookup table
624
41
57
        if(my $translation = $FRENCH{lc($occupation)}) {
625
30
116
                if(ref($translation) eq 'HASH') {
626
24
30
                        return $translation->{$sex} // $translation->{'M'};
627                }
628
6
9
                return $translation;
629        }
630
631        # Fall back to English with optional warning
632
11
40
        if($warn_on_error) {
633
5
8
                Carp::carp "Genealogy::Occupation: no French translation for '$occupation'";
634        }
635
636
11
16
        return $occupation;
637}
638
639# _translate_german
640#
641# Purpose:
642#   Translates a normalised English occupation string to German,
643#   applying gendered forms where appropriate.
644#
645# Entry criteria:
646#   $occupation    - normalised English occupation string
647#   $sex           - 'M' or 'F'
648#   $warn_on_error - boolean, if true carp on unknown occupation
649#
650# Exit status:
651#   Returns the German occupation string, or the original English
652#   string if no translation is available and warn_on_error is false.
653#
654# Side effects:
655#   Carps if warn_on_error is true and no translation is found.
656#
657# Notes:
658#   Only a subset of occupations have German translations.
659#   The retired and self-employed special cases are handled via
660#   regex rather than the lookup table.
661
662sub _translate_german {
663
43
1393
        my ($occupation, $sex, $warn_on_error) = @_;
664
665        # Handle teaching as a special regex case
666
43
53
        if($occupation =~ /teaching/i) {
667
5
10
                return $sex eq 'F' ? 'Lehrerin' : 'Lehrer';
668        }
669
670        # Handle X Farmer pattern
671
38
66
        if($occupation =~ /^(.+)\sFarmer$/i) {
672
5
9
                return $sex eq 'F' ? 'Landwirtin' : 'Landwirt';
673        }
674
675        # Handle retired and self-employed as suffix replacements
676
33
28
        $occupation =~ s/retired/im ruhestand/i;
677
33
30
        $occupation =~ s/self-employed/selbstst\x{00E4}ndig/i;
678
679        # Check the German translation lookup table
680
33
48
        if(my $translation = $GERMAN{lc($occupation)}) {
681
21
72
                if(ref($translation) eq 'HASH') {
682
19
23
                        return $translation->{$sex} // $translation->{'M'};
683                }
684
2
2
                return $translation;
685        }
686
687        # Fall back to English with optional warning
688
12
43
        if($warn_on_error) {
689
4
9
                Carp::carp "Genealogy::Occupation: no German translation for '$occupation'";
690        }
691
692
12
17
        return $occupation;
693}
694
695# _get_language
696#
697# Purpose:
698#   Determines the system's default language using environment variables.
699#
700# Entry criteria:
701#   None. Reads environment variables directly.
702#
703# Exit status:
704#   Returns a two-letter language code string e.g. 'en', 'fr', 'de',
705#   or undef if no language can be determined.
706#
707# Side effects:
708#   None.
709#
710# Notes:
711#   Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL,
712#   LC_MESSAGES, LANG environment variables.
713#   Returns 'en' for C locale.
714#   See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html
715
716sub _get_language {
717        # Try I18N::LangTags::Detect first for most accurate detection
718
147
3440
        for my $tag (I18N::LangTags::Detect::detect()) {
719
65
1667
                if($tag =~ /^([a-z]{2})/i) {
720
64
123
                        return lc($1);
721                }
722        }
723
724        # Fall back to checking environment variables in order
725
83
639
        if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) {
726
1
2
                return lc($1);
727        }
728
82
61
        foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') {
729
243
178
                my $val = $ENV{$variable};
730
243
196
                next unless defined($val);
731
79
126
                if($val =~ /^([a-z]{2})/i) {
732
71
98
                        return lc($1);
733                }
734        }
735
736        # Handle C locale explicitly
737
11
33
        return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/);
738
739
3
4
        return;
740}
741
742 - 786
=head1 AUTHOR

Nigel Horne C<< <njh@bandsman.co.uk> >>

=head1 BUGS

Please report bugs via the GitHub issue tracker:
L<https://github.com/nigelhorne/Genealogy-Occupation/issues>

=head1 TODO

=over 4

=item * Expand French and German translation tables

=item * Add support for additional languages

=item * Add C<normalise_place()> equivalent for occupation place strings

=back

=head1 SEE ALSO

=over 4

=item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Occupation/coverage/>

=item * L<Lingua::EN::ABC>

=item * L<Params::Get>

=item * L<Params::Validate::Strict>

=item * L<Return::Set>

=back

=head1 LICENSE AND COPYRIGHT

Copyright 2026 Nigel Horne.

This program is released under the following licence: GPL2
If you use it, please let me know.

=cut
787
7881;