lib/Genealogy/Occupation.pm

Structural Coverage (Approximate)

TER1 (Statement): 100.00%
TER2 (Branch): 100.00%
TER3 (LCSAJ): 100.0% (28/28)
Approximate LCSAJ segments: 97

LCSAJ Legend

● Covered — this LCSAJ path was executed during testing.

● Not covered — this LCSAJ path was never executed. These are the paths to focus on.

Multiple dots on a line indicate that multiple control-flow paths begin at that line. Hovering over any dot shows:

        start → end → jump

start – first line of the linear sequence
end – last line before control flow changes
jump – line execution jumps to next

Uncovered paths show [NOT COVERED] in the tooltip.

Mutant Testing Legend

Survived (tests missed this) Killed (tests detected this) No mutation

1: package Genealogy::Occupation; 2: 3: # TODO: railway/railroad = le chemin de fer 4: 5: use strict; 6: use warnings; 7: use 5.014; 8: 9: use Carp qw(croak); 10: use I18N::LangTags::Detect; 11: use Lingua::EN::ABC; 12: use Params::Get; 13: use Readonly; 14: use Params::Validate::Strict qw(validate_strict); 15: use Return::Set qw(set_return); 16: 17: our $VERSION = '0.02'; 18: 19: # Schema for new() arguments 20: Readonly my $NEW_SCHEMA => { 21: warn_on_error => { 22: type => 'boolean', 23: optional => 1, 24: default => 0, 25: }, 26: }; 27: 28: # Schema for normalise() arguments. 29: # Note: occupation is extracted and normalised to an arrayref BEFORE 30: # validate_strict is called, because Params::Validate::Strict does not 31: # yet support union types. Only the remaining named arguments (sex) 32: # are validated here. 33: Readonly my $NORMALISE_SCHEMA => { 34: sex => { 35: type => 'string', 36: optional => 1, 37: memberof => ['M', 'F'], 38: }, 39: }; 40: 41: # Occupations to filter out entirely - these are not real occupations 42: # but rather descriptions of status or domestic roles 43: Readonly my %FILTER => map { lc($_) => 1 } qw( 44: unemployed 45: retired 46: ); 47: 48: # Filter patterns - matched case-insensitively against the occupation 49: Readonly my @FILTER_PATTERNS => ( 50: qr/^scho(?:ol|lar)/i, 51: qr/wife$/i, 52: qr/seeking work/i, 53: qr/domestic duties/i, 54: qr/home duties/i, 55: qr/house\s?hold duties/i, 56: qr/^at school$/i, 57: ); 58: 59: # Direct lookup table for exact normalisation matches. 60: # Keyed on lowercase occupation string. 61: Readonly my %DIRECT => ( 62: 'ag lab' => 'Agricultural Labourer', 63: 'ag labourer' => 'Agricultural Labourer', 64: 'ag labourer pauper' => 'Agricultural Labourer', 65: 'agric labourer' => 'Agricultural Labourer', 66: 'ag lab pauper' => 'Agricultural Labourer', 67: 'farm labourer' => 'Agricultural Labourer', 68: 'agricultural farm labourer' => 'Agricultural Labourer', 69: 'ordinary agricultural labourer' => 'Agricultural Labourer', 70: 'work on farm' => 'Agricultural Labourer', 71: 'agricultural lab' => 'Agricultural Labourer', 72: 'agril labourer' => 'Agricultural Labourer', 73: 'labourer ag' => 'Agricultural Labourer', 74: 'labourer (ag)' => 'Agricultural Labourer', 75: 'platelayer railway' => 'Railway Platelayer', 76: 'general servant domestic' => 'Domestic Servant', 77: 'domestic servant' => 'Domestic Servant', 78: 'lorry driver heavy worker' => 'Lorry Driver', 79: 'laundry man' => 'Laundryman', 80: "brewer's labourer" => 'Brewery Labourer', 81: 'labourer builders' => "Builder's Labourer", 82: 'gardener domestic' => 'Gardener and Domestic', 83: 'gardner and domestic servant' => 'Gardener and Domestic', # sic 84: 'under gardener domestic' => 'Domestic Gardener', 85: 'domestic under gardner' => 'Domestic Gardener', # sic 86: 'market gardener' => 'Market Gardener', 87: 'plate glass cutter' => 'Plate Glass Cutter', 88: 'pfc us army' => 'Private First Class', 89: 'labourer gas stoker' => 'Gas Stoker', 90: ); 91: 92: # Pattern that matches "general serv*dom*" variants 93: Readonly my $GENERAL_SERVANT_RE => qr/^general serv.+dom/i; 94: 95: # French translations keyed on lowercase English occupation. 96: # Values are either a plain string or a hashref with M/F keys 97: # for gendered translations. 98: Readonly my %FRENCH => ( 99: 'postman' => { M => 'Facteur', F => 'Factrice' }, 100: 'farmer' => { M => 'Agriculteur', F => 'Agricultrice' }, 101: 'teacher' => 'Professeur', 102: 'nurse' => { M => 'Infirmier', F => 'InfirmiÃ¨re' }, 103: ); 104: 105: # German translations keyed on lowercase English occupation. 106: # Values are either a plain string or a hashref with M/F keys. 107: Readonly my %GERMAN => ( 108: 'teacher' => { M => 'Lehrer', F => 'Lehrerin' }, 109: 'farmer' => { M => 'Bauer', F => 'Bauerin' }, 110: 'bus driver' => { M => 'Busfahrer', F => 'Busfahrerin' }, 111: 'doctor' => 'Arzt', # simplified non-gendered form for fallback 112: ); 113: 114: =head1 NAME 115: 116: Genealogy::Occupation - Normalise and translate genealogical occupation strings 117: 118: =head1 VERSION 119: 120: Version 0.02 121: 122: =head1 SYNOPSIS 123: 124: use Genealogy::Occupation; 125: 126: my $normaliser = Genealogy::Occupation->new(); 127: 128: my @occupations = $normaliser->normalise( 129: occupation => 'Ag Lab', 130: sex => 'M', 131: ); 132: # Returns ('Agricultural Labourer') 133: 134: # Or pass an arrayref 135: my @more = $normaliser->normalise( 136: occupation => ['Ag Lab', 'Ag Lab', 'Retired'], 137: sex => 'M', 138: ); 139: # Returns ('Agricultural Labourer') - deduplicated and filtered 140: 141: =head1 DESCRIPTION 142: 143: Normalises occupation strings found in genealogical records, handling 144: common abbreviations, malformed entries, locale-specific spellings and 145: translations into French and German. 146: 147: Designed to handle poor-quality data from genealogy software imports 148: where occupation strings may be abbreviated, inconsistent or use 149: archaic terminology. 150: 151: Processing steps applied in order: 152: 153: =over 4 154: 155: =item 1. Filter out non-occupations (Scholar, Retired, Domestic Duties etc) 156: 157: =item 2. Normalise abbreviations and malformed entries to canonical forms 158: 159: =item 3. Deduplicate consecutive identical or equivalent entries (compared on pre-translation normalised forms) 160: 161: =item 4. Apply locale-specific spellings via C<Lingua::EN::ABC> 162: 163: =item 5. Translate to French or German if system locale requires it 164: 165: =back 166: 167: =head1 METHODS 168: 169: =head2 new 170: 171: =head3 Purpose 172: 173: Constructs a new normaliser object. 174: 175: =head3 API Specification 176: 177: =head4 Input 178: 179: { 180: warn_on_error => { 181: type => 'boolean', 182: optional => 1, 183: default => 0, 184: }, 185: } 186: 187: =head4 Output 188: 189: { type => 'object', isa => 'Genealogy::Occupation' } 190: 191: =head3 Arguments 192: 193: =over 4 194: 195: =item * C<warn_on_error> - If true, unknown occupations that cannot be 196: translated will emit a warning via C<carp> rather than silently falling 197: back to English. Optional, defaults to 0. 198: 199: =back 200: 201: =head3 Returns 202: 203: A blessed C<Genealogy::Occupation> object. 204: 205: =head3 Side Effects 206: 207: None. 208: 209: =head3 Notes 210: 211: The system locale is detected once at construction time and cached for 212: the lifetime of the object. 213: 214: =head3 Example 215: 216: my $normaliser = Genealogy::Occupation->new({ 217: warn_on_error => 1, 218: }); 219: 220: =cut 221: 222: sub new { 223: my $class = shift; 224: 225: # Accept both hashref and flat list, defaulting to empty hashref 226: # since all constructor arguments are optional 227: my $args = Params::Get::get_params(undef, @_) // {}; 228: 229: # Validate constructor arguments 230: validate_strict({ 231: description => 'Genealogy::Occupation::new', 232: input => $args, 233: schema => $NEW_SCHEMA, 234: }); 235: 236: # Detect and cache the system language at construction time 237: my $language = _get_language(); 238:

Mutants (Total: 2, Killed: 2, Survived: 0)

240: warn_on_error => $args->{warn_on_error} // 0, 241: language => $language, 242: }, $class; 243: } 244: 245: =head2 normalise 246: 247: =head3 Purpose 248: 249: Normalises one or more occupation strings, applying filtering, 250: deduplication, abbreviation expansion, locale spelling and 251: translation in order. 252: 253: =head3 API Specification 254: 255: =head4 Input 256: 257: { 258: occupation => { 259: type => ['string', 'arrayref'], 260: }, 261: sex => { 262: type => 'string', 263: optional => 1, 264: memberof => ['M', 'F'], 265: }, 266: } 267: 268: =head4 Output 269: 270: { 271: type => 'arrayref', 272: element_type => 'string', 273: } 274: 275: =head3 Arguments 276: 277: =over 4 278: 279: =item * C<occupation> - A single occupation string or an arrayref of 280: occupation strings. Required. 281: 282: =item * C<sex> - The sex of the person, C<'M'> or C<'F'>. Optional 283: but required for correct gendered translations in French and German. 284: Defaults to C<'M'> if not provided when a gendered translation is 285: needed. 286: 287: =back 288: 289: =head3 Returns 290: 291: An arrayref of normalised occupation strings. May be empty if all 292: occupations were filtered out. 293: 294: =head3 Side Effects 295: 296: If C<warn_on_error> was set at construction and an occupation cannot 297: be translated, emits a warning via C<carp>. 298: 299: =head3 Notes 300: 301: Deduplication operates across the full list of occupations passed in. 302: Processing a single occupation at a time will not deduplicate across 303: multiple calls. 304: 305: Deduplication compares the pre-translation normalised English forms, not 306: the translated output. This means two consecutive identical English 307: occupations correctly collapse to one entry even in French or German 308: locales, where the translated results stored in the output array would 309: otherwise never match the incoming English string. 310: 311: =head3 Example 312: 313: my $result = $normaliser->normalise( 314: occupation => ['Ag Lab', 'Ag Lab', 'Retired'], 315: sex => 'M', 316: ); 317: # Returns ['Agricultural Labourer'] 318: 319: my $result = $normaliser->normalise( 320: occupation => 'Platelayer Railway', 321: ); 322: # Returns ['Railway Platelayer'] 323: 324: =cut 325: 326: sub normalise { ●327 → 360 → 406 327: my $self = shift; 328: 329: # Accept both hashref and flat list 330: my $args = Params::Get::get_params(undef, @_); 331: 332: # Extract and normalise occupation to an arrayref BEFORE calling 333: # validate_strict. Params::Validate::Strict does not yet support 334: # union types, so passing an arrayref against a 'string' schema 335: # would die. We handle the type check here explicitly instead. 336: my $raw = delete $args->{occupation}; 337: croak 'Genealogy::Occupation::normalise: occupation is required' 338: unless defined $raw; 339: my $occupations = ref($raw) eq 'ARRAY' ? $raw : [ $raw ]; 340: 341: # Validate remaining named arguments (sex) strictly 342: validate_strict({ 343: description => 'Genealogy::Occupation::normalise', 344: input => $args, 345: schema => $NORMALISE_SCHEMA, 346: }); 347: 348: # Default sex to M if not provided, needed for gendered translations 349: my $sex = $args->{sex} // 'M'; 350: 351: my $language = $self->{language} // 'en'; 352: my @result; 353: # Track the last normalised English form for deduplication. We cannot 354: # use $result[-1] for this because @result stores the translated output; 355: # comparing a translated value against a pre-translation English string 356: # means consecutive identical occupations are never deduplicated in 357: # French or German locales. 358: my $last_normalised = ''; 359: 360: foreach my $occupation (@{$occupations}) { 361: # Clean up whitespace and punctuation artifacts 362: $occupation =~ tr/\r\n/ /; 363: $occupation =~ s/\.+$//; 364: $occupation =~ s/[]//g; 365: $occupation =~ s/\s\s+/ /g; 366: $occupation =~ s/\s+$//; 367: $occupation =~ s/\./;/g; 368: 369: # Step 1: filter out non-occupations 370: next if $FILTER{lc($occupation)}; 371: my $filtered = 0; 372: foreach my $pattern (@FILTER_PATTERNS) { 373: if($occupation =~ $pattern) {

Mutants (Total: 1, Killed: 1, Survived: 0)

374: $filtered = 1; 375: last; 376: } 377: } 378: next if $filtered; 379: 380: # Step 2: normalise the occupation string 381: $occupation = _normalise_single($occupation); 382: next unless length($occupation); 383: 384: # Step 3: deduplicate against the previous normalised (pre-translation) 385: # entry, not against $result[-1] which holds the translated form 386: next if lc($last_normalised) eq lc($occupation); 387: $last_normalised = $occupation; 388: 389: # Step 4: apply locale-specific spellings for English variants 390: if($language eq 'en') {

Mutants (Total: 1, Killed: 1, Survived: 0)

391: $occupation = _apply_locale($occupation); 392: } 393: 394: # Step 5: translate to target language if not English 395: if($language eq 'fr') {

Mutants (Total: 1, Killed: 1, Survived: 0)

396: $occupation = _translate_french($occupation, $sex, 397: $self->{warn_on_error}); 398: } elsif($language eq 'de') { 399: $occupation = _translate_german($occupation, $sex, 400: $self->{warn_on_error}); 401: } 402: 403: push @result, ucfirst($occupation); 404: } 405: 406: return set_return(\@result, { type => 'arrayref', element_type => 'string' });

Mutants (Total: 2, Killed: 2, Survived: 0)

407: } 408: 409: # _normalise_single 410: # 411: # Purpose: 412: # Normalises a single occupation string by expanding abbreviations, 413: # fixing malformed entries and applying pattern-based corrections. 414: # 415: # Entry criteria: 416: # $occupation - a non-empty string, already cleaned of whitespace 417: # 418: # Exit status: 419: # Returns the normalised occupation string, or empty string if the 420: # entry should be discarded after normalisation. 421: # 422: # Side effects: 423: # None. 424: # 425: # Notes: 426: # Checks the direct lookup table first for exact matches, 427: # then applies pattern-based rules for more complex cases. 428: # The "sic" comments mark known data quality issues in real 429: # genealogy records that we intentionally handle. 430: 431: sub _normalise_single { ●432 → 435 → 440 432: my $occupation = shift; 433: 434: # Check direct lookup table first for exact matches 435: if(my $match = $DIRECT{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

436: return $match;

Mutants (Total: 2, Killed: 2, Survived: 0)

437: } 438: 439: # Handle "general servant domestic" pattern variants ●440 → 440 → 445 440: if($occupation =~ $GENERAL_SERVANT_RE) {

Mutants (Total: 1, Killed: 1, Survived: 0)

441: return 'Domestic Servant';

Mutants (Total: 2, Killed: 2, Survived: 0)

442: } 443: 444: # Remove common suffixes that add no occupational meaning ●445 → 453 → 458 445: $occupation =~ s/\s+own account$//i; 446: $occupation =~ s/^formerly //i; 447: $occupation =~ s/\s+retired$//i; 448: $occupation =~ s/\s+heavy worker$//i; 449: $occupation =~ s/\s+own business$//i; 450: $occupation =~ s/Labor/Labour/ig; 451: 452: # Reorder "X domestic" and "X dom" patterns to "Domestic X" 453: if($occupation =~ /^(.+)\s(?:domestic|dom)$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

454: return "Domestic $1";

Mutants (Total: 2, Killed: 2, Survived: 0)

455: } 456: 457: # Convert "works on/for X" to "X worker" ●458 → 458 → 463 458: if($occupation =~ /works?\s+(?:on|for)\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

459: return "$1 worker";

Mutants (Total: 2, Killed: 2, Survived: 0)

460: } 461: 462: # Convert "Cleaner X" prefix form to "X cleaner" ●463 → 463 → 468 463: if($occupation =~ /^Cleaner\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

464: return "$1 cleaner";

Mutants (Total: 2, Killed: 2, Survived: 0)

465: } 466: 467: # Reorder clerk, salesman, foreman, manager patterns ●468 → 468 → 471 468: if($occupation =~ /^Clerk\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

469: return "$1 Clerk";

Mutants (Total: 2, Killed: 2, Survived: 0)

470: } ●471 → 471 → 474 471: if($occupation =~ /^Salesman\s+(.*)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

472: return "$1 Salesman";

Mutants (Total: 2, Killed: 2, Survived: 0)

473: } ●474 → 474 → 479 474: if($occupation =~ /^Foreman\s+(.*)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

475: my $of = $1; 476: $of =~ s/^of the //i; 477: return "$of Foreman";

Mutants (Total: 2, Killed: 2, Survived: 0)

478: } ●479 → 479 → 486 479: if($occupation =~ /^Manager\s+(.*)/i

Mutants (Total: 1, Killed: 1, Survived: 0)

480: && $occupation !~ /^Manager of /i 481: && $occupation !~ /Manager & /i) { 482: return "$1 Manager";

Mutants (Total: 2, Killed: 2, Survived: 0)

483: } 484: 485: # Convert "Shop Assistant X" to "X's Shop Assistant" ●486 → 486 → 491 486: if($occupation =~ /^Shop Assistant\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

487: return "$1's Shop Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

488: } 489: 490: # Convert "X Assistant" to "X's Assistant" for known trade forms ●491 → 491 → 505 491: if($occupation =~ /^(.+)\s+Assistant$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

492: my $trade = $1; 493: if(lc($trade) eq 'bakers') {

Mutants (Total: 1, Killed: 1, Survived: 0)

494: return "Baker's Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

495: } 496: if(lc($trade) eq 'butchers') {

Mutants (Total: 1, Killed: 1, Survived: 0)

497: return "Butcher's Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

498: } 499: unless($trade =~ /'s$/ || lc($trade) eq 'shop') {

Mutants (Total: 1, Killed: 1, Survived: 0)

500: return "${trade}'s Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

501: } 502: } 503: 504: # Convert police* to "police officer" ●505 → 505 → 522 505: if($occupation =~ /police$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

506: return "$occupation officer";

Mutants (Total: 2, Killed: 2, Survived: 0)

507: } 508: 509: # Convert pluralised trade forms e.g. "Builders Labourer" 510: # to possessive "Builder's Labourer". 511: # 512: # The regex captures: 513: # $base ($1) - the word stem without the trailing 's' 514: # $last ($2) - the final character of $base (used to reconstruct 515: # the stem; not used in the guard comparison) 516: # $role ($3) - the following word 517: # 518: # Guard against false positives: "Bus Driver" ($base eq 'Bu') and 519: # "Harness Maker" ($base eq 'Harnes') must not be rewritten. 520: # Compare against $base directly - using "$base$last" is wrong 521: # because it appends the final character a second time. ●522 → 522 → 531 522: if($occupation !~ /gas works/i

Mutants (Total: 1, Killed: 1, Survived: 0)

523: && $occupation =~ /^(.+([a-z]))s\s+([a-z]+)$/i) { 524: my ($base, $last, $role) = ($1, $2, $3); 525: unless(lc($base) eq 'bu' || lc($base) eq 'harnes') {

Mutants (Total: 1, Killed: 1, Survived: 0)

526: return "${base}'s $role";

Mutants (Total: 2, Killed: 2, Survived: 0)

527: } 528: } 529: 530: # Handle "on farm" pattern ●531 → 531 → 535 531: if($occupation =~ /^(.+)\s+on farm$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

532: return "$1 on a farm";

Mutants (Total: 2, Killed: 2, Survived: 0)

533: } 534: 535: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

536: } 537: 538: # _apply_locale 539: # 540: # Purpose: 541: # Applies locale-specific English spelling variants using Lingua::EN::ABC. 542: # Handles en_US (labour->labor), en_CA, and en_GB (default) variants. 543: # 544: # Entry criteria: 545: # $occupation - a normalised English occupation string (may be title-cased) 546: # 547: # Exit status: 548: # Returns the occupation string with locale-appropriate spellings. 549: # Original capitalisation is preserved; Lingua::EN::ABC performs 550: # case-insensitive substitutions and does not require lowercased input. 551: # 552: # Side effects: 553: # None. 554: # 555: # Notes: 556: # Reads $ENV{'LANG'} to determine the locale. 557: # Defaults to British English if no locale is detected. 558: # Do NOT pass lc($occupation) here - doing so strips title case that 559: # cannot be fully recovered by ucfirst() alone. 560: 561: sub _apply_locale { ●562 → 565 → 572 562: my $occupation = shift; 563: 564: # Apply American English spelling variants 565: if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_US/)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

566: $occupation = Lingua::EN::ABC::b2a($occupation); 567: $occupation =~ s/labour/labor/ig; 568: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

569: } 570: 571: # Apply Canadian English spelling variants ●572 → 572 → 577 572: if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_CA/)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

573: return Lingua::EN::ABC::b2c($occupation);

Mutants (Total: 2, Killed: 2, Survived: 0)

574: } 575: 576: # Default to British English spelling 577: return Lingua::EN::ABC::a2b($occupation);

Mutants (Total: 2, Killed: 2, Survived: 0)

578: } 579: 580: # _translate_french 581: # 582: # Purpose: 583: # Translates a normalised English occupation string to French, 584: # applying gendered forms where appropriate. 585: # 586: # Entry criteria: 587: # $occupation - normalised English occupation string 588: # $sex - 'M' or 'F' 589: # $warn_on_error - boolean, if true carp on unknown occupation 590: # 591: # Exit status: 592: # Returns the French occupation string, or the original English 593: # string if no translation is available and warn_on_error is false. 594: # 595: # Side effects: 596: # Carps if warn_on_error is true and no translation is found. 597: # 598: # Notes: 599: # Only a subset of occupations have French translations. 600: # The retired/teaching special cases are handled via regex 601: # rather than the lookup table since they modify rather than 602: # replace the occupation string. 603: 604: sub _translate_french { ●605 → 608 → 613 605: my ($occupation, $sex, $warn_on_error) = @_; 606: 607: # Handle teaching as a special regex case 608: if($occupation =~ /teaching/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

609: return 'professeur';

Mutants (Total: 2, Killed: 2, Survived: 0)

610: } 611: 612: # Handle retired as a suffix replacement ●613 → 616 → 624 613: $occupation =~ s/retired/\x{00E0} la retraite/i; 614: 615: # Handle X Farmer pattern 616: if($occupation =~ /^(.+)\sFarmer$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

617: my $type = $1; 618: return $sex eq 'F'

Mutants (Total: 2, Killed: 2, Survived: 0)

619: ? "Agricultrice de $type" 620: : "Agriculteur de $type"; 621: } 622: 623: # Check the French translation lookup table ●624 → 624 → 632 624: if(my $translation = $FRENCH{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

625: if(ref($translation) eq 'HASH') {

Mutants (Total: 1, Killed: 1, Survived: 0)

626: return $translation->{$sex} // $translation->{'M'};

Mutants (Total: 2, Killed: 2, Survived: 0)

627: } 628: return $translation;

Mutants (Total: 2, Killed: 2, Survived: 0)

629: } 630: 631: # Fall back to English with optional warning ●632 → 632 → 636 632: if($warn_on_error) {

Mutants (Total: 1, Killed: 1, Survived: 0)

633: Carp::carp "Genealogy::Occupation: no French translation for '$occupation'"; 634: } 635: 636: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

637: } 638: 639: # _translate_german 640: # 641: # Purpose: 642: # Translates a normalised English occupation string to German, 643: # applying gendered forms where appropriate. 644: # 645: # Entry criteria: 646: # $occupation - normalised English occupation string 647: # $sex - 'M' or 'F' 648: # $warn_on_error - boolean, if true carp on unknown occupation 649: # 650: # Exit status: 651: # Returns the German occupation string, or the original English 652: # string if no translation is available and warn_on_error is false. 653: # 654: # Side effects: 655: # Carps if warn_on_error is true and no translation is found. 656: # 657: # Notes: 658: # Only a subset of occupations have German translations. 659: # The retired and self-employed special cases are handled via 660: # regex rather than the lookup table. 661: 662: sub _translate_german { ●663 → 666 → 671 663: my ($occupation, $sex, $warn_on_error) = @_; 664: 665: # Handle teaching as a special regex case 666: if($occupation =~ /teaching/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

667: return $sex eq 'F' ? 'Lehrerin' : 'Lehrer';

Mutants (Total: 2, Killed: 2, Survived: 0)

668: } 669: 670: # Handle X Farmer pattern ●671 → 671 → 676 671: if($occupation =~ /^(.+)\sFarmer$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

672: return $sex eq 'F' ? 'Landwirtin' : 'Landwirt';

Mutants (Total: 2, Killed: 2, Survived: 0)

673: } 674: 675: # Handle retired and self-employed as suffix replacements ●676 → 680 → 688 676: $occupation =~ s/retired/im ruhestand/i; 677: $occupation =~ s/self-employed/selbstst\x{00E4}ndig/i; 678: 679: # Check the German translation lookup table 680: if(my $translation = $GERMAN{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

681: if(ref($translation) eq 'HASH') {

Mutants (Total: 1, Killed: 1, Survived: 0)

682: return $translation->{$sex} // $translation->{'M'};

Mutants (Total: 2, Killed: 2, Survived: 0)

683: } 684: return $translation;

Mutants (Total: 2, Killed: 2, Survived: 0)

685: } 686: 687: # Fall back to English with optional warning ●688 → 688 → 692 688: if($warn_on_error) {

Mutants (Total: 1, Killed: 1, Survived: 0)

689: Carp::carp "Genealogy::Occupation: no German translation for '$occupation'"; 690: } 691: 692: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

693: } 694: 695: # _get_language 696: # 697: # Purpose: 698: # Determines the system's default language using environment variables. 699: # 700: # Entry criteria: 701: # None. Reads environment variables directly. 702: # 703: # Exit status: 704: # Returns a two-letter language code string e.g. 'en', 'fr', 'de', 705: # or undef if no language can be determined. 706: # 707: # Side effects: 708: # None. 709: # 710: # Notes: 711: # Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL, 712: # LC_MESSAGES, LANG environment variables. 713: # Returns 'en' for C locale. 714: # See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html 715: 716: sub _get_language { 717: # Try I18N::LangTags::Detect first for most accurate detection ●718 → 718 → 725 718: for my $tag (I18N::LangTags::Detect::detect()) { 719: if($tag =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

720: return lc($1);

Mutants (Total: 2, Killed: 2, Survived: 0)

721: } 722: } 723: 724: # Fall back to checking environment variables in order ●725 → 725 → 728 725: if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

726: return lc($1);

Mutants (Total: 2, Killed: 2, Survived: 0)

727: } ●728 → 728 → 737 728: foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') { 729: my $val = $ENV{$variable}; 730: next unless defined($val); 731: if($val =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

732: return lc($1);

Mutants (Total: 2, Killed: 2, Survived: 0)

733: } 734: } 735: 736: # Handle C locale explicitly 737: return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/);

Mutants (Total: 2, Killed: 2, Survived: 0)

738: 739: return; 740: } 741: 742: =head1 AUTHOR 743: 744: Nigel Horne C<< <njh@bandsman.co.uk> >> 745: 746: =head1 BUGS 747: 748: Please report bugs via the GitHub issue tracker: 749: L<https://github.com/nigelhorne/Genealogy-Occupation/issues> 750: 751: =head1 TODO 752: 753: =over 4 754: 755: =item * Expand French and German translation tables 756: 757: =item * Add support for additional languages 758: 759: =item * Add C<normalise_place()> equivalent for occupation place strings 760: 761: =back 762: 763: =head1 SEE ALSO 764: 765: =over 4 766: 767: =item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Occupation/coverage/> 768: 769: =item * L<Lingua::EN::ABC> 770: 771: =item * L<Params::Get> 772: 773: =item * L<Params::Validate::Strict> 774: 775: =item * L<Return::Set> 776: 777: =back 778: 779: =head1 LICENSE AND COPYRIGHT 780: 781: Copyright 2026 Nigel Horne. 782: 783: This program is released under the following licence: GPL2 784: If you use it, please let me know. 785: 786: =cut 787: 788: 1;