lib/Genealogy/Occupation.pm

Structural Coverage (Approximate)

TER1 (Statement): 100.00%
TER2 (Branch): 100.00%
TER3 (LCSAJ): 100.0% (62/62)
Approximate LCSAJ segments: 97

LCSAJ Legend

Covered — this LCSAJ path was executed during testing.

Not covered — this LCSAJ path was never executed. These are the paths to focus on.

Multiple dots on a line indicate that multiple control-flow paths begin at that line. Hovering over any dot shows:

        start → end → jump
        

Uncovered paths show [NOT COVERED] in the tooltip.

Mutant Testing Legend

Survived (tests missed this) Killed (tests detected this) No mutation
    1: package Genealogy::Occupation;
    2: 
    3: # TODO: railway/railroad = le chemin de fer
    4: 
    5: use strict;
    6: use warnings;
    7: use 5.014;
    8: 
    9: use Carp qw(croak);
   10: use I18N::LangTags::Detect;
   11: use Lingua::EN::ABC;
   12: use Params::Get;
   13: use Readonly;
   14: use Params::Validate::Strict qw(validate_strict);
   15: use Return::Set qw(set_return);
   16: 
   17: our $VERSION = '0.02';
   18: 
   19: # Schema for new() arguments
   20: Readonly my $NEW_SCHEMA => {
   21: 	warn_on_error => {
   22: 		type     => 'boolean',
   23: 		optional => 1,
   24: 		default  => 0,
   25: 	},
   26: };
   27: 
   28: # Schema for normalise() arguments.
   29: # Note: occupation is extracted and normalised to an arrayref BEFORE
   30: # validate_strict is called, because Params::Validate::Strict does not
   31: # yet support union types.  Only the remaining named arguments (sex)
   32: # are validated here.
   33: Readonly my $NORMALISE_SCHEMA => {
   34: 	sex => {
   35: 		type     => 'string',
   36: 		optional => 1,
   37: 		memberof => ['M', 'F'],
   38: 	},
   39: };
   40: 
   41: # Occupations to filter out entirely - these are not real occupations
   42: # but rather descriptions of status or domestic roles
   43: Readonly my %FILTER => map { lc($_) => 1 } qw(
   44: 	unemployed
   45: 	retired
   46: );
   47: 
   48: # Filter patterns - matched case-insensitively against the occupation
   49: Readonly my @FILTER_PATTERNS => (
   50: 	qr/^scho(?:ol|lar)/i,
   51: 	qr/wife$/i,
   52: 	qr/seeking work/i,
   53: 	qr/domestic duties/i,
   54: 	qr/home duties/i,
   55: 	qr/house\s?hold duties/i,
   56: 	qr/^at school$/i,
   57: );
   58: 
   59: # Direct lookup table for exact normalisation matches.
   60: # Keyed on lowercase occupation string.
   61: Readonly my %DIRECT => (
   62: 	'ag lab'                          => 'Agricultural Labourer',
   63: 	'ag labourer'                     => 'Agricultural Labourer',
   64: 	'ag labourer pauper'              => 'Agricultural Labourer',
   65: 	'agric labourer'                  => 'Agricultural Labourer',
   66: 	'ag lab pauper'                   => 'Agricultural Labourer',
   67: 	'farm labourer'                   => 'Agricultural Labourer',
   68: 	'agricultural farm labourer'      => 'Agricultural Labourer',
   69: 	'ordinary agricultural labourer'  => 'Agricultural Labourer',
   70: 	'work on farm'                    => 'Agricultural Labourer',
   71: 	'agricultural lab'                => 'Agricultural Labourer',
   72: 	'agril labourer'                  => 'Agricultural Labourer',
   73: 	'labourer ag'                     => 'Agricultural Labourer',
   74: 	'labourer (ag)'                   => 'Agricultural Labourer',
   75: 	'platelayer railway'              => 'Railway Platelayer',
   76: 	'general servant domestic'        => 'Domestic Servant',
   77: 	'domestic servant'                => 'Domestic Servant',
   78: 	'lorry driver heavy worker'       => 'Lorry Driver',
   79: 	'laundry man'                     => 'Laundryman',
   80: 	"brewer's labourer"               => 'Brewery Labourer',
   81: 	'labourer builders'               => "Builder's Labourer",
   82: 	'gardener domestic'               => 'Gardener and Domestic',
   83: 	'gardner and domestic servant'    => 'Gardener and Domestic',  # sic
   84: 	'under gardener domestic'         => 'Domestic Gardener',
   85: 	'domestic under gardner'          => 'Domestic Gardener',      # sic
   86: 	'market gardener'                 => 'Market Gardener',
   87: 	'plate glass cutter'              => 'Plate Glass Cutter',
   88: 	'pfc us army'                     => 'Private First Class',
   89: 	'labourer gas stoker'             => 'Gas Stoker',
   90: );
   91: 
   92: # Pattern that matches "general serv*dom*" variants
   93: Readonly my $GENERAL_SERVANT_RE => qr/^general serv.+dom/i;
   94: 
   95: # French translations keyed on lowercase English occupation.
   96: # Values are either a plain string or a hashref with M/F keys
   97: # for gendered translations.
   98: Readonly my %FRENCH => (
   99: 	'postman'   => { M => 'Facteur',      F => 'Factrice' },
  100: 	'farmer'    => { M => 'Agriculteur',  F => 'Agricultrice' },
  101: 	'teacher'   => 'Professeur',
  102: 	'nurse'     => { M => 'Infirmier',    F => 'Infirmière' },
  103: );
  104: 
  105: # German translations keyed on lowercase English occupation.
  106: # Values are either a plain string or a hashref with M/F keys.
  107: Readonly my %GERMAN => (
  108:     'teacher'    => { M => 'Lehrer',     F => 'Lehrerin' },
  109:     'farmer'     => { M => 'Bauer',      F => 'Bauerin' },
  110:     'bus driver' => { M => 'Busfahrer',  F => 'Busfahrerin' },
  111:     'doctor'     => 'Arzt',   # simplified non-gendered form for fallback
  112: );
  113: 
  114: =head1 NAME
  115: 
  116: Genealogy::Occupation - Normalise and translate genealogical occupation strings
  117: 
  118: =head1 VERSION
  119: 
  120: Version 0.02
  121: 
  122: =head1 SYNOPSIS
  123: 
  124:     use Genealogy::Occupation;
  125: 
  126:     my $normaliser = Genealogy::Occupation->new();
  127: 
  128:     my @occupations = $normaliser->normalise(
  129:         occupation => 'Ag Lab',
  130:         sex        => 'M',
  131:     );
  132:     # Returns ('Agricultural Labourer')
  133: 
  134:     # Or pass an arrayref
  135:     my @more = $normaliser->normalise(
  136:         occupation => ['Ag Lab', 'Ag Lab', 'Retired'],
  137:         sex        => 'M',
  138:     );
  139:     # Returns ('Agricultural Labourer') - deduplicated and filtered
  140: 
  141: =head1 DESCRIPTION
  142: 
  143: Normalises occupation strings found in genealogical records, handling
  144: common abbreviations, malformed entries, locale-specific spellings and
  145: translations into French and German.
  146: 
  147: Designed to handle poor-quality data from genealogy software imports
  148: where occupation strings may be abbreviated, inconsistent or use
  149: archaic terminology.
  150: 
  151: Processing steps applied in order:
  152: 
  153: =over 4
  154: 
  155: =item 1. Filter out non-occupations (Scholar, Retired, Domestic Duties etc)
  156: 
  157: =item 2. Normalise abbreviations and malformed entries to canonical forms
  158: 
  159: =item 3. Deduplicate consecutive identical or equivalent entries (compared on pre-translation normalised forms)
  160: 
  161: =item 4. Apply locale-specific spellings via C<Lingua::EN::ABC>
  162: 
  163: =item 5. Translate to French or German if system locale requires it
  164: 
  165: =back
  166: 
  167: =head1 METHODS
  168: 
  169: =head2 new
  170: 
  171: =head3 Purpose
  172: 
  173: Constructs a new normaliser object.
  174: 
  175: =head3 API Specification
  176: 
  177: =head4 Input
  178: 
  179:     {
  180:         warn_on_error => {
  181:             type     => 'boolean',
  182:             optional => 1,
  183:             default  => 0,
  184:         },
  185:     }
  186: 
  187: =head4 Output
  188: 
  189:     { type => 'object', isa => 'Genealogy::Occupation' }
  190: 
  191: =head3 Arguments
  192: 
  193: =over 4
  194: 
  195: =item * C<warn_on_error> - If true, unknown occupations that cannot be
  196: translated will emit a warning via C<carp> rather than silently falling
  197: back to English. Optional, defaults to 0.
  198: 
  199: =back
  200: 
  201: =head3 Returns
  202: 
  203: A blessed C<Genealogy::Occupation> object.
  204: 
  205: =head3 Side Effects
  206: 
  207: None.
  208: 
  209: =head3 Notes
  210: 
  211: The system locale is detected once at construction time and cached for
  212: the lifetime of the object.
  213: 
  214: =head3 Example
  215: 
  216:     my $normaliser = Genealogy::Occupation->new({
  217:         warn_on_error => 1,
  218:     });
  219: 
  220: =cut
  221: 
  222: sub new {
  223: 	my $class = shift;
  224: 
  225: 	# Accept both hashref and flat list, defaulting to empty hashref
  226: 	# since all constructor arguments are optional
  227: 	my $args = Params::Get::get_params(undef, @_) // {};
  228: 
  229: 	# Validate constructor arguments
  230: 	validate_strict({
  231: 		description => 'Genealogy::Occupation::new',
  232: 		input       => $args,
  233: 		schema      => $NEW_SCHEMA,
  234: 	});
  235: 
  236: 	# Detect and cache the system language at construction time
  237: 	my $language = _get_language();
  238: 
  239: 	return bless {
  240: 		warn_on_error => $args->{warn_on_error} // 0,
  241: 		language      => $language,
  242: 	}, $class;
  243: }
  244: 
  245: =head2 normalise
  246: 
  247: =head3 Purpose
  248: 
  249: Normalises one or more occupation strings, applying filtering,
  250: deduplication, abbreviation expansion, locale spelling and
  251: translation in order.
  252: 
  253: =head3 API Specification
  254: 
  255: =head4 Input
  256: 
  257:     {
  258:         occupation => {
  259:             type => ['string', 'arrayref'],
  260:         },
  261:         sex => {
  262:             type     => 'string',
  263:             optional => 1,
  264:             memberof => ['M', 'F'],
  265:         },
  266:     }
  267: 
  268: =head4 Output
  269: 
  270:     {
  271:         type         => 'arrayref',
  272:         element_type => 'string',
  273:     }
  274: 
  275: =head3 Arguments
  276: 
  277: =over 4
  278: 
  279: =item * C<occupation> - A single occupation string or an arrayref of
  280: occupation strings. Required.
  281: 
  282: =item * C<sex> - The sex of the person, C<'M'> or C<'F'>. Optional
  283: but required for correct gendered translations in French and German.
  284: Defaults to C<'M'> if not provided when a gendered translation is
  285: needed.
  286: 
  287: =back
  288: 
  289: =head3 Returns
  290: 
  291: An arrayref of normalised occupation strings. May be empty if all
  292: occupations were filtered out.
  293: 
  294: =head3 Side Effects
  295: 
  296: If C<warn_on_error> was set at construction and an occupation cannot
  297: be translated, emits a warning via C<carp>.
  298: 
  299: =head3 Notes
  300: 
  301: Deduplication operates across the full list of occupations passed in.
  302: Processing a single occupation at a time will not deduplicate across
  303: multiple calls.
  304: 
  305: Deduplication compares the pre-translation normalised English forms, not
  306: the translated output.  This means two consecutive identical English
  307: occupations correctly collapse to one entry even in French or German
  308: locales, where the translated results stored in the output array would
  309: otherwise never match the incoming English string.
  310: 
  311: =head3 Example
  312: 
  313:     my $result = $normaliser->normalise(
  314:         occupation => ['Ag Lab', 'Ag Lab', 'Retired'],
  315:         sex        => 'M',
  316:     );
  317:     # Returns ['Agricultural Labourer']
  318: 
  319:     my $result = $normaliser->normalise(
  320:         occupation => 'Platelayer Railway',
  321:     );
  322:     # Returns ['Railway Platelayer']
  323: 
  324: =cut
  325: 
  326: sub normalise {
327 → 360 → 406327 → 360 → 0  327: 	my $self = shift;
  328: 
  329: 	# Accept both hashref and flat list
  330: 	my $args = Params::Get::get_params(undef, @_);
  331: 
  332: 	# Extract and normalise occupation to an arrayref BEFORE calling
  333: 	# validate_strict.  Params::Validate::Strict does not yet support
  334: 	# union types, so passing an arrayref against a 'string' schema
  335: 	# would die.  We handle the type check here explicitly instead.
  336: 	my $raw = delete $args->{occupation};
  337: 	croak 'Genealogy::Occupation::normalise: occupation is required'
  338: 		unless defined $raw;
  339: 	my $occupations = ref($raw) eq 'ARRAY' ? $raw : [ $raw ];
  340: 
  341: 	# Validate remaining named arguments (sex) strictly
  342: 	validate_strict({
  343: 		description => 'Genealogy::Occupation::normalise',
  344: 		input       => $args,
  345: 		schema      => $NORMALISE_SCHEMA,
  346: 	});
  347: 
  348: 	# Default sex to M if not provided, needed for gendered translations
  349: 	my $sex = $args->{sex} // 'M';
  350: 
  351: 	my $language = $self->{language} // 'en';
  352: 	my @result;
  353: 	# Track the last normalised English form for deduplication.  We cannot
  354: 	# use $result[-1] for this because @result stores the translated output;
  355: 	# comparing a translated value against a pre-translation English string
  356: 	# means consecutive identical occupations are never deduplicated in
  357: 	# French or German locales.
  358: 	my $last_normalised = '';
  359: 
  360: 	foreach my $occupation (@{$occupations}) {
  361: 		# Clean up whitespace and punctuation artifacts
  362: 		$occupation =~ tr/\r\n/ /;
  363: 		$occupation =~ s/\.+$//;
  364: 		$occupation =~ s/[\(\)]//g;
  365: 		$occupation =~ s/\s\s+/ /g;
  366: 		$occupation =~ s/\s+$//;
  367: 		$occupation =~ s/\./;/g;
  368: 
  369: 		# Step 1: filter out non-occupations
  370: 		next if $FILTER{lc($occupation)};
  371: 		my $filtered = 0;
  372: 		foreach my $pattern (@FILTER_PATTERNS) {
  373: 			if($occupation =~ $pattern) {

Mutants (Total: 1, Killed: 1, Survived: 0)

374: $filtered = 1; 375: last; 376: } 377: } 378: next if $filtered; 379: 380: # Step 2: normalise the occupation string 381: $occupation = _normalise_single($occupation); 382: next unless length($occupation); 383: 384: # Step 3: deduplicate against the previous normalised (pre-translation) 385: # entry, not against $result[-1] which holds the translated form 386: next if lc($last_normalised) eq lc($occupation); 387: $last_normalised = $occupation; 388: 389: # Step 4: apply locale-specific spellings for English variants 390: if($language eq 'en') {

Mutants (Total: 1, Killed: 1, Survived: 0)

391: $occupation = _apply_locale($occupation); 392: } 393: 394: # Step 5: translate to target language if not English 395: if($language eq 'fr') {

Mutants (Total: 1, Killed: 1, Survived: 0)

396: $occupation = _translate_french($occupation, $sex, 397: $self->{warn_on_error}); 398: } elsif($language eq 'de') { 399: $occupation = _translate_german($occupation, $sex, 400: $self->{warn_on_error}); 401: } 402: 403: push @result, ucfirst($occupation); 404: } 405: 406 → 406 → 0 406: return set_return(\@result, { type => 'arrayref', element_type => 'string' }); 407: } 408: 409: # _normalise_single 410: # 411: # Purpose: 412: # Normalises a single occupation string by expanding abbreviations, 413: # fixing malformed entries and applying pattern-based corrections. 414: # 415: # Entry criteria: 416: # $occupation - a non-empty string, already cleaned of whitespace 417: # 418: # Exit status: 419: # Returns the normalised occupation string, or empty string if the 420: # entry should be discarded after normalisation. 421: # 422: # Side effects: 423: # None. 424: # 425: # Notes: 426: # Checks the direct lookup table first for exact matches, 427: # then applies pattern-based rules for more complex cases. 428: # The "sic" comments mark known data quality issues in real 429: # genealogy records that we intentionally handle. 430: 431: sub _normalise_single { 432 → 435 → 440432 → 435 → 0 432: my $occupation = shift; 433: 434: # Check direct lookup table first for exact matches 435: if(my $match = $DIRECT{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

436: return $match;

Mutants (Total: 2, Killed: 2, Survived: 0)

437: } 438: 439: # Handle "general servant domestic" pattern variants 440 → 440 → 445440 → 440 → 0 440: if($occupation =~ $GENERAL_SERVANT_RE) {

Mutants (Total: 1, Killed: 1, Survived: 0)

441: return 'Domestic Servant';

Mutants (Total: 2, Killed: 2, Survived: 0)

442: } 443: 444: # Remove common suffixes that add no occupational meaning 445 → 453 → 458445 → 453 → 0 445: $occupation =~ s/\s+own account$//i; 446: $occupation =~ s/^formerly //i; 447: $occupation =~ s/\s+retired$//i; 448: $occupation =~ s/\s+heavy worker$//i; 449: $occupation =~ s/\s+own business$//i; 450: $occupation =~ s/Labor/Labour/ig; 451: 452: # Reorder "X domestic" and "X dom" patterns to "Domestic X" 453: if($occupation =~ /^(.+)\s(?:domestic|dom)$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

454: return "Domestic $1";

Mutants (Total: 2, Killed: 2, Survived: 0)

455: } 456: 457: # Convert "works on/for X" to "X worker" 458 → 458 → 463458 → 458 → 0 458: if($occupation =~ /works?\s+(?:on|for)\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

459: return "$1 worker";

Mutants (Total: 2, Killed: 2, Survived: 0)

460: } 461: 462: # Convert "Cleaner X" prefix form to "X cleaner" 463 → 463 → 468463 → 463 → 0 463: if($occupation =~ /^Cleaner\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

464: return "$1 cleaner";

Mutants (Total: 2, Killed: 2, Survived: 0)

465: } 466: 467: # Reorder clerk, salesman, foreman, manager patterns 468 → 468 → 471468 → 468 → 0 468: if($occupation =~ /^Clerk\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

469: return "$1 Clerk";

Mutants (Total: 2, Killed: 2, Survived: 0)

470: } 471 → 471 → 474471 → 471 → 0 471: if($occupation =~ /^Salesman\s+(.*)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

472: return "$1 Salesman";

Mutants (Total: 2, Killed: 2, Survived: 0)

473: } 474 → 474 → 479474 → 474 → 0 474: if($occupation =~ /^Foreman\s+(.*)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

475: my $of = $1; 476: $of =~ s/^of the //i; 477: return "$of Foreman";

Mutants (Total: 2, Killed: 2, Survived: 0)

478: } 479 → 479 → 486479 → 479 → 0 479: if($occupation =~ /^Manager\s+(.*)/i

Mutants (Total: 1, Killed: 1, Survived: 0)

480: && $occupation !~ /^Manager of /i 481: && $occupation !~ /Manager & /i) { 482: return "$1 Manager";

Mutants (Total: 2, Killed: 2, Survived: 0)

483: } 484: 485: # Convert "Shop Assistant X" to "X's Shop Assistant" 486 → 486 → 491486 → 486 → 0 486: if($occupation =~ /^Shop Assistant\s+(.+)/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

487: return "$1's Shop Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

488: } 489: 490: # Convert "X Assistant" to "X's Assistant" for known trade forms 491 → 491 → 505491 → 491 → 0 491: if($occupation =~ /^(.+)\s+Assistant$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

492: my $trade = $1; 493: if(lc($trade) eq 'bakers') {

Mutants (Total: 1, Killed: 1, Survived: 0)

494: return "Baker's Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

495: } 496: if(lc($trade) eq 'butchers') {

Mutants (Total: 1, Killed: 1, Survived: 0)

497: return "Butcher's Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

498: } 499: unless($trade =~ /'s$/ || lc($trade) eq 'shop') {

Mutants (Total: 1, Killed: 1, Survived: 0)

500: return "${trade}'s Assistant";

Mutants (Total: 2, Killed: 2, Survived: 0)

501: } 502: } 503: 504: # Convert police* to "police officer" 505 → 505 → 522505 → 505 → 0 505: if($occupation =~ /police$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

506: return "$occupation officer";

Mutants (Total: 2, Killed: 2, Survived: 0)

507: } 508: 509: # Convert pluralised trade forms e.g. "Builders Labourer" 510: # to possessive "Builder's Labourer". 511: # 512: # The regex captures: 513: # $base ($1) - the word stem without the trailing 's' 514: # $last ($2) - the final character of $base (used to reconstruct 515: # the stem; not used in the guard comparison) 516: # $role ($3) - the following word 517: # 518: # Guard against false positives: "Bus Driver" ($base eq 'Bu') and 519: # "Harness Maker" ($base eq 'Harnes') must not be rewritten. 520: # Compare against $base directly - using "$base$last" is wrong 521: # because it appends the final character a second time. 522 → 522 → 531522 → 522 → 0 522: if($occupation !~ /gas works/i

Mutants (Total: 1, Killed: 1, Survived: 0)

523: && $occupation =~ /^(.+([a-z]))s\s+([a-z]+)$/i) { 524: my ($base, $last, $role) = ($1, $2, $3); 525: unless(lc($base) eq 'bu' || lc($base) eq 'harnes') {

Mutants (Total: 1, Killed: 1, Survived: 0)

526: return "${base}'s $role";

Mutants (Total: 2, Killed: 2, Survived: 0)

527: } 528: } 529: 530: # Handle "on farm" pattern 531 → 531 → 535531 → 531 → 0 531: if($occupation =~ /^(.+)\s+on farm$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

532: return "$1 on a farm";

Mutants (Total: 2, Killed: 2, Survived: 0)

533: } 534: 535 → 535 → 0 535: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

536: } 537: 538: # _apply_locale 539: # 540: # Purpose: 541: # Applies locale-specific English spelling variants using Lingua::EN::ABC. 542: # Handles en_US (labour->labor), en_CA, and en_GB (default) variants. 543: # 544: # Entry criteria: 545: # $occupation - a normalised English occupation string (may be title-cased) 546: # 547: # Exit status: 548: # Returns the occupation string with locale-appropriate spellings. 549: # Original capitalisation is preserved; Lingua::EN::ABC performs 550: # case-insensitive substitutions and does not require lowercased input. 551: # 552: # Side effects: 553: # None. 554: # 555: # Notes: 556: # Reads $ENV{'LANG'} to determine the locale. 557: # Defaults to British English if no locale is detected. 558: # Do NOT pass lc($occupation) here - doing so strips title case that 559: # cannot be fully recovered by ucfirst() alone. 560: 561: sub _apply_locale { 562 → 565 → 572562 → 565 → 0 562: my $occupation = shift; 563: 564: # Apply American English spelling variants 565: if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_US/)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

566: $occupation = Lingua::EN::ABC::b2a($occupation); 567: $occupation =~ s/labour/labor/ig; 568: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

569: } 570: 571: # Apply Canadian English spelling variants 572 → 572 → 577572 → 572 → 0 572: if(defined($ENV{'LANG'}) && ($ENV{'LANG'} =~ /^en_CA/)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

573: return Lingua::EN::ABC::b2c($occupation); 574: } 575: 576: # Default to British English spelling 577 → 577 → 0 577: return Lingua::EN::ABC::a2b($occupation); 578: } 579: 580: # _translate_french 581: # 582: # Purpose: 583: # Translates a normalised English occupation string to French, 584: # applying gendered forms where appropriate. 585: # 586: # Entry criteria: 587: # $occupation - normalised English occupation string 588: # $sex - 'M' or 'F' 589: # $warn_on_error - boolean, if true carp on unknown occupation 590: # 591: # Exit status: 592: # Returns the French occupation string, or the original English 593: # string if no translation is available and warn_on_error is false. 594: # 595: # Side effects: 596: # Carps if warn_on_error is true and no translation is found. 597: # 598: # Notes: 599: # Only a subset of occupations have French translations. 600: # The retired/teaching special cases are handled via regex 601: # rather than the lookup table since they modify rather than 602: # replace the occupation string. 603: 604: sub _translate_french { 605 → 608 → 613605 → 608 → 0 605: my ($occupation, $sex, $warn_on_error) = @_; 606: 607: # Handle teaching as a special regex case 608: if($occupation =~ /teaching/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

609: return 'professeur';

Mutants (Total: 2, Killed: 2, Survived: 0)

610: } 611: 612: # Handle retired as a suffix replacement 613 → 616 → 624613 → 616 → 0 613: $occupation =~ s/retired/\x{00E0} la retraite/i; 614: 615: # Handle X Farmer pattern 616: if($occupation =~ /^(.+)\sFarmer$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

617: my $type = $1; 618: return $sex eq 'F'

Mutants (Total: 2, Killed: 2, Survived: 0)

619: ? "Agricultrice de $type" 620: : "Agriculteur de $type"; 621: } 622: 623: # Check the French translation lookup table 624 → 624 → 632624 → 624 → 0 624: if(my $translation = $FRENCH{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

625: if(ref($translation) eq 'HASH') {

Mutants (Total: 1, Killed: 1, Survived: 0)

626: return $translation->{$sex} // $translation->{'M'};

Mutants (Total: 2, Killed: 2, Survived: 0)

627: } 628: return $translation;

Mutants (Total: 2, Killed: 2, Survived: 0)

629: } 630: 631: # Fall back to English with optional warning 632 → 632 → 636632 → 632 → 0 632: if($warn_on_error) {

Mutants (Total: 1, Killed: 1, Survived: 0)

633: Carp::carp "Genealogy::Occupation: no French translation for '$occupation'"; 634: } 635: 636 → 636 → 0 636: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

637: } 638: 639: # _translate_german 640: # 641: # Purpose: 642: # Translates a normalised English occupation string to German, 643: # applying gendered forms where appropriate. 644: # 645: # Entry criteria: 646: # $occupation - normalised English occupation string 647: # $sex - 'M' or 'F' 648: # $warn_on_error - boolean, if true carp on unknown occupation 649: # 650: # Exit status: 651: # Returns the German occupation string, or the original English 652: # string if no translation is available and warn_on_error is false. 653: # 654: # Side effects: 655: # Carps if warn_on_error is true and no translation is found. 656: # 657: # Notes: 658: # Only a subset of occupations have German translations. 659: # The retired and self-employed special cases are handled via 660: # regex rather than the lookup table. 661: 662: sub _translate_german { 663 → 666 → 671663 → 666 → 0 663: my ($occupation, $sex, $warn_on_error) = @_; 664: 665: # Handle teaching as a special regex case 666: if($occupation =~ /teaching/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

667: return $sex eq 'F' ? 'Lehrerin' : 'Lehrer';

Mutants (Total: 2, Killed: 2, Survived: 0)

668: } 669: 670: # Handle X Farmer pattern 671 → 671 → 676671 → 671 → 0 671: if($occupation =~ /^(.+)\sFarmer$/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

672: return $sex eq 'F' ? 'Landwirtin' : 'Landwirt';

Mutants (Total: 2, Killed: 2, Survived: 0)

673: } 674: 675: # Handle retired and self-employed as suffix replacements 676 → 680 → 688676 → 680 → 0 676: $occupation =~ s/retired/im ruhestand/i; 677: $occupation =~ s/self-employed/selbstst\x{00E4}ndig/i; 678: 679: # Check the German translation lookup table 680: if(my $translation = $GERMAN{lc($occupation)}) {

Mutants (Total: 1, Killed: 1, Survived: 0)

681: if(ref($translation) eq 'HASH') {

Mutants (Total: 1, Killed: 1, Survived: 0)

682: return $translation->{$sex} // $translation->{'M'};

Mutants (Total: 2, Killed: 2, Survived: 0)

683: } 684: return $translation;

Mutants (Total: 2, Killed: 2, Survived: 0)

685: } 686: 687: # Fall back to English with optional warning 688 → 688 → 692688 → 688 → 0 688: if($warn_on_error) {

Mutants (Total: 1, Killed: 1, Survived: 0)

689: Carp::carp "Genealogy::Occupation: no German translation for '$occupation'"; 690: } 691: 692 → 692 → 0 692: return $occupation;

Mutants (Total: 2, Killed: 2, Survived: 0)

693: } 694: 695: # _get_language 696: # 697: # Purpose: 698: # Determines the system's default language using environment variables. 699: # 700: # Entry criteria: 701: # None. Reads environment variables directly. 702: # 703: # Exit status: 704: # Returns a two-letter language code string e.g. 'en', 'fr', 'de', 705: # or undef if no language can be determined. 706: # 707: # Side effects: 708: # None. 709: # 710: # Notes: 711: # Checks in order: I18N::LangTags::Detect, LANGUAGE, LC_ALL, 712: # LC_MESSAGES, LANG environment variables. 713: # Returns 'en' for C locale. 714: # See https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html 715: 716: sub _get_language { 717: # Try I18N::LangTags::Detect first for most accurate detection 718 → 718 → 725718 → 718 → 0 718: for my $tag (I18N::LangTags::Detect::detect()) { 719: if($tag =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

720: return lc($1); 721: } 722: } 723: 724: # Fall back to checking environment variables in order 725 → 725 → 728725 → 725 → 0 725: if(($ENV{'LANGUAGE'}) && ($ENV{'LANGUAGE'} =~ /^([a-z]{2})/i)) {

Mutants (Total: 1, Killed: 1, Survived: 0)

726: return lc($1); 727: } 728 → 728 → 737728 → 728 → 0 728: foreach my $variable ('LC_ALL', 'LC_MESSAGES', 'LANG') { 729: my $val = $ENV{$variable}; 730: next unless defined($val); 731: if($val =~ /^([a-z]{2})/i) {

Mutants (Total: 1, Killed: 1, Survived: 0)

732: return lc($1); 733: } 734: } 735: 736: # Handle C locale explicitly 737 → 739 → 0 737: return 'en' if(defined($ENV{'LANG'}) && $ENV{'LANG'} =~ /^C(\.|$)/);

Mutants (Total: 2, Killed: 2, Survived: 0)

738: 739: return; 740: } 741: 742: =head1 AUTHOR 743: 744: Nigel Horne C<< <njh@bandsman.co.uk> >> 745: 746: =head1 BUGS 747: 748: Please report bugs via the GitHub issue tracker: 749: L<https://github.com/nigelhorne/Genealogy-Occupation/issues> 750: 751: =head1 TODO 752: 753: =over 4 754: 755: =item * Expand French and German translation tables 756: 757: =item * Add support for additional languages 758: 759: =item * Add C<normalise_place()> equivalent for occupation place strings 760: 761: =back 762: 763: =head1 SEE ALSO 764: 765: =over 4 766: 767: =item * L<Test Dashboard|https://nigelhorne.github.io/Genealogy-Occupation/coverage/> 768: 769: =item * L<Lingua::EN::ABC> 770: 771: =item * L<Params::Get> 772: 773: =item * L<Params::Validate::Strict> 774: 775: =item * L<Return::Set> 776: 777: =back 778: 779: =head1 LICENSE AND COPYRIGHT 780: 781: Copyright 2026 Nigel Horne. 782: 783: This program is released under the following licence: GPL2 784: If you use it, please let me know. 785: 786: =cut 787: 788: 1;