| File: | blib/lib/Genealogy/Obituary/Parser.pm |
| Coverage: | 75.6% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package Genealogy::Obituary::Parser; | |||||
| 2 | ||||||
| 3 | 5 5 5 | 299313 5 67 | use strict; | |||
| 4 | 5 5 5 | 5 5 81 | use warnings; | |||
| 5 | ||||||
| 6 | 5 5 5 | 8 4 108 | use Carp; | |||
| 7 | 5 5 5 | 938 951494 88 | use DateTime::Format::Text; | |||
| 8 | 5 5 5 | 15 5 64 | use Exporter 'import'; | |||
| 9 | 5 5 5 | 9 4 157 | use JSON::MaybeXS; | |||
| 10 | 5 5 5 | 935 22243 107 | use Params::Get 0.13; | |||
| 11 | 5 5 5 | 822 15708 94 | use Return::Set 0.03; | |||
| 12 | 5 5 5 | 12 4 21181 | use Params::Validate::Strict; | |||
| 13 | ||||||
| 14 | our @EXPORT_OK = qw(parse_obituary); | |||||
| 15 | our $geocoder; | |||||
| 16 | ||||||
| 17 | # TODO: use Lingua::EN::Tagger; | |||||
| 18 | # TODO: add more general code, e.g., where it looks for father, also look for mother | |||||
| 19 | # TODO: parse https://funeral-notices.co.uk/notice/adams/5244000 | |||||
| 20 | ||||||
| 21 - 29 | =head1 NAME Genealogy::Obituary::Parser - Extract structured family relationships from obituary text =head1 VERSION Version 0.04 =cut | |||||
| 30 | ||||||
| 31 | our $VERSION = '0.04'; | |||||
| 32 | ||||||
| 33 - 93 | =head1 SYNOPSIS
use Genealogy::Obituary::Parser qw(parse_obituary);
my $text = 'She is survived by her husband Paul, daughters Anna and Lucy, and grandchildren Jake and Emma.';
my $data = parse_obituary($text);
# $data = {
# spouse => ['Paul'],
# children => ['Anna', 'Lucy'],
# grandchildren => ['Jake', 'Emma'],
# };
=head1 DESCRIPTION
This module parses free-form obituary text and extracts structured family relationship data
for use in genealogical applications.
It parses obituary text and extracts structured family relationship data,
including details about children, parents, spouse, siblings, grandchildren, and other relatives.
=head1 FUNCTIONS
=head2 parse_obituary($text)
The routine processes the obituary content to identify and organize relevant family information into a clear, structured hash.
It returns a hash reference containing structured family information,
with each family member's data organized into distinct categories such as children, spouse, parents, siblings, etc.
Takes a string, or a ref to a string.
=head3 API SPECIFICATION
=head4 INPUT
{
'text' => {
'type' => 'string', # or stringref
'min' => 1,
'max' => 10000
}, 'geocoder' => { # used to geocode locations to verify they exist
'type' => 'object',
'can' => 'geocode',
'optional' => 1,
}
}
=head4 OUTPUT
=over 4
=item * No matches: undef
=back
{
type => 'hashref',
'min' => 1,
'max' => 10
}
=cut | |||||
| 94 | ||||||
| 95 | sub parse_obituary | |||||
| 96 | { | |||||
| 97 | 13 | 399880 | my $params = Params::Validate::Strict::validate_strict({ | |||
| 98 | args => Params::Get::get_params('text', \@_), | |||||
| 99 | schema => { | |||||
| 100 | 'text' => { | |||||
| 101 | 'type' => 'string', | |||||
| 102 | 'min' => 1, | |||||
| 103 | 'max' => 10000 | |||||
| 104 | }, 'geocoder' => { | |||||
| 105 | 'type' => 'object', | |||||
| 106 | 'can' => 'geocode', | |||||
| 107 | 'optional' => 1, | |||||
| 108 | } | |||||
| 109 | } | |||||
| 110 | }); | |||||
| 111 | ||||||
| 112 | 13 | 1100 | my $text = $params->{'text'}; | |||
| 113 | ||||||
| 114 | 13 | 18 | if(my $geo = $params->{'geocoder'}) { | |||
| 115 | 0 | 0 | $geocoder = $geo; | |||
| 116 | } | |||||
| 117 | ||||||
| 118 | 13 | 14 | Carp::croak(__PACKAGE__, ': Usage: parse_obituary($text)') unless defined($text); | |||
| 119 | ||||||
| 120 | 13 | 14 | if(ref($text) eq 'SCALAR') { | |||
| 121 | 0 0 | 0 0 | $text = ${$text}; | |||
| 122 | } | |||||
| 123 | ||||||
| 124 | 13 | 132 | $text =~ s/\s+, +/, /g; # Collapse multiple commas | |||
| 125 | ||||||
| 126 | # Quick scan to get started | |||||
| 127 | sub parse_obituary_quick { | |||||
| 128 | 0 | 0 | my $text = shift; | |||
| 129 | 0 | 0 | my %data; | |||
| 130 | ||||||
| 131 | 0 | 0 | my @patterns = ( | |||
| 132 | [ qr/\bdaughters?\s+([^.,;]+)/i, 'children' ], | |||||
| 133 | [ qr/\bsons?\s+([^.,;]+)/i, 'children' ], | |||||
| 134 | [ qr/\bchildren\s+([^.,;]+)/i, 'children' ], | |||||
| 135 | [ qr/\bgrandchildren\s+([^.;]+)/i, 'grandchildren' ], | |||||
| 136 | [ qr/\bwife\s+([^.,;]+)/i, 'spouse' ], | |||||
| 137 | [ qr/\bhusband\s+([^.,;]+)/i, 'spouse' ], | |||||
| 138 | [ qr/\bhis parents were\s+([^.,;]+)/i,'parents' ], | |||||
| 139 | [ qr/\bhis father was\s+([^.,;]+)/i, 'parents' ], | |||||
| 140 | [ qr/\bhis mother was\s+([^.,;]+)/i, 'parents' ], | |||||
| 141 | [ qr/\bsister(?:s)?\s+([^.,;]+)/i, 'siblings' ], | |||||
| 142 | [ qr/\bbrother(?:s)?\s+([^.,;]+)/i, 'siblings' ], | |||||
| 143 | [ qr/\bsiblings\s+([^.,;]+)/i, 'siblings' ], | |||||
| 144 | ); | |||||
| 145 | ||||||
| 146 | 0 | 0 | for my $p (@patterns) { | |||
| 147 | 0 | 0 | my ($re, $field) = @$p; | |||
| 148 | 0 | 0 | while ($text =~ /$re/g) { | |||
| 149 | 0 | 0 | my $list = $1; | |||
| 150 | 0 | 0 | next unless $list; | |||
| 151 | ||||||
| 152 | # Robust splitting on commas and "and" | |||||
| 153 | 0 0 0 | 0 0 0 | my @names = grep { length } map { s/^\s+|\s+$//gr } split /\s*(?:,|(?:\band\b))\s*/i, $list; | |||
| 154 | 0 0 0 | 0 0 0 | push @{$data{$field}}, map { { 'name' => $_ } } @names; | |||
| 155 | } | |||||
| 156 | } | |||||
| 157 | ||||||
| 158 | 0 | 0 | return \%data; | |||
| 159 | } | |||||
| 160 | ||||||
| 161 | # my %family = %{parse_obituary_quick($text)}; | |||||
| 162 | 13 | 8 | my %family; | |||
| 163 | ||||||
| 164 | # Helper to extract people from a specific section and remove empty entries | |||||
| 165 | sub extract_people_section { | |||||
| 166 | 7 | 6 | my $section = shift; | |||
| 167 | 7 | 16 | return unless $section; | |||
| 168 | ||||||
| 169 | 7 | 21 | $section =~ s/\s+and\s+/, /g; # Ensure "and" is treated as a separator | |||
| 170 | 7 | 20 | $section =~ s/([A-Za-z]+),\s+([A-Z]{2})/$1<<COMMA>>$2/g; | |||
| 171 | 7 | 20 | my @entries = split /\s*,\s*/, $section; | |||
| 172 | ||||||
| 173 | 7 | 5 | my @people; | |||
| 174 | 7 | 7 | foreach my $entry (@entries) { | |||
| 175 | 19 | 16 | $entry =~ s/<<COMMA>>/, /g; | |||
| 176 | ||||||
| 177 | 19 | 18 | my ($name, $spouse, $location) = ('', '', ''); | |||
| 178 | ||||||
| 179 | # Match "Ian (Terry) Girvan of Surrey, BC" | |||||
| 180 | 19 | 25 | if ($entry =~ /^(\w+)\s+\(([^)]+)\)\s+(\w+)\s+of\s+(.+)$/) { | |||
| 181 | 1 1 1 | 2 1 3 | $name = "$1 $3"; $spouse = $2; $location = $4; | |||
| 182 | } | |||||
| 183 | # Match "Gwen Steeves (Leslie) of Riverview, NB" | |||||
| 184 | elsif ($entry =~ /^(.+?)\s+\(([^)]+)\)\s+of\s+(.+)$/) { | |||||
| 185 | 1 1 1 | 2 1 1 | $name = $1; $spouse = $2; $location = $3; | |||
| 186 | } | |||||
| 187 | # Match "Carol Girvan of Dartmouth, NS" | |||||
| 188 | elsif ($entry =~ /^(.+?)\s+of\s+(.+)$/) { | |||||
| 189 | 1 1 | 1 1 | $name = $1; $location = $2; | |||
| 190 | } else { | |||||
| 191 | # Match names only (e.g. for siblings) | |||||
| 192 | 16 | 15 | $name = $entry; | |||
| 193 | } | |||||
| 194 | ||||||
| 195 | 19 | 13 | next if !$name; # Skip if name is empty | |||
| 196 | 17 | 16 | next if($name =~ /^father-in-law\sto\s/); # Skip follow ons | |||
| 197 | 16 | 15 | last if($name =~ /^devoted\s/i); | |||
| 198 | 15 | 11 | last if($name =~ /^loved\s/i); | |||
| 199 | ||||||
| 200 | # Create a hash and filter out blank fields | |||||
| 201 | 15 | 17 | my %person = ( | |||
| 202 | name => $name, | |||||
| 203 | spouse => $spouse, | |||||
| 204 | location => $location, | |||||
| 205 | ); | |||||
| 206 | ||||||
| 207 | # Remove blank fields | |||||
| 208 | 15 20 45 | 16 25 74 | %person = map { $_ => $person{$_} } grep { defined $person{$_} && $person{$_} ne '' } keys %person; | |||
| 209 | ||||||
| 210 | 15 | 17 | push @people, \%person; | |||
| 211 | } | |||||
| 212 | 7 | 12 | return \@people; | |||
| 213 | } | |||||
| 214 | ||||||
| 215 | sub extract_names_from_phrase { | |||||
| 216 | 1 | 1 | my $phrase = $_[0]; | |||
| 217 | 1 | 0 | my @names; | |||
| 218 | ||||||
| 219 | 1 | 1 | $phrase =~ s/[.;]//g; | |||
| 220 | ||||||
| 221 | # Case: "Christopher, Thomas, and Marsha Cloud" | |||||
| 222 | 1 | 2 | if ($phrase =~ /^((?:\w+\s*,\s*)+\w+),?\s*and\s+(\w+)\s+(\w+)$/) { | |||
| 223 | 0 | 0 | my ($pre, $last_first, $last) = ($1, $2, $3); | |||
| 224 | 0 | 0 | my @firsts = split(/\s*,\s*/, $pre); | |||
| 225 | 0 | 0 | push @firsts, $last_first; | |||
| 226 | 0 0 | 0 0 | push @names, map { "$_ $last" } @firsts; | |||
| 227 | 0 | 0 | return @names; | |||
| 228 | } | |||||
| 229 | ||||||
| 230 | # Case: "Christopher and Thomas Cloud" | |||||
| 231 | 1 | 3 | if ($phrase =~ /^([\w\s]+?)\s+and\s+(\w+)\s+(\w+)$/) { | |||
| 232 | 0 | 0 | my ($first_part, $second_first, $last) = ($1, $2, $3); | |||
| 233 | 0 | 0 | my @firsts = split(/\s*,\s*|\s+and\s+/, $first_part); | |||
| 234 | 0 0 | 0 0 | push @names, map { "$_ $last" } (@firsts, $second_first); | |||
| 235 | 0 | 0 | return @names; | |||
| 236 | } | |||||
| 237 | ||||||
| 238 | # Fallback: Split by comma or 'and' | |||||
| 239 | 1 | 2 | $phrase =~ s/, and grandchildren.+//; # Handle "Anna and Lucy, and grandchildren Jake and Emma" | |||
| 240 | ||||||
| 241 | # Handle "Name1, Name2 and Name3" correctly | |||||
| 242 | 1 | 3 | if($phrase =~ /(.+?),\s*(\w+)\s+and\s+(\w+)/) { | |||
| 243 | 0 | 0 | my ($firsts, $second, $third) = ($1, $2, $3); | |||
| 244 | 0 | 0 | my @firsts = split /\s*,\s*/, $firsts; | |||
| 245 | 0 | 0 | push @names, @firsts, $second, $third; | |||
| 246 | } else { | |||||
| 247 | 1 | 2 | my @parts = split /\s*(?:,|and)\s*/, $phrase; | |||
| 248 | 1 2 | 1 5 | push @names, grep { defined($_) && $_ ne '' } @parts; | |||
| 249 | } | |||||
| 250 | ||||||
| 251 | 1 | 1 | return @names; | |||
| 252 | } | |||||
| 253 | ||||||
| 254 | # Correct extraction of children (skipping "his/her") | |||||
| 255 | 13 | 206 | if ($text =~ /survived by (his|her) children\s*([^\.;]+)/i) { | |||
| 256 | 2 | 2 | my $children_text = $2; | |||
| 257 | 2 | 3 | $family{children} = extract_people_section($children_text); | |||
| 258 | } elsif ($text =~ /Loving mum to\s*([^\.;]+)/i) { # Look for the phrase "Loving mum to" | |||||
| 259 | 1 | 2 | my $children_text = $1; | |||
| 260 | 1 | 1 | $family{children} = extract_people_section($children_text); | |||
| 261 | } elsif ($text =~ /Loving father of\s*([^\.;]+)/i) { # Look for the phrase "Loving father of" | |||||
| 262 | 1 | 2 | my $children_text = $1; | |||
| 263 | 1 | 2 | $family{children} = extract_people_section($children_text); | |||
| 264 | } elsif($text =~ /mother of\s*([^\.;]+)?,/i) { # Look for the phrase "mother of" | |||||
| 265 | 1 | 1 | my $children_text = $1; | |||
| 266 | 1 | 2 | $children_text =~ s/, grandmother.+//; | |||
| 267 | 1 | 2 | $family{children} = extract_people_section($children_text); | |||
| 268 | } elsif($text =~ /sons,?\s*([a-z]+)\s+and\s+([a-z]+)/i) { | |||||
| 269 | 3 | 3 | my @children; | |||
| 270 | my @grandchildren; | |||||
| 271 | ||||||
| 272 | 3 | 9 | push @children, { name => $1, sex => 'M' }, { name => $2, sex => 'M' }; | |||
| 273 | 3 | 27 | if($text =~ /\bdaughter,?\s([a-z]+)/i) { | |||
| 274 | 1 | 2 | push @children, { 'name' => $1, 'sex' => 'F' } | |||
| 275 | } | |||||
| 276 | 3 | 27 | if($text =~ /\bgranddaughter,?\s([a-z]+)/i) { | |||
| 277 | 1 | 2 | push @grandchildren, { 'name' => $1, 'sex' => 'F' }; | |||
| 278 | } | |||||
| 279 | 3 | 6 | $family{children} = \@children if @children; | |||
| 280 | 3 | 4 | $family{grandchildren} = \@grandchildren if @grandchildren; | |||
| 281 | } elsif($text =~ /Surviving are (?:a )?daughters?,\s*Mrs\.\s+(\w+)\s+\(([^)]+)\)\s+(\w+),\s+([^;]+?);/i) { | |||||
| 282 | # Handle "Surviving are a daughter, Mrs. Walter (Ruth Ann) Gerke, Fort Wayne" | |||||
| 283 | 0 | 0 | my @children; | |||
| 284 | 0 | 0 | my $spouse_first = $1; | |||
| 285 | 0 | 0 | my $daughter_name = $2; | |||
| 286 | 0 | 0 | my $spouse_last = $3; | |||
| 287 | 0 | 0 | my $location = $4; | |||
| 288 | 0 | 0 | $location =~ s/,\s*$//; | |||
| 289 | ||||||
| 290 | 0 | 0 | push @children, { | |||
| 291 | name => $daughter_name, | |||||
| 292 | location => $location, | |||||
| 293 | sex => 'F', | |||||
| 294 | spouse => { | |||||
| 295 | name => "$spouse_first $spouse_last", | |||||
| 296 | sex => 'M' | |||||
| 297 | } | |||||
| 298 | }; | |||||
| 299 | 0 | 0 | $family{children} = \@children; | |||
| 300 | } else { | |||||
| 301 | 5 | 4 | my @children; | |||
| 302 | ||||||
| 303 | # my $tagger = Lingua::EN::Tagger->new(longest_noun_phrase => 0); | |||||
| 304 | # my $tagged = $tagger->add_tags($text); | |||||
| 305 | ||||||
| 306 | 5 | 12 | if($text =~ /\ssons,\s*(.*?);/s) { | |||
| 307 | 2 | 4 | my $sons_text = $1; | |||
| 308 | 2 | 4 | if($sons_text =~ /, all of (.+)$/) { | |||
| 309 | 1 | 1 | my $location = $1; | |||
| 310 | 1 | 2 | while($sons_text =~ /([\w. ]+?),\s/g) { | |||
| 311 | 3 | 3 | my $son = $1; | |||
| 312 | 3 | 4 | if($son =~ /(\w+)\s+and\s+(\w+)/) { | |||
| 313 | 1 | 2 | push @children, { | |||
| 314 | name => $1, | |||||
| 315 | location => $location, | |||||
| 316 | sex => 'M', | |||||
| 317 | }, { | |||||
| 318 | name => $2, | |||||
| 319 | location => $location, | |||||
| 320 | sex => 'M', | |||||
| 321 | }; | |||||
| 322 | 1 | 2 | last; | |||
| 323 | } else { | |||||
| 324 | 2 | 5 | push @children, { | |||
| 325 | name => $son, | |||||
| 326 | location => $location, | |||||
| 327 | sex => 'M', | |||||
| 328 | }; | |||||
| 329 | } | |||||
| 330 | } | |||||
| 331 | } else { | |||||
| 332 | 1 | 3 | while($sons_text =~ /([\w. ]+?),\s*([\w. ]+?)(?:\s+and|\z)/g) { | |||
| 333 | 2 | 7 | push @children, { | |||
| 334 | name => $1, | |||||
| 335 | location => $2, | |||||
| 336 | sex => 'M', | |||||
| 337 | }; | |||||
| 338 | } | |||||
| 339 | } | |||||
| 340 | } | |||||
| 341 | 5 | 22 | if($text =~ /\sdaughters?,\s*Mrs\.\s+(.+?)\s+(\w+),\s+([^;]+)\sand/) { | |||
| 342 | 1 | 3 | push @children, { | |||
| 343 | name => $1, | |||||
| 344 | location => $3, | |||||
| 345 | sex => 'F', | |||||
| 346 | spouse => { 'name' => $2, sex => 'M' } | |||||
| 347 | }; | |||||
| 348 | } elsif($text =~ /one daughter,\s*(.+?),\s*(.+?);/) { | |||||
| 349 | 1 | 1 | my $name = $1; | |||
| 350 | 1 | 1 | my $location = $2; | |||
| 351 | 1 | 2 | if($name =~ /(\w+)\s+(\w+)/) { | |||
| 352 | 1 | 2 | push @children, { | |||
| 353 | name => $1, | |||||
| 354 | location => $location, | |||||
| 355 | sex => 'F', | |||||
| 356 | spouse => { name => $2, sex => 'M' } | |||||
| 357 | }; | |||||
| 358 | } else { | |||||
| 359 | 0 | 0 | push @children, { | |||
| 360 | name => $1, | |||||
| 361 | location => $location, | |||||
| 362 | sex => 'F', | |||||
| 363 | }; | |||||
| 364 | } | |||||
| 365 | } | |||||
| 366 | 5 | 9 | $family{children} = \@children if @children; | |||
| 367 | ||||||
| 368 | 5 | 5 | if(!$family{'children'}) { | |||
| 369 | 3 | 18 | while($text =~ /\b(son|daughter)s?,\s*([A-Z][a-z]+(?:\s+\([A-Z][a-z]+\))?)\s*(?:and their children ([^.;]+))?/g) { | |||
| 370 | 2 | 3 | my $sex = $1 eq 'son' ? 'M' : 'F'; | |||
| 371 | 2 | 2 | my $child = $2; | |||
| 372 | 2 | 2 | my $grandkids = $3; | |||
| 373 | 2 | 7 | if(my @grandchildren = $grandkids ? split /\s*,\s*|\s+and\s+/, $grandkids : ()) { | |||
| 374 | 0 | 0 | push @children, { | |||
| 375 | name => $child, | |||||
| 376 | sex => $sex, | |||||
| 377 | grandchildren => \@grandchildren, | |||||
| 378 | }; | |||||
| 379 | } elsif(($sex eq 'F') && ($child =~ /(.+)\s+\((.+)\)/)) { | |||||
| 380 | 1 | 6 | push @children, { name => $1, sex => 'F', spouse => { name => $2, sex => 'M' } } | |||
| 381 | } elsif($child ne 'Mrs') { | |||||
| 382 | 1 | 13 | push @children, { name => $child, sex => $sex } | |||
| 383 | } | |||||
| 384 | } | |||||
| 385 | } | |||||
| 386 | 5 | 16 | $family{children} = \@children if @children; | |||
| 387 | } | |||||
| 388 | ||||||
| 389 | 13 | 16 | if(!$family{'children'}) { | |||
| 390 | 2 | 3 | if($text =~ /\ssons?[,\s]\s*(.+?)[;\.]/) { | |||
| 391 | 0 | 0 | my $raw = $1; | |||
| 392 | 0 | 0 | $raw =~ s/\sand their .+//; | |||
| 393 | 0 | 0 | my @children = extract_names_from_phrase($raw); | |||
| 394 | 0 0 0 | 0 0 0 | push @{$family{children}}, map { { name => $_, sex => 'M' } } @children; | |||
| 395 | } | |||||
| 396 | 2 | 6 | if($text =~ /\sdaughters?[,\s]\s*(.+?)[;\.]/) { | |||
| 397 | 1 | 1 | my $raw = $1; | |||
| 398 | 1 | 1 | $raw =~ s/\sand their .+//; | |||
| 399 | 1 | 1 | my @children = extract_names_from_phrase($raw); | |||
| 400 | 1 1 2 | 1 1 3 | push @{$family{children}}, map { { name => $_, sex => 'F' } } @children; | |||
| 401 | } | |||||
| 402 | } | |||||
| 403 | ||||||
| 404 | # Extract grandchildren | |||||
| 405 | 13 | 16 | if(!$family{'grandchildren'}) { | |||
| 406 | 12 | 19 | if($text =~ /grandchildren\s+([^\.;]+)/i) { | |||
| 407 | 3 | 11 | my @grandchildren = split /\s*(?:,|and)\s*/i, $1; | |||
| 408 | 3 | 3 | if(scalar(@grandchildren)) { | |||
| 409 | 3 7 8 | 3 9 15 | $family{'grandchildren'} = [ map { { 'name' => $_ } } grep { defined $_ && $_ ne '' } @grandchildren ]; | |||
| 410 | } | |||||
| 411 | } | |||||
| 412 | } | |||||
| 413 | 13 4 | 19 5 | if($family{'grandchildren'} && scalar @{$family{grandchildren}}) { | |||
| 414 | 4 | 16 | while((exists $family{'grandchildren'}->[0]) && (length($family{'grandchildren'}->[0]) == 0)) { | |||
| 415 | 0 0 | 0 0 | shift @{$family{'grandchildren'}}; | |||
| 416 | } | |||||
| 417 | 4 | 7 | if($family{'grandchildren'}->[0] =~ /brothers/) { | |||
| 418 | 0 | 0 | if(!exists $family{'brothers'}) { | |||
| 419 | 0 0 | 0 0 | shift @{$family{'grandchildren'}}; | |||
| 420 | 0 0 | 0 0 | $family{'brothers'} = extract_people_section(join(', ', @{$family{'grandchildren'}})); | |||
| 421 | } | |||||
| 422 | 0 | 0 | delete $family{grandchildren}; | |||
| 423 | } | |||||
| 424 | } else { | |||||
| 425 | 9 | 7 | delete $family{grandchildren}; | |||
| 426 | } | |||||
| 427 | 13 4 | 17 6 | if((!defined($family{'grandchildren'})) || (($#{$family{'grandchildren'}}) <= 0)) { | |||
| 428 | # handle devoted Grandma to Tom, Dick, and Harry and loved Mother-in-law to Jack and Jill" | |||||
| 429 | 10 | 20 | my ($grandchildren_str) = $text =~ /Grandma to (.*?)(?: and loved|$)/; | |||
| 430 | # Normalize and split into individual names | |||||
| 431 | 10 | 6 | my @grandchildren; | |||
| 432 | 10 | 11 | if($grandchildren_str) { | |||
| 433 | 1 | 5 | @grandchildren = split /,\s*|\s+and\s+/, $grandchildren_str; | |||
| 434 | } | |||||
| 435 | 10 | 17 | if(scalar(@grandchildren)) { | |||
| 436 | 1 | 1 | $family{'grandchildren'} = \@grandchildren; | |||
| 437 | } elsif($text =~ /grandm\w+\s/) { | |||||
| 438 | 1 | 1 | my $t = $text; | |||
| 439 | 1 | 5 | $t =~ s/.+(grandm\w+\s+.+?\sand\s[\w\.;,]+).+/$1/; | |||
| 440 | 1 | 6 | $family{grandchildren} = [ split /\s*(?:,|and)\s*/i, ($t =~ /grandm\w+\sto\s+([^\.;]+)/i)[0] || '' ]; | |||
| 441 | } | |||||
| 442 | } | |||||
| 443 | ||||||
| 444 | # Extract siblings (sisters and brothers) correctly, skipping "her" or "his" | |||||
| 445 | 13 | 47 | if($text =~ /predeceased by (his|her) sisters?\s*([^;\.]+);?/i) { | |||
| 446 | 2 | 2 | my $sisters_text = $2; | |||
| 447 | 2 | 2 | $sisters_text =~ s/^,\s+//; | |||
| 448 | 2 | 4 | $family{sisters} = extract_people_section($sisters_text); | |||
| 449 | } else { | |||||
| 450 | 11 | 24 | while($text =~ /\bsister[,\s]\s*([A-Z][a-z]+(?:\s+[A-Z][a-z.]+)*)(?:,\s*([A-Z][a-z]+))?/g) { | |||
| 451 | 2 | 3 | my $name = $1; | |||
| 452 | 2 | 9 | $family{'sisters'} ||= []; | |||
| 453 | 2 | 3 | if($name eq 'Mrs') { | |||
| 454 | 1 | 4 | if($text =~ / sister,\s*Mrs\.\s+([A-Z][a-zA-Z]+\s+[A-Z][a-zA-Z]+)/) { | |||
| 455 | 1 | 1 | $name = $1; | |||
| 456 | } else { | |||||
| 457 | 0 | 0 | undef $name; | |||
| 458 | } | |||||
| 459 | } | |||||
| 460 | 2 | 3 | if($name) { | |||
| 461 | 2 2 | 1 31 | push @{$family{sisters}}, { | |||
| 462 | name => $name, | |||||
| 463 | sex => 'F', | |||||
| 464 | status => ($text =~ /\bpredeceased by.*?$name/i) ? 'deceased' : 'living', | |||||
| 465 | }; | |||||
| 466 | } | |||||
| 467 | } | |||||
| 468 | ||||||
| 469 | 11 | 16 | if(!exists($family{'sisters'})) { | |||
| 470 | 9 | 13 | if($text =~ /\stwo\ssisters,\s*(.*?)\sand\s(.*?)[;:]/s) { | |||
| 471 | 1 | 2 | my($first, $second) = ($1, $2); | |||
| 472 | 1 | 1 | foreach my $sister($first, $second) { | |||
| 473 | 2 | 3 | if($sister =~ /Mrs\.\s(.+?),\s(.+)/) { | |||
| 474 | 2 | 2 | my $name = $1; | |||
| 475 | 2 | 2 | my $location = $2; | |||
| 476 | 2 | 2 | $location =~ s/,$//; | |||
| 477 | 2 | 3 | if($name =~ /(\w+)\s+(\w+)/) { | |||
| 478 | 2 2 | 1 5 | push @{$family{sisters}}, { | |||
| 479 | name => $1, | |||||
| 480 | location => $location, | |||||
| 481 | sex => 'F', | |||||
| 482 | spouse => { 'name' => $2, 'sex' => 'M' } | |||||
| 483 | }; | |||||
| 484 | } else { | |||||
| 485 | 0 0 | 0 0 | push @{$family{sisters}}, { | |||
| 486 | name => $name, | |||||
| 487 | location => $location, | |||||
| 488 | sex => 'F', | |||||
| 489 | }; | |||||
| 490 | } | |||||
| 491 | } else { | |||||
| 492 | 0 0 | 0 0 | push @{$family{sisters}}, { | |||
| 493 | name => $sister, | |||||
| 494 | sex => 'F', | |||||
| 495 | }; | |||||
| 496 | } | |||||
| 497 | } | |||||
| 498 | } | |||||
| 499 | } | |||||
| 500 | ||||||
| 501 | 11 | 14 | if($family{'sisters'}) { | |||
| 502 | # Deduplicate by serializing hashes for comparison | |||||
| 503 | 3 | 3 | my %seen; | |||
| 504 | my @sisters = grep { | |||||
| 505 | 4 | 41 | my $key = JSON::MaybeXS->new->canonical(1)->encode($_); | |||
| 506 | 4 | 11 | !$seen{$key}++ | |||
| 507 | 3 3 | 3 4 | } @{$family{sisters}}; | |||
| 508 | ||||||
| 509 | 3 | 6 | $family{sisters} = \@sisters; | |||
| 510 | } | |||||
| 511 | } | |||||
| 512 | ||||||
| 513 | 13 | 53 | if($text =~ /predeceased by (his|her) brothers?\s*([^;\.]+);?/i) { | |||
| 514 | 0 | 0 | my $brothers_text = $2; | |||
| 515 | 0 | 0 | $brothers_text =~ s/^,\s+//; | |||
| 516 | 0 | 0 | $family{brothers} = extract_people_section($brothers_text); | |||
| 517 | # TODO: mark all statuses to deceased | |||||
| 518 | } else { | |||||
| 519 | 13 | 21 | while ($text =~ /\bbrother,\s*([A-Z][a-z]+(?:\s+[A-Z][a-z.]+)*)(?:,\s*([A-Z][a-z]+))?/g) { | |||
| 520 | 1 | 2 | $family{'brothers'} ||= []; | |||
| 521 | 1 1 | 1 33 | push @{$family{brothers}}, { | |||
| 522 | name => $1, | |||||
| 523 | status => ($text =~ /\bpredeceased by.*?$1/i) ? 'deceased' : 'living', | |||||
| 524 | }; | |||||
| 525 | } | |||||
| 526 | 13 | 34 | if((!$family{'brothers'}) && (!$family{'sisters'}) && (!$family{'siblings'})) { | |||
| 527 | 8 | 24 | if($text =~ /sister of ([a-z]+) and ([a-z]+)/i) { | |||
| 528 | 1 1 | 1 2 | push @{$family{'siblings'}}, | |||
| 529 | { 'name' => $1 }, | |||||
| 530 | { 'name' => $2 } | |||||
| 531 | } | |||||
| 532 | } | |||||
| 533 | ||||||
| 534 | 13 | 16 | if(!exists($family{'brothers'})) { | |||
| 535 | 12 | 16 | if($text =~ /\sbrothers,\s*(.*?)[;\.]/s) { | |||
| 536 | 1 | 1 | my $brothers_text = $1; | |||
| 537 | 1 | 1 | if($brothers_text =~ /, all of (.+)$/) { | |||
| 538 | 1 | 1 | my $location = $1; | |||
| 539 | 1 | 3 | while($brothers_text =~ /([\w. ]+?),\s/g) { | |||
| 540 | 2 | 2 | my $son = $1; | |||
| 541 | 2 | 2 | if($son =~ /(\w+)\s+and\s+(\w+)/) { | |||
| 542 | 1 1 | 1 2 | push @{$family{brothers}}, { | |||
| 543 | name => $1, | |||||
| 544 | location => $location, | |||||
| 545 | sex => 'M', | |||||
| 546 | }, { | |||||
| 547 | name => $2, | |||||
| 548 | location => $location, | |||||
| 549 | sex => 'M', | |||||
| 550 | }; | |||||
| 551 | 1 | 1 | last; | |||
| 552 | } else { | |||||
| 553 | 1 1 | 0 3 | push @{$family{brothers}}, { | |||
| 554 | name => $son, | |||||
| 555 | location => $location, | |||||
| 556 | sex => 'M', | |||||
| 557 | }; | |||||
| 558 | } | |||||
| 559 | } | |||||
| 560 | } else { | |||||
| 561 | 0 | 0 | while($brothers_text =~ /([\w. ]+?),\s*([\w. ]+?)(?:\s+and|\z)/g) { | |||
| 562 | 0 0 | 0 0 | push @{$family{brothers}}, { | |||
| 563 | name => $1, | |||||
| 564 | location => $2, | |||||
| 565 | sex => 'M', | |||||
| 566 | }; | |||||
| 567 | } | |||||
| 568 | } | |||||
| 569 | } | |||||
| 570 | } | |||||
| 571 | } | |||||
| 572 | ||||||
| 573 | 13 | 141 | if(!exists($family{'brothers'}) && $text =~ /\b(?:two|three|four)\s+brothers?,\s*(.+?)(?:,\s*a\s+(?:sister|half-sister)|;)/i) { | |||
| 574 | # Pattern for "two brothers, Name and Name" | |||||
| 575 | 0 | 0 | my $brothers_text = $1; | |||
| 576 | 0 | 0 | my @brothers; | |||
| 577 | ||||||
| 578 | # Handle "Charles F. Harris and Berton Harris" | |||||
| 579 | 0 | 0 | if($brothers_text =~ /\band\b/) { | |||
| 580 | 0 | 0 | my @names = split /\s+and\s+/, $brothers_text; | |||
| 581 | 0 | 0 | foreach my $name (@names) { | |||
| 582 | 0 | 0 | $name =~ s/^\s+|\s+$//g; | |||
| 583 | 0 | 0 | $name =~ s/,\s*$//; | |||
| 584 | 0 | 0 | push @brothers, { | |||
| 585 | name => $name, | |||||
| 586 | sex => 'M', | |||||
| 587 | status => 'living' | |||||
| 588 | }; | |||||
| 589 | } | |||||
| 590 | } | |||||
| 591 | 0 | 0 | $family{brothers} = \@brothers if(scalar @brothers); | |||
| 592 | } | |||||
| 593 | ||||||
| 594 | # Detect nieces/nephews | |||||
| 595 | 13 | 67 | $family{nieces_nephews} = ($text =~ /as well as several nieces and nephews/i) ? ['several nieces and nephews'] : []; | |||
| 596 | ||||||
| 597 | # Extract parents and clean the names by removing unnecessary details | |||||
| 598 | 13 | 339 | if($text =~ /(son|daughter) of the late\s+(.+?)\s+and\s+(.+?)\./i) { | |||
| 599 | 2 | 3 | my $father = $2; | |||
| 600 | 2 | 1 | my $mother = $3; | |||
| 601 | ||||||
| 602 | # Remove anything after the first comma in each parent's name | |||||
| 603 | 2 | 2 | $father =~ s/,.*//; | |||
| 604 | 2 | 2 | $mother =~ s/,.*//; | |||
| 605 | ||||||
| 606 | 2 | 6 | if($mother =~ /(.+)\s+\((.+)\)\s+(.+)/) { | |||
| 607 | 2 | 3 | $mother = "$1 $2"; | |||
| 608 | } | |||||
| 609 | $family{parents} = { | |||||
| 610 | 2 | 4 | father => { name => $father }, | |||
| 611 | mother => { name => $mother }, | |||||
| 612 | }; | |||||
| 613 | } elsif($text =~ /parents were (\w+) and (\w+)/i) { | |||||
| 614 | $family{parents} = { | |||||
| 615 | 1 | 3 | father => { name => $1 }, | |||
| 616 | mother => { name => $2 }, | |||||
| 617 | }; | |||||
| 618 | } | |||||
| 619 | ||||||
| 620 | # Extract spouse's death year and remove the "(year)" from the name | |||||
| 621 | 13 | 301 | if($text =~ /(wife|husband) of the late\s+([\w\s]+)\s+\((\d{4})\)/) { | |||
| 622 | 1 | 1 | my $name = $2; | |||
| 623 | 1 | 1 | my $death_year = $3; | |||
| 624 | ||||||
| 625 | 1 | 10 | $family{'spouse'} ||= []; | |||
| 626 | ||||||
| 627 | # Remove the death year part from the spouse's name | |||||
| 628 | 1 | 1 | $name =~ s/\s*\(\d{4}\)//; | |||
| 629 | ||||||
| 630 | 1 1 | 0 2 | push @{$family{'spouse'}}, { | |||
| 631 | name => $name, | |||||
| 632 | death_year => $death_year | |||||
| 633 | } | |||||
| 634 | } elsif($text =~ /\bmarried ([^,]+),.*?\b(?:on\s+)?([A-Z][a-z]+ \d{1,2}, \d{4})(?:.*?\b(?:at|in)\s+([^.,]+))?/i) { | |||||
| 635 | 1 | 3 | $family{'spouse'} ||= []; | |||
| 636 | ||||||
| 637 | 1 | 2 | my($name, $date, $place) = ($1, $2, $3); | |||
| 638 | 1 | 1 | $name =~ s/\s+on\s.+$//; | |||
| 639 | ||||||
| 640 | 1 1 | 1 3 | push @{$family{'spouse'}}, { | |||
| 641 | name => $name, | |||||
| 642 | married => { | |||||
| 643 | date => $date, | |||||
| 644 | place => $place // '', | |||||
| 645 | } | |||||
| 646 | }; | |||||
| 647 | } elsif($text =~ /husband (?:to|of) the late\s([\w\s]+)[\s\.]/i) { | |||||
| 648 | 1 | 3 | $family{'spouse'} ||= []; | |||
| 649 | ||||||
| 650 | 1 1 | 0 3 | push @{$family{'spouse'}}, { name => $1, status => 'deceased' } | |||
| 651 | } elsif($text =~ /\b(?:wife|husband) of ([^.,;]+)/i) { | |||||
| 652 | 2 | 5 | $family{'spouse'} ||= []; | |||
| 653 | ||||||
| 654 | 2 2 | 1 15 | push @{$family{'spouse'}}, { name => $1 } | |||
| 655 | } elsif($text =~ /\bsurvived by her husband ([^.,;]+)/i) { | |||||
| 656 | 1 1 | 1 2 | push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'M' } | |||
| 657 | } elsif($text =~ /\bsurvived by his wife[,\s]+([^.,;]+)/i) { | |||||
| 658 | 2 2 | 2 5 | push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'F' } | |||
| 659 | } | |||||
| 660 | ||||||
| 661 | # Ensure spouse location is properly handled | |||||
| 662 | 13 | 36 | if(exists $family{spouse} && (ref $family{'spouse'} eq 'HASH') && defined $family{spouse}[0]{location} && $family{spouse}[0]{location} eq 'the late') { | |||
| 663 | 0 | 0 | delete $family{spouse}[0]{location}; | |||
| 664 | } | |||||
| 665 | ||||||
| 666 | # Extract the funeral information | |||||
| 667 | 13 | 215 | if($text =~ /funeral service.*?at\s+(.+?),?\s+on\s+(.*?),?\s+at\s+(.+?)\./) { | |||
| 668 | $family{funeral} = { | |||||
| 669 | 1 | 3 | location => $1, | |||
| 670 | date => $2, | |||||
| 671 | time => $3, | |||||
| 672 | }; | |||||
| 673 | } elsif($text =~ /funeral service.*?at\s+([^\n]+?)\s+on\s+([^\n]+)\s+at\s+([^\n]+)/i) { | |||||
| 674 | $family{funeral} = { | |||||
| 675 | 1 | 3 | location => $1, | |||
| 676 | date => $2, | |||||
| 677 | time => $3, | |||||
| 678 | }; | |||||
| 679 | 1 | 3 | if($family{'funeral'}->{'date'} =~ /(.+?)\.\s{2,}/) { | |||
| 680 | 1 | 1 | $family{'funeral'}->{'date'} = $1; | |||
| 681 | 1 | 2 | if($family{'funeral'}->{'date'} =~ /(.+?)\sat\s(.+)/) { | |||
| 682 | # Wednesday 9th March at 1.15pm. Friends etc. etc. | |||||
| 683 | 1 | 1 | $family{'funeral'}->{'date'} = $1; | |||
| 684 | 1 | 2 | $family{'funeral'}->{'time'} = $2; | |||
| 685 | } | |||||
| 686 | } | |||||
| 687 | } elsif($text =~ /funeral services.+\sat\s(.+)\sat\s(.+),\swith\s/i) { | |||||
| 688 | $family{funeral} = { | |||||
| 689 | 1 | 3 | time => $1, | |||
| 690 | location => $2 | |||||
| 691 | }; | |||||
| 692 | } elsif($text =~ /funeral services.+\sat\s(.+),\swith\s/i) { | |||||
| 693 | 0 | 0 | $family{funeral} = { location => $1 } | |||
| 694 | } elsif($text =~ /services.+\sat\s(.+),\swith\s/i) { | |||||
| 695 | 3 | 6 | $family{funeral} = { location => $1 } | |||
| 696 | } | |||||
| 697 | ||||||
| 698 | # Extract father-in-law and mother-in-law information (if present) | |||||
| 699 | 13 | 25 | if($text =~ /father-in-law to\s+([A-Za-z\s]+)/) { | |||
| 700 | 1 | 1 | my $father_in_law = $1; | |||
| 701 | 1 | 2 | $family{children_in_law} = [{ name => $father_in_law }]; | |||
| 702 | } elsif($text =~ /mother-in-law to\s+([A-Za-z\s]+)/i) { | |||||
| 703 | 1 | 1 | my $mother_in_law = $1; | |||
| 704 | 1 | 6 | $family{children_in_law} = [ split /\s*(?:,|and)\s*/i, ($text =~ /mother-in-law to\s+([^\.;]+)/i)[0] || '' ]; | |||
| 705 | 1 | 3 | if(scalar($family{children_in_law} == 0)) { | |||
| 706 | 0 | 0 | $family{children_in_law} = [{ name => $mother_in_law }]; | |||
| 707 | } | |||||
| 708 | } | |||||
| 709 | ||||||
| 710 | # Extract aunt information | |||||
| 711 | 13 | 27 | if($text =~ /niece of\s+([A-Za-z]+)/) { | |||
| 712 | 1 | 1 | my $aunt = $1; | |||
| 713 | 1 | 2 | $family{aunt} = [{ 'name' => $aunt }]; | |||
| 714 | } | |||||
| 715 | ||||||
| 716 | # Birth info | |||||
| 717 | 13 | 1024 | if($text =~ /[^\b]Born in ([^,]+),.*?\b(?:on\s+)?([A-Z][a-z]+ \d{1,2}, \d{4})/i) { | |||
| 718 | $family{birth} = { | |||||
| 719 | 1 | 2 | place => $1, | |||
| 720 | date => $2, | |||||
| 721 | } | |||||
| 722 | } elsif($text =~ /[^\b]Born in ([a-z,\.\s]+)\s+on\s+(.+)/i) { | |||||
| 723 | 1 | 7 | $family{'birth'}->{'place'} = $1; | |||
| 724 | 1 | 2 | if(my $location = _extract_location($1)) { | |||
| 725 | 0 | 0 | $family{'birth'}->{'location'} = $location; | |||
| 726 | } | |||||
| 727 | 1 | 2 | if(my $dt = _extract_date($2)) { | |||
| 728 | 1 | 13 | $family{'birth'}->{date} = $dt->ymd('/'); | |||
| 729 | } | |||||
| 730 | 1 | 18 | $family{'birth'}->{'place'} =~ s/\s+$//; | |||
| 731 | } elsif($text =~ /S?he was born (.+)\sin ([a-z,\.\s]+)\s+to\s+(.+?)\sand\s(.+?)\./i) { | |||||
| 732 | 1 | 2 | $family{'birth'}->{'place'} = $2; | |||
| 733 | 1 | 1 | my $father = $3; | |||
| 734 | 1 | 1 | my $mother = $4; | |||
| 735 | 1 | 1 | eval { | |||
| 736 | 1 | 2 | if(my $dt = DateTime::Format::Text->parse_datetime($1)) { | |||
| 737 | 1 | 172 | $family{'birth'}->{date} = $dt->ymd('/'); | |||
| 738 | } | |||||
| 739 | }; | |||||
| 740 | # TODO | |||||
| 741 | # if($verbose && $@) { | |||||
| 742 | # Carp::carp($@); | |||||
| 743 | # } | |||||
| 744 | 1 | 11 | if($mother =~ /(.+)\s+\((.+)\)\s+(.+)/) { | |||
| 745 | 1 | 2 | $mother = "$1 $2"; | |||
| 746 | } | |||||
| 747 | 1 | 2 | if($father =~ /(.+?)\.\s\s/) { | |||
| 748 | 0 | 0 | $father = $1; | |||
| 749 | } | |||||
| 750 | $family{parents} = { | |||||
| 751 | 1 | 2 | father => { name => $father }, | |||
| 752 | mother => { name => $mother } | |||||
| 753 | }; | |||||
| 754 | 1 | 6 | if($text =~ /survived by (his|her) (father|mother)[\s,;]/i) { | |||
| 755 | 1 | 2 | $family{parents}->{$2}->{'status'} = 'living'; | |||
| 756 | } | |||||
| 757 | } elsif($text =~ /[^\b]S?he was born\s*(?:on\s+)?([A-Z][a-z]+ \d{1,2}, \d{4})[,\s]+(?:in\s+)([^,]+)?/i) { | |||||
| 758 | 1 | 2 | if(my $dt = _extract_date($1)) { | |||
| 759 | 1 | 7 | $family{'birth'}->{date} = $dt->ymd('/'); | |||
| 760 | } | |||||
| 761 | 1 | 8 | if($2) { | |||
| 762 | 1 | 2 | $family{'birth'}->{'location'} = $2; | |||
| 763 | } | |||||
| 764 | } | |||||
| 765 | ||||||
| 766 | # Date of death | |||||
| 767 | 13 | 139 | if($text =~ /\bpassed away\b.*?\b(?:on\s+)?([A-Z]+ \d{1,2}, \d{4})/i) { | |||
| 768 | 2 | 2 | $family{death}->{date} = $1; | |||
| 769 | 2 | 3 | if(my $dt = _extract_date($1)) { | |||
| 770 | 2 | 11 | $family{death}->{datetime} = $dt; | |||
| 771 | } | |||||
| 772 | } | |||||
| 773 | ||||||
| 774 | # Age at death | |||||
| 775 | 13 | 21 | if($text =~ /,\s(\d{1,3}), of\s/) { | |||
| 776 | 2 | 8 | if($1 < 110) { | |||
| 777 | 2 | 3 | $family{'death'}->{'age'} = $1; | |||
| 778 | } | |||||
| 779 | } | |||||
| 780 | ||||||
| 781 | # Place of death | |||||
| 782 | 13 | 163 | if($text =~ /\b(?:passed away|died)\b([a-z0-9\s,]+)\sat\s+(.+?)\./i) { | |||
| 783 | 3 | 4 | my $place = $2; | |||
| 784 | 3 | 284 | if($place =~ /(.+)\s+on\s+([A-Z]+ \d{1,2}, \d{4})/i) { | |||
| 785 | 0 | 0 | $place = $1; | |||
| 786 | 0 | 0 | $family{death}->{date} = $2; | |||
| 787 | } elsif($place =~ /(.+)\son\s(.+)/) { | |||||
| 788 | 1 | 1 | $place = $1; | |||
| 789 | 1 | 2 | if(my $dt = _extract_date($2)) { | |||
| 790 | 1 | 7 | $family{death}->{date} = $dt->ymd('/'); | |||
| 791 | } | |||||
| 792 | } | |||||
| 793 | 3 | 12 | $place =~ s/^\bthe residence,\s//; | |||
| 794 | 3 | 4 | $place =~ s/\bafter a.*$//; | |||
| 795 | 3 | 3 | $place =~ s/,\s+$//; | |||
| 796 | 3 | 4 | $family{death}->{place} = $place; | |||
| 797 | } | |||||
| 798 | ||||||
| 799 | # Remove blank fields from the main family hash | |||||
| 800 | 13 69 69 | 20 72 128 | %family = map { $_ => $family{$_} } grep { defined $family{$_} && $family{$_} ne '' } keys %family; | |||
| 801 | ||||||
| 802 | # Remove empty arrays the family hash | |||||
| 803 | 13 | 19 | foreach my $key (keys %family) { | |||
| 804 | 69 | 69 | if(ref($family{$key}) eq 'ARRAY') { | |||
| 805 | 50 75 50 | 32 88 39 | $family{$key} = [ grep { /\S/ } @{$family{$key}} ]; | |||
| 806 | 50 50 | 36 57 | if(@{$family{$key}} == 0) { | |||
| 807 | 12 | 12 | delete $family{$key}; | |||
| 808 | } | |||||
| 809 | } | |||||
| 810 | } | |||||
| 811 | ||||||
| 812 | 13 | 17 | return if(!scalar keys(%family)); | |||
| 813 | ||||||
| 814 | 13 | 28 | return Return::Set::set_return(\%family, { type => 'hashref', 'min' => 1, 'max' => 10 }); | |||
| 815 | } | |||||
| 816 | ||||||
| 817 | sub _extract_date | |||||
| 818 | { | |||||
| 819 | 5 | 5 | my $text = shift; | |||
| 820 | 5 | 10 | my $parser = DateTime::Format::Text->new(); | |||
| 821 | 5 | 19 | my $dt; | |||
| 822 | ||||||
| 823 | 5 5 | 6 8 | eval { $dt = $parser->parse_datetime($text); }; | |||
| 824 | 5 | 1218 | return $dt if $dt && !$@; | |||
| 825 | 0 | 0 | return undef; | |||
| 826 | } | |||||
| 827 | ||||||
| 828 | sub _extract_location { | |||||
| 829 | 1 | 3 | my $place_text = shift; | |||
| 830 | ||||||
| 831 | 1 | 3 | unless($geocoder) { | |||
| 832 | 1 1 | 1 85 | eval { require Geo::Coder::Free }; | |||
| 833 | 1 | 335 | if($@) { | |||
| 834 | 1 | 13 | Carp::carp(__PACKAGE__, ' (', __LINE__, "): geocoding locations disabled: $@"); | |||
| 835 | 1 | 559 | return; | |||
| 836 | } | |||||
| 837 | 0 | $geocoder = Geo::Coder::Free->new(); | ||||
| 838 | } | |||||
| 839 | ||||||
| 840 | 0 | my @locations = $geocoder->geocode(location => $place_text); # Use array to improve caching | ||||
| 841 | ||||||
| 842 | 0 | return unless scalar(@locations); | ||||
| 843 | ||||||
| 844 | 0 | my $result = $locations[0]; | ||||
| 845 | ||||||
| 846 | 0 | if(ref($result)) { | ||||
| 847 | return { | |||||
| 848 | 0 | raw => $place_text, | ||||
| 849 | # city => $result->{components}{city} || $result->{components}{town}, | |||||
| 850 | # region => $result->{components}{state}, | |||||
| 851 | # country => $result->{components}{country}, | |||||
| 852 | latitude => $result->latitude(), | |||||
| 853 | longitude => $result->longitude() | |||||
| 854 | }; | |||||
| 855 | } | |||||
| 856 | return { | |||||
| 857 | raw => $place_text, | |||||
| 858 | # city => $result->{components}{city} || $result->{components}{town}, | |||||
| 859 | # region => $result->{components}{state}, | |||||
| 860 | # country => $result->{components}{country}, | |||||
| 861 | latitude => $result->{'latitude'}, | |||||
| 862 | 0 | longitude => $result->{'longitude'} | ||||
| 863 | }; | |||||
| 864 | } | |||||
| 865 | ||||||
| 866 | ||||||
| 867 - 897 | =head1 AUTHOR Nigel Horne, C<< <njh at nigelhorne.com> >> =head1 SEE ALSO Test coverage report: L<https://nigelhorne.github.io/Genealogy-Obituary-Parser/coverage/> =head1 SUPPORT This module is provided as-is without any warranty. =head1 LICENCE AND COPYRIGHT Copyright 2025 Nigel Horne. Usage is subject to licence terms. The licence terms of this software are as follows: =over 4 =item * Personal single user, single computer use: GPL2 =item * All other users (including Commercial, Charity, Educational, Government) must apply in writing for a licence for use from Nigel Horne at the above e-mail. =back =cut | |||||
| 898 | ||||||
| 899 | 1; | |||||