File Coverage

File:	blib/lib/Geo/Address/Parser/Rules/IRL.pm
Coverage:	80.4%

line	stmt	bran	cond	sub	time	code
1						package Geo::Address::Parser::Rules::IRL;
2
3	1 1 1			1	125366 1 16	use strict;
4	1 1 1			1	1 1 18	use warnings;
5	1 1 1			1	3 1 2	use utf8;
6
7						# use Geo::Coder::Abbreviations;
8	1 1 1			1	206 1200 351	use Text::Capitalize qw(capitalize_title);
9
10 - 36						=head1 NAME Geo::Address::Parser::Rules::IRL - Parsing rules for Irish addresses =head1 DESCRIPTION Parses a flat Irish address string into components: name, road, city, and postcode. =head1 EXPORTS =head2 parse_address($text) Returns a hashref with keys: =over =item * name =item * road =item * city =item * postcode =back =cut
37
38						our $VERSION = '0.07';
39
40						# heuristics for detecting building/venue names
41						my $BUILDING_RE = qr/\b(?:house\|hall\|mill\|centre\|center\|museum\|church\|hotel\|inn\|club\|school\|library\|theatre)\b/i;
42
43						# Eircode-ish pattern (basic)
44						my $eircode_re = qr/\b[A-Z0-9]{3}\s?[A-Z0-9]{4}\b/i;
45
46						sub parse_address {
47	4			4	10257	my ($class, $text) = @_;
48	4	50			5	return unless defined $text;
49
50						# Basic normalisation
51	4				16	$text =~ s/^\s+\|\s+$//g;
52	4				8	$text =~ s/\s{2,}/ /g;
53
54						# Expand abbreviations if available
55						# my $abbrev;
56						# eval { $abbrev = Geo::Coder::Abbreviations->new; 1 } or $abbrev = undef;
57						# if ($abbrev) {
58						# eval { $text = $abbrev->expand($text) // $text; 1 } or do { /* keep original */ };
59						# }
60
61						# Split into comma parts and trim
62	4 13				5 32	my @parts = map { s/^\s+\|\s+$//gr } split /,/, $text;
63	4 13				5 11	@parts = grep { length $_ } @parts; # drop empty parts
64
65						# Remove trailing explicit country token (Ireland/Éire)
66	4	100	67		13	if (@parts and $parts[-1] =~ /^(?:ireland\|éire)$/i) {
67	2				1	pop @parts;
68						}
69
70						# Try to extract an Eircode from the last part (or anywhere in last part)
71	4				4	my $postal_code;
72	4	100	67		38	if (@parts and $parts[-1] =~ /($eircode_re)/) {
73	1				2	$postal_code = uc $1;
74	1				6	$parts[-1] =~ s/\Q$1\E//i;
75	1				1	$parts[-1] =~ s/^\s+\|\s+$//g;
76	1	50			3	pop @parts if $parts[-1] eq '';
77						}
78
79						# Detect "Co. CountyName" in the last part
80	4				2	my $region;
81	4	100	67		9	if (@parts and $parts[-1] =~ /^co\.?\s*(.+)$/i) {
82	2				5	$region = capitalize_title(lc $1);
83	2				106	pop @parts;
84						}
85
86						# Prepare result fields
87	4				3	my ($name, $road, $city);
88	4				4	my $n = scalar @parts;
89
90	4	50 50 50			5	if ($n == 0) {
91						# nothing left; return at least country/postal if present
92						return {
93	0				0	name => undef,
94						road => undef,
95						city => undef,
96						region => $region,
97						postal_code => $postal_code,
98						country => 'Ireland',
99						};
100						} elsif ($n == 1) {
101						# Single token: assume it's a road/locality
102	0				0	$road = capitalize_title(lc $parts[0]);
103	0				0	$city = undef;
104						} elsif ($n == 2) {
105						# Two tokens — ambiguous: decide if first is a building name
106	4	100			10	if ($parts[0] =~ $BUILDING_RE) {
107	1				2	$name = capitalize_title(lc $parts[0]);
108	1				71	$road = capitalize_title(lc $parts[1]); # treat locality as road too
109	1				46	$city = $road;
110						} else {
111						# likely "road, city"
112	3				4	$road = capitalize_title(lc $parts[0]);
113	3				246	$city = capitalize_title(lc $parts[1]);
114						}
115						} else { # n >= 3
116						# typical: [maybe-building-name..., road, city]
117	0				0	$city = capitalize_title(lc $parts[-1]);
118	0				0	$road = capitalize_title(lc $parts[-2]);
119
120						# everything before that is the name (may be empty)
121	0				0	my @name_parts = @parts[0 .. $n - 3];
122	0 0	0			0 0	$name = join(', ', map { capitalize_title(lc $_) } @name_parts) if @name_parts;
123						}
124
125	4	100			174	undef $road if($road eq $city);
126
127						# Fix Irish O' prefixes — e.g., O'connell => O'Connell
128	4 1	100			6 2	$road =~ s/\bO'([a-z])/"O'" . uc($1)/ge if($road);
129
130						# Final result
131	4				10	my %result = (
132						name => $name,
133						road => $road,
134						city => $city,
135						region => $region,
136						postal_code => $postal_code,
137						country => 'Ireland',
138						);
139
140	4				8	return \%result;
141						}
142
143						1;