File Coverage

File:blib/lib/Geo/Address/Parser/Rules/IRL.pm
Coverage:80.4%

linestmtbrancondsubtimecode
1package Geo::Address::Parser::Rules::IRL;
2
3
1
1
1
125366
1
16
use strict;
4
1
1
1
1
1
18
use warnings;
5
1
1
1
3
1
2
use utf8;
6
7# use Geo::Coder::Abbreviations;
8
1
1
1
206
1200
351
use Text::Capitalize qw(capitalize_title);
9
10 - 36
=head1 NAME

Geo::Address::Parser::Rules::IRL - Parsing rules for Irish addresses

=head1 DESCRIPTION

Parses a flat Irish address string into components: name, road, city, region (county), postal code (Eircode), and country.

=head1 EXPORTS

=head2 parse_address($text)

Returns a hashref with keys:

=over

=item * name

=item * road

=item * city

=item * region

=item * postal_code

=item * country

=back

=cut
37
our $VERSION = '0.07';

# Heuristic for detecting building/venue names: a part containing any of
# these words (case-insensitive, whole word) is treated as a name rather
# than a road.
my $BUILDING_RE = qr/\b(?:house|hall|mill|centre|center|museum|church|hotel|inn|club|school|library|theatre)\b/i;

# Eircode pattern: a routing key (one letter followed by two digits, or the
# special key D6W), an optional space, then a four-character identifier.
# Requiring a digit-bearing routing key stops ordinary seven-letter place
# names such as "Wexford" or "Dundalk" from being mistaken for a postcode,
# which a plain "seven alphanumerics" pattern would do.
my $eircode_re = qr/\b(?:[A-Z][0-9]{2}|D6W)\s?[A-Z0-9]{4}\b/i;
45
# Parse a flat, comma-separated Irish address into its components.
#
# Called as a class method, e.g.
#   Geo::Address::Parser::Rules::IRL->parse_address('Main Street, Cork');
#
# Arguments:
#   $class - the invoking class (unused)
#   $text  - the address string
#
# Returns nothing (undef / empty list) when $text is undefined; otherwise a
# hashref with the keys name, road, city, region, postal_code and country.
# Fields that cannot be determined are undef; country is always 'Ireland'.
sub parse_address {
	my ($class, $text) = @_;

	return unless defined $text;

	# Basic normalisation: trim the ends and collapse runs of whitespace
	$text =~ s/^\s+|\s+$//g;
	$text =~ s/\s{2,}/ /g;

	# Abbreviation expansion (Geo::Coder::Abbreviations) is currently disabled

	# Split into comma parts, trim each one and drop the empty ones
	my @parts = grep { length $_ } map { s/^\s+|\s+$//gr } split /,/, $text;

	# Remove a trailing explicit country token (Ireland/Éire)
	if(@parts && $parts[-1] =~ /^(?:ireland|éire)$/i) {
		pop @parts;
	}

	# Try to extract an Eircode from anywhere in the last part
	my $postal_code;
	if(@parts && $parts[-1] =~ /($eircode_re)/) {
		# Copy the capture: $1 would be clobbered by the next successful match
		my $matched = $1;

		$postal_code = uc $matched;
		$parts[-1] =~ s/\Q$matched\E//i;
		$parts[-1] =~ s/^\s+|\s+$//g;
		pop @parts if $parts[-1] eq '';
	}

	# Detect "Co. CountyName", "Co CountyName" or "County CountyName" in the
	# last part.  A separator (dot or whitespace) is required after "Co" so
	# that places which merely start with those letters, such as Cork or
	# Cobh, are not mistaken for a county.
	my $region;
	if(@parts && $parts[-1] =~ /^(?:co\.\s*|co\s+|county\s+)(\S.*)$/i) {
		$region = _format_component($1);
		pop @parts;
	}

	# Prepare result fields
	my ($name, $road, $city);
	my $n = scalar @parts;

	if($n == 0) {
		# Nothing left; return at least region/postal code if present
		return {
			name => undef,
			road => undef,
			city => undef,
			region => $region,
			postal_code => $postal_code,
			country => 'Ireland',
		};
	} elsif($n == 1) {
		# Single token: assume it's a road/locality; the city stays undef
		$road = _format_component($parts[0]);
	} elsif($n == 2) {
		# Two tokens are ambiguous: decide if the first is a building name
		if($parts[0] =~ $BUILDING_RE) {
			$name = _format_component($parts[0]);
			# Treat the locality as the road too; the duplicate is
			# removed by the road/city comparison below
			$road = _format_component($parts[1]);
			$city = $road;
		} else {
			# Likely "road, city"
			$road = _format_component($parts[0]);
			$city = _format_component($parts[1]);
		}
	} else {	# n >= 3
		# Typical: [maybe-building-name..., road, city]
		$city = _format_component($parts[-1]);
		$road = _format_component($parts[-2]);

		# Everything before that is the name
		$name = join(', ', map { _format_component($_) } @parts[0 .. $n - 3]);
	}

	# A road that merely repeats the city carries no information.  Both must
	# be defined before comparing, as the single-token case has no city.
	undef $road if(defined($road) && defined($city) && ($road eq $city));

	# Final result
	return {
		name => $name,
		road => $road,
		city => $city,
		region => $region,
		postal_code => $postal_code,
		country => 'Ireland',
	};
}

# Title-case one address component and repair Irish O' prefixes, which the
# title-casing leaves as e.g. "O'connell" instead of "O'Connell".
# Returns nothing if the value is undefined.
sub _format_component {
	my $value = shift;

	return unless defined $value;

	my $formatted = capitalize_title(lc $value);
	$formatted =~ s/\bO'([a-z])/"O'" . uc($1)/ge;

	return $formatted;
}
142
1431;