File Coverage

File:blib/lib/Geo/Address/Parser/Rules/UK.pm
Coverage:88.7%

linestmtbrancondsubtimecode
1our @EXPORT_OK = qw(parse_address);
2package Geo::Address::Parser::Rules::UK;
3
4
1
1
1
211
1
15
use strict;
5
1
1
1
2
1
18
use warnings;
6
7
1
1
1
1
6
296
use Exporter 'import';
8our @EXPORT_OK = qw(parse_address);
9
10our $VERSION = '0.07';
11
12 - 38
=head1 NAME

Geo::Address::Parser::Rules::UK - Parsing rules for UK addresses

=head1 DESCRIPTION

Parses a flat UK address string into components: name, road, city, and postcode.

=head1 EXPORTS

=head2 parse_address($text)

Returns a hashref with keys:

=over

=item * name

=item * road

=item * city

=item * postcode

=back

=cut
39
40my $postcode_re = qr/\b([A-Z]{1,2}\d{1,2}[A-Z]?)\s*(\d[A-Z]{2})\b/i;
41
42my %uk_countries = map { $_ => 1 } qw(England Scotland Wales 'Northern Ireland');
43
44my %uk_counties = map { $_ => 1 } (
45# England
46                'Bedfordshire', 'Berkshire', 'Bristol', 'Buckinghamshire', 'Cambridgeshire',
47                'Cheshire', 'City of London', 'Cornwall', 'Cumbria', 'Derbyshire',
48                'Devon', 'Dorset', 'Durham', 'East Riding of Yorkshire', 'East Sussex',
49                'Essex', 'Gloucestershire', 'Greater London', 'Greater Manchester', 'Hampshire',
50                'Herefordshire', 'Hertfordshire', 'Isle of Wight', 'Kent', 'Lancashire',
51                'Leicestershire', 'Lincolnshire', 'Merseyside', 'Norfolk', 'North Yorkshire',
52                'Northamptonshire', 'Northumberland', 'Nottinghamshire', 'Oxfordshire', 'Rutland',
53                'Shropshire', 'Somerset', 'South Yorkshire', 'Staffordshire', 'Suffolk',
54                'Surrey', 'Tyne and Wear', 'Warwickshire', 'West Midlands', 'West Sussex',
55                'West Yorkshire', 'Wiltshire', 'Worcestershire',
56# Scotland
57                'Aberdeenshire', 'Angus', 'Argyll and Bute', 'Clackmannanshire', 'Dumfries and Galloway',
58                'Dundee', 'East Ayrshire', 'East Dunbartonshire', 'East Lothian', 'East Renfrewshire',
59                'Edinburgh', 'Falkirk', 'Fife', 'Glasgow', 'Highland',
60                'Inverclyde', 'Midlothian', 'Moray', 'Na h-Eileanan Siar', 'North Ayrshire',
61                'North Lanarkshire', 'Orkney Islands', 'Perth and Kinross', 'Renfrewshire', 'Scottish Borders',
62                'Shetland Islands', 'South Ayrshire', 'South Lanarkshire', 'Stirling', 'West Dunbartonshire',
63                'West Lothian',
64# Wales
65                'Blaenau Gwent', 'Bridgend', 'Caerphilly', 'Cardiff', 'Carmarthenshire',
66                'Ceredigion', 'Conwy', 'Denbighshire', 'Flintshire', 'Gwynedd',
67                'Isle of Anglesey', 'Merthyr Tydfil', 'Monmouthshire', 'Neath Port Talbot', 'Newport',
68                'Pembrokeshire', 'Powys', 'Rhondda Cynon Taf', 'Swansea', 'Torfaen',
69                'Vale of Glamorgan', 'Wrexham',
70# Northern Ireland
71                'Antrim', 'Armagh', 'Belfast', 'Castlereagh', 'Coleraine',
72                'Cookstown', 'Craigavon', 'Down', 'Dungannon', 'Fermanagh',
73                'Larne', 'Limavady', 'Lisburn', 'Londonderry', 'Magherafelt',
74                'Moyle', 'Newry and Mourne', 'Newtownabbey', 'North Down', 'Omagh',
75                'Strabane', 'Tyrone'
76);
77
78sub parse_address {
79
2
2
        my ($class, $text) = @_;
80
2
2
        return unless defined $text;
81
82
2
9
3
19
        my @parts = map { s/^\s+|\s+$//gr } split /,/, $text;
83
2
9
3
6
        @parts = grep { length $_ } @parts;
84
85
2
2
        my ($name, $road, $city, $county, $postcode, $country);
86
87        # Remove trailing country if present
88
2
5
        if(@parts && exists $uk_countries{$parts[-1]}) {
89
1
1
                $country = 'UK';
90
1
1
                pop @parts;
91        }
92
93        # Look for postcode at end
94
2
6
        if(@parts && $parts[-1] =~ /$postcode_re/) {
95
1
2
                $postcode = uc("$1 $2");
96
1
1
                pop @parts;
97        }
98
99        # Check if last remaining token is a county
100
2
4
        if(@parts && exists $uk_counties{$parts[-1]}) {
101
1
1
                $county = pop @parts;
102        }
103
104        # Assign city: last remaining token
105
2
2
        if(@parts) {
106
2
2
                $city = pop @parts;
107        }
108
109        # Determine road and name
110
2
1
        if(@parts) {
111                # Heuristic: if first remaining token contains a number, treat it as road
112
2
7
                if($parts[-1] =~ /\d/) {
113
1
1
                        $road = pop @parts;
114                }
115        }
116
117        # Remaining tokens form name
118
2
4
        $name = join(', ', @parts) if @parts;
119
120        return {
121
2
8
                name => $name,
122                road => $road,
123                city => $city,
124                county => $county,
125                postcode => $postcode,
126                country => $country // 'UK',
127        };
128}
129
1301;