File Coverage

File:blib/lib/Geo/Address/Parser.pm
Coverage:90.1%

linestmtbrancondsubtimecode
1package Geo::Address::Parser;
2
3
7
7
287746
12
use 5.014;
4
7
7
7
11
5
62
use strict;
5
7
7
7
8
4
114
use warnings;
6
7
7
7
7
13
5
183
use Carp;
8
7
7
7
1232
4840
14
use Module::Runtime qw(use_module);
9
7
7
7
1515
384775
114
use Object::Configure 0.16;
10
7
7
7
21
33
108
use Params::Get 0.13;
11
7
7
7
18
6
116
use Params::Validate::Strict qw(validate_strict);
12
7
7
7
8
36
75
use Return::Set 0.02;
13
7
7
7
1379
9179
1841
use Text::Capitalize 'capitalize_title';
14
15 - 23
=head1 NAME

Geo::Address::Parser - Lightweight country-aware address parser from flat text

=head1 VERSION

Version 0.07

=cut
24
25our $VERSION = '0.07';
26
# Supported countries and their corresponding rule modules.
#
# Keys are upper-case country identifiers (new() upper-cases its argument
# before the lookup); values are the names of the classes that provide
# parse_address() for that country.  Every country is reachable through its
# short code(s) and through its full English name.
my %COUNTRY_MODULE = (
	US => 'Geo::Address::Parser::Rules::US',
	USA => 'Geo::Address::Parser::Rules::US',
	'UNITED STATES' => 'Geo::Address::Parser::Rules::US',	# Full name
	UK => 'Geo::Address::Parser::Rules::UK',
	GB => 'Geo::Address::Parser::Rules::UK',
	'UNITED KINGDOM' => 'Geo::Address::Parser::Rules::UK',	# Full name
	CA => 'Geo::Address::Parser::Rules::CA',
	'CANADA' => 'Geo::Address::Parser::Rules::CA',
	AU => 'Geo::Address::Parser::Rules::AU',
	'AUSTRALIA' => 'Geo::Address::Parser::Rules::AU',
	IE => 'Geo::Address::Parser::Rules::IRL',	# Ireland ISO code
	IRL => 'Geo::Address::Parser::Rules::IRL',	# 3-letter code
	'IRELAND' => 'Geo::Address::Parser::Rules::IRL',	# Full name
	NZ => 'Geo::Address::Parser::Rules::NZ',
	'NEW ZEALAND' => 'Geo::Address::Parser::Rules::NZ',
);
43
44 - 108
=head1 METHODS

=head1 SYNOPSIS

    use Geo::Address::Parser;

    my $parser = Geo::Address::Parser->new(country => 'US');

    my $result = $parser->parse("Mastick Senior Center, 1525 Bay St, Alameda, CA");

=encoding utf-8

=head1 DESCRIPTION

This module extracts address components from flat text input. It supports
lightweight parsing for the US, UK, Canada, Australia, Ireland, and New Zealand,
using country-specific regular expressions.

The class can be configured at runtime using environment variables and configuration files.
For example,
setting C<$ENV{'GEO__ADDRESS__PARSER__carp_on_warn'}> causes warnings to use L<Carp>.
For more information about runtime configuration,
see L<Object::Configure>.

=head2 new(country)

Creates a new parser for a specific country (US, UK, CA, AU, IE, NZ).

=head3 FORMAL SPECIFICATION

    [COUNTRY]

    GeoAddressParserNew
    ====================
    country? : COUNTRY
    supported : ℙ COUNTRY
    parser! : Parser

    supported = {US, UK, CA, AU, IE, NZ}
    country? ∈ supported
    parser! = parserFor(country?)

=head3 API SPECIFICATION

=head4 INPUT

  {
    'country' => {
      'type' => 'string', 'min' => 2, 'matches' => qr/^[A-Za-z\s]+$/
    }
  }

=head4 OUTPUT

=over 4

=item * Error: log (if set); croak

=item * Can't parse: undef

=item * Otherwise: Geo::Address::Parser object

=back

=cut
109
# Construct a parser bound to one country's rule module.
#
# Arguments (positional or named, as resolved by Params::Get::get_params):
#   country - country code or name: letters and whitespace only, at least
#             two characters.  The lookup is case-insensitive and ignores
#             leading, trailing and repeated whitespace.
#
# Returns a blessed hashref containing the configured parameters plus
#   country - the normalised, upper-cased country identifier
#   module  - the name of the rules class used by parse()
#
# Croaks with "Unsupported country: ..." when the country has no entry in
# %COUNTRY_MODULE; the same message is first sent to the 'logger' parameter
# when one is present.
sub new {
	my $class = shift;

	my $params = Params::Validate::Strict::validate_strict({
		args => Params::Get::get_params('country', \@_),
		schema => {
			'country' => { 'type' => 'string', 'min' => 2, 'matches' => qr/^[A-Za-z\s]+$/ }
		}
	});

	# Merge in runtime configuration (see Object::Configure); presumably this
	# is where an optional 'logger' entry comes from - TODO confirm
	$params = Object::Configure::configure($class, $params);

	# The schema accepts whitespace anywhere in the value, so " us ",
	# "NZ\n" and "new  zealand" all pass validation.  Normalise them here,
	# otherwise they would be rejected below as unsupported.
	my $country = uc($params->{'country'});
	$country =~ s/\s+/ /g;
	$country =~ s/^ //;
	$country =~ s/ $//;

	my $module = $COUNTRY_MODULE{$country};
	if(!defined($module)) {
		if($params->{'logger'}) {
			$params->{'logger'}->warn("Unsupported country: $country");
		}
		croak("Unsupported country: $country");
	}

	# Load the appropriate parser module dynamically
	use_module($module);

	# Keys listed after the expansion win, so the normalised country
	# replaces the caller's original spelling
	return bless {
		%{$params},
		country => $country,
		module => $module
	}, $class;
}
140
141 - 203
=head2 parse

Takes a string and returns a hashref with the following fields:

=over

=item * name

=item * road

=item * city

=item * region

=item * country

=back

=head3 API SPECIFICATION

=head4 INPUT

  {
    'text' => { 'type' => 'string', 'min' => 2 }
  }

=head4 OUTPUT

=over 4

=item * Error: log (if set); croak

=item * Can't parse: undef

=item * Otherwise:

  {
    'type' => 'hashref', 'min' => 2
  }

=back

=head3 FORMAL SPECIFICATION

    [TEXT, COUNTRY, FIELD, VALUE]

    GeoAddressParserState
    ======================
    country : COUNTRY
    parser : COUNTRY ↛ (TEXT ↛ FIELD ↛ VALUE)

    GeoAddressParserParse
    ======================
    ΔGeoAddressParserState
    text? : TEXT
    result! : FIELD ↛ VALUE

    text? ≠ ∅
    country ∈ dom parser
    result! = (parser(country))(text?)
    result!("country") = country

=cut
204
# Parse a flat-text address using the rules class chosen in new().
#
# Arguments (positional or named, as resolved by Params::Get::get_params):
#   text - the address string, at least two characters long
#
# Returns the hashref produced by the rules class's parse_address(), with
# undefined fields removed, 'country' defaulted to the parser's country and
# 'name' passed through capitalize_title().  When the rules class cannot
# parse the text, returns undef in scalar context (an empty list in list
# context).
#
# Croaks with a usage message if no text is available after validation.
sub parse
{
	my $self = shift;

	my $params = Params::Validate::Strict::validate_strict({
		args => Params::Get::get_params('text', \@_),
		schema => {
			'text' => { 'type' => 'string', 'min' => 2 }
		}
	});

	# Defensive: both cases produce the same usage error
	if(!defined($params) || !defined($params->{'text'})) {
		croak(__PACKAGE__, '::parse: Usage($text => string)');
	}

	my $text = $params->{'text'};
	my $parser = $self->{module};

	# Strip extra whitespace: collapse every run to a single space, trim
	# both ends and remove any space left in front of a comma
	$text =~ s/\s+/ /g;
	$text =~ s/^\s//;
	$text =~ s/\s$//;
	$text =~ s/\s,/,/g;

	my $result = $parser->parse_address($text);

	# Can't parse.  Return explicitly rather than falling off the end of
	# the sub, which would leak whatever false value parse_address() gave.
	return unless $result;

	# FIXME: The code addeth and the code taketh away.  It shouldn't addeth in the first place
	# Drop every field whose value is undefined
	delete @{$result}{ grep { !defined($result->{$_}) } keys %{$result} };

	# Add country field to result if not already present
	$result->{country} //= $self->{country};

	$result->{'name'} = capitalize_title($result->{'name'}) if($result->{'name'});

	# set_return is given a schema requiring a hashref with at least two entries
	return Return::Set::set_return($result, { 'type' => 'hashref', 'min' => 2 });
}
246
247 - 285
=head1 SUPPORT

This module is provided as-is without any warranty.

Please report any bugs or feature requests to C<bug-geo-address-parser at rt.cpan.org>,
or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Geo-Address-Parser>.
I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SEE ALSO

=over 4

=item * L<Test Dashboard|https://nigelhorne.github.io/Geo-Address-Parser/coverage/>

=item * L<Object::Configure>

=back

=head1 LICENCE AND COPYRIGHT

Copyright 2025-2026 Nigel Horne.

Usage is subject to licence terms.

The licence terms of this software are as follows:

=over 4

=item * Personal single user, single computer use: GPL2

=item * All other users (including Commercial, Charity, Educational, Government)
  must apply in writing for a licence for use from Nigel Horne at the
  above e-mail.

=back

=cut
286
2871;