| File: | blib/lib/Geo/Address/Parser.pm |
| Coverage: | 90.1% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package Geo::Address::Parser; | |||||
| 2 | ||||||
| 3 | 7 7 | 287746 12 | use 5.014; | |||
| 4 | 7 7 7 | 11 5 62 | use strict; | |||
| 5 | 7 7 7 | 8 4 114 | use warnings; | |||
| 6 | ||||||
| 7 | 7 7 7 | 13 5 183 | use Carp; | |||
| 8 | 7 7 7 | 1232 4840 14 | use Module::Runtime qw(use_module); | |||
| 9 | 7 7 7 | 1515 384775 114 | use Object::Configure 0.16; | |||
| 10 | 7 7 7 | 21 33 108 | use Params::Get 0.13; | |||
| 11 | 7 7 7 | 18 6 116 | use Params::Validate::Strict qw(validate_strict); | |||
| 12 | 7 7 7 | 8 36 75 | use Return::Set 0.02; | |||
| 13 | 7 7 7 | 1379 9179 1841 | use Text::Capitalize 'capitalize_title'; | |||
| 14 | ||||||
| 15 - 23 | =head1 NAME Geo::Address::Parser - Lightweight country-aware address parser from flat text =head1 VERSION Version 0.07 =cut | |||||
| 24 | ||||||
| 25 | our $VERSION = '0.07'; | |||||
| 26 | ||||||
| 27 | # Supported countries and their corresponding rule modules | |||||
| 28 | my %COUNTRY_MODULE = ( | |||||
| 29 | US => 'Geo::Address::Parser::Rules::US', | |||||
| 30 | USA => 'Geo::Address::Parser::Rules::US', | |||||
| 31 | UK => 'Geo::Address::Parser::Rules::UK', | |||||
| 32 | GB => 'Geo::Address::Parser::Rules::UK', | |||||
| 33 | CA => 'Geo::Address::Parser::Rules::CA', | |||||
| 34 | 'CANADA' => 'Geo::Address::Parser::Rules::CA', | |||||
| 35 | AU => 'Geo::Address::Parser::Rules::AU', | |||||
| 36 | 'AUSTRALIA' => 'Geo::Address::Parser::Rules::AU', | |||||
| 37 | IE => 'Geo::Address::Parser::Rules::IRL', # Ireland ISO code | |||||
| 38 | IRL => 'Geo::Address::Parser::Rules::IRL', # 3-letter code | |||||
| 39 | 'IRELAND' => 'Geo::Address::Parser::Rules::IRL', # Full name | |||||
| 40 | NZ => 'Geo::Address::Parser::Rules::NZ', | |||||
| 41 | 'NEW ZEALAND' => 'Geo::Address::Parser::Rules::NZ', | |||||
| 42 | ); | |||||
| 43 | ||||||
| 44 - 108 | =head1 METHODS
=head1 SYNOPSIS
use Geo::Address::Parser;
my $parser = Geo::Address::Parser->new(country => 'US');
my $result = $parser->parse("Mastick Senior Center, 1525 Bay St, Alameda, CA");
=encoding utf-8
=head1 DESCRIPTION
This module extracts address components from flat text input. It supports
lightweight parsing for the US, UK, Canada, Australia, Ireland, and New Zealand,
using country-specific regular expressions.
The class can be configured at runtime using environment variables and configuration files,
for example,
setting C<$ENV{'GEO__ADDRESS__PARSER__carp_on_warn'}> causes warnings to use L<Carp>.
For more information about runtime configuration,
see L<Object::Configure>.
=head2 new(country)
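Creates a new parser for a specific country (US, UK, CA, AU, IE, NZ).
The country is matched case-insensitively, and the aliases USA, GB, IRL,
Canada, Australia, Ireland and New Zealand are also accepted.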
=head3 FORMAL SPECIFICATION
[COUNTRY]
GeoAddressParserNew
====================
country? : COUNTRY
supported : ℙ COUNTRY
parser! : Parser
supported = {US, UK, CA, AU, IE, NZ}
country? ∈ supported
parser! = parserFor(country?)
=head3 API SPECIFICATION
=head4 INPUT
{
'country' => {
'type' => 'string', 'min' => 2, 'matches' => qr/^[A-Za-z\s]+$/
}
}
=head4 OUTPUT
=over 4
=item * Error: log (if set); croak
=item * Can't parse: undef
=item * Otherwise: Geo::Address::Parser object
=back
=cut | |||||
| 109 | ||||||
| 110 | sub new { | |||||
| 111 | 9 | 436484 | my $class = shift; | |||
| 112 | ||||||
| 113 | 9 | 24 | my $params = Params::Validate::Strict::validate_strict({ | |||
| 114 | args => Params::Get::get_params('country', \@_), | |||||
| 115 | schema => { | |||||
| 116 | 'country' => { 'type' => 'string', 'min' => 2, 'matches' => qr/^[A-Za-z\s]+$/ } | |||||
| 117 | } | |||||
| 118 | }); | |||||
| 119 | ||||||
| 120 | 8 | 829 | $params = Object::Configure::configure($class, $params); | |||
| 121 | ||||||
| 122 | 8 | 149759 | my $country = uc($params->{'country'}); | |||
| 123 | 8 | 14 | my $module = $COUNTRY_MODULE{$country}; | |||
| 124 | 8 | 14 | if(!defined($module)) { | |||
| 125 | 1 | 3 | if($params->{'logger'}) { | |||
| 126 | 1 | 5 | $params->{'logger'}->warn("Unsupported country: $country"); | |||
| 127 | } | |||||
| 128 | 1 | 116 | croak("Unsupported country: $country"); | |||
| 129 | } | |||||
| 130 | ||||||
| 131 | # Load the appropriate parser module dynamically | |||||
| 132 | 7 | 18 | use_module($module); | |||
| 133 | ||||||
| 134 | return bless { | |||||
| 135 | 7 7 | 39 39 | %{$params}, | |||
| 136 | country => $country, | |||||
| 137 | module => $module | |||||
| 138 | }, $class; | |||||
| 139 | } | |||||
| 140 | ||||||
| 141 - 203 | =head2 parse
Takes a string and returns a hashref with the following fields:
=over
=item * name
=item * road
=item * city
=item * region
=item * country
=back
=head3 API SPECIFICATION
=head4 INPUT
{
'text' => { 'type' => 'string', 'min' => 2 }
}
=head4 OUTPUT
=over 4
=item * Error: log (if set); croak
=item * Can't parse: undef
=item * Otherwise:
{
'type' => 'hashref', 'min' => 2
}
=back
=head3 FORMAL SPECIFICATION
[TEXT, COUNTRY, FIELD, VALUE]
GeoAddressParserState
======================
country : COUNTRY
parser : COUNTRY ⇸ (TEXT ⇸ FIELD ⇸ VALUE)
GeoAddressParserParse
======================
ΔGeoAddressParserState
text? : TEXT
result! : FIELD ⇸ VALUE
text? ≠ ∅
country ∈ dom parser
result! = (parser(country))(text?)
result!("country") = country
=cut | |||||
| 204 | ||||||
| 205 | sub parse | |||||
| 206 | { | |||||
| 207 | 9 | 3151 | my $self = shift; | |||
| 208 | ||||||
| 209 | 9 | 19 | my $params = Params::Validate::Strict::validate_strict({ | |||
| 210 | args => Params::Get::get_params('text', \@_), | |||||
| 211 | schema => { | |||||
| 212 | 'text' => { 'type' => 'string', 'min' => 2 } | |||||
| 213 | } | |||||
| 214 | }); | |||||
| 215 | ||||||
| 216 | 9 | 707 | if(!defined($params)) { | |||
| 217 | 0 | 0 | croak(__PACKAGE__, '::parse: Usage($text => string)'); | |||
| 218 | } | |||||
| 219 | 9 | 14 | my $text = $params->{'text'}; | |||
| 220 | 9 | 13 | if(!defined($text)) { | |||
| 221 | 0 | 0 | croak(__PACKAGE__, '::parse: Usage($text => string)'); | |||
| 222 | } | |||||
| 223 | ||||||
| 224 | 9 | 21 | my $parser = $self->{module}; | |||
| 225 | ||||||
| 226 | # Strip extra whitespace | |||||
| 227 | 9 | 44 | $text =~ s/\s+/ /g; | |||
| 228 | 9 | 12 | $text =~ s/^\s//g; | |||
| 229 | 9 | 12 | $text =~ s/\s$//g; | |||
| 230 | 9 | 13 | $text =~ s/\s,/,/g; | |||
| 231 | ||||||
| 232 | 9 | 20 | if(my $result = $parser->parse_address($text)) { | |||
| 233 | # FIXME: The code addeth and the code taketh away. It shouldn't addeth in the first place | |||||
| 234 | 9 9 | 8 16 | for my $key (keys %{$result}) { | |||
| 235 | 47 | 42 | delete $result->{$key} unless defined $result->{$key}; | |||
| 236 | } | |||||
| 237 | # Add country field to result if not already present | |||||
| 238 | 9 | 32 | $result->{country} //= $self->{country} if $result; | |||
| 239 | ||||||
| 240 | 9 | 39 | $result->{'name'} = capitalize_title($result->{'name'}) if($result->{'name'}); | |||
| 241 | ||||||
| 242 | # Returns a hashref with at least two items: name and country | |||||
| 243 | 9 | 1016 | return Return::Set::set_return($result, { 'type' => 'hashref', 'min' => 2 }); | |||
| 244 | } | |||||
| 245 | } | |||||
| 246 | ||||||
| 247 - 285 | =head1 SUPPORT This module is provided as-is without any warranty. Please report any bugs or feature requests to C<bug-geo-address-parser at rt.cpan.org>, or through the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Geo-Address-Parser>. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SEE ALSO =over 4 =item * L<Test Dashboard|https://nigelhorne.github.io/Geo-Address-Parser/coverage/> =item * L<Object::Configure> =back =head1 LICENCE AND COPYRIGHT Copyright 2025-2026 Nigel Horne. Usage is subject to licence terms. The licence terms of this software are as follows: =over 4 =item * Personal single user, single computer use: GPL2 =item * All other users (including Commercial, Charity, Educational, Government) must apply in writing for a licence for use from Nigel Horne at the above e-mail. =back =cut | |||||
| 286 | ||||||
| 287 | 1; | |||||