File Coverage

File:blib/lib/Geo/Coder/Free/Local.pm
Coverage:66.5%

linestmtbrancondsubpodtimecode
1package Geo::Coder::Free::Local;
2
3
8
8
8
53888
7
83
use strict;
4
8
8
8
11
7
133
use warnings;
5
6
8
8
8
12
3
195
use Carp;
7
8
8
8
901
38419
99
use Geo::Location::Point 0.14;
8
8
8
8
202
8
71
use Geo::Coder::Free;
9
8
8
8
1185
151625
291
use Geo::StreetAddress::US;
10
8
8
8
1645
374342
154
use Lingua::EN::AddressParse;
11
8
8
8
989
4364
102
use Locale::CA;
12
8
8
8
789
937
74
use Locale::US;
13
8
8
8
965
206572
115
use Object::Configure;
14
8
8
8
21
5
106
use Params::Get;
15
8
8
8
1311
65206
262
use Text::xSV::Slurp;
16
17 - 32
=head1 NAME

Geo::Coder::Free::Local -
Provides an interface to locations that you know yourself,
based on locally known data,
thereby giving a way to geocode locations using self-curated data instead of relying on external APIs.
For example, I have found locations by using GPS apps on a smartphone and by
inspecting GeoTagged photographs using
L<https://github.com/nigelhorne/NJH-Snippets/blob/master/bin/geotag>
or by using the app GPSCF which are included here.

=head1 VERSION

Version 0.41

=cut
33
34our $VERSION = '0.41';
35
36
8
8
8
25
7
172
use constant    LIBPOSTAL_UNKNOWN => 0;
37
8
8
8
9
7
103
use constant    LIBPOSTAL_INSTALLED => 1;
38
8
8
8
10
8
16362
use constant    LIBPOSTAL_NOT_INSTALLED => -1;
39our $libpostal_is_installed = LIBPOSTAL_UNKNOWN;
40
41# Alternative mappings for ambiguous or inconsistent place names
42# See also lib/Geo/Coder/Free.pm
43our %alternatives = (
44        'ST LAWRENCE, THANET, KENT' => 'RAMSGATE, KENT',
45        'ST PETERS, THANET, KENT' => 'ST PETERS, KENT',
46        'MINSTER, THANET, KENT' => 'RAMSGATE, KENT',
47        'TYNE AND WEAR' => 'BOROUGH OF NORTH TYNESIDE',
48);
49
50 - 69
=head1 SYNOPSIS

    use Geo::Coder::Free::Local;

    my $geocoder = Geo::Coder::Free::Local->new();
    my $location = $geocoder->geocode(location => 'Ramsgate, Kent, UK');

=head1 DESCRIPTION

Geo::Coder::Free::Local provides an interface to your own location data.

=head1 METHODS

=head2 new

Initializes a geocoder object, loading the local data.

    $geocoder = Geo::Coder::Free::Local->new();

=cut
70
71sub new
72{
73
7
1
101785
        my $class = shift;
74
7
16
        my $params = Params::Get::get_params(undef, \@_) || {};
75
76
7
94
        if(!defined($class)) {
77                # Geo::Coder::Free::Local->new not Geo::Coder::Free::Local::new
78                # carp(__PACKAGE__, ' use ->new() not ::new() to instantiate');
79                # return;
80
81                # FIXME: this only works when no arguments are given
82
1
1
                $class = __PACKAGE__;
83        } elsif(ref($class)) {
84                # clone the given object
85
2
2
2
2
3
8
                return bless { %{$class}, %{$params} }, ref($class);
86        }
87
88
5
11
        $params = Object::Configure::configure($class, $params);
89
90
5
17277
        my @data = <DATA>;
91
92        # TODO: since 'hoh' doesn't allow a CODEREF as a key,
93        #       I could build an hoh manually from this aoh,
94        #       it would make searching much quicker
95        my $self = bless {
96                data => xsv_slurp(
97                        shape => 'aoh',
98                        text_csv => {
99                                allow_loose_quotes => 1,
100                                blank_is_undef => 1,
101                                empty_is_undef => 1,
102                                binary => 1,
103                                escape_char => '\\',
104                        },
105                        string => \join('', grep(!/^\s*(#|$)/, @data))
106                ),
107
5
5
70
3204
                %{$params}
108        }, $class;
109
110        # Process the data to find geographic centres of location clusters.
111        # This will identify groups with 3+ locations in the same city/state/country,
112        #       thus adding towns/cities to the local database
113
5
16
        my $towns = _find_geographic_centres(\@data);
114
5
5
7
4
        foreach my $town (@{$towns}) {
115
12
12
2
15
                push @{$self->{data}}, $town;
116        }
117
118        # Build the hash-based index
119
5
5
6
4
        foreach my $row (@{$self->{data} }) {
120
188
135
                my $key = lc(Geo::Location::Point->new($row)->as_string());
121
188
12382
                $self->{'index'}{$key} = $row;
122        }
123
124        # TODO:  Perhaps the cache can be prepopulated, or stored in a less volatile location?
125        # The cache attribute stores normalized location strings as keys and Geo::Location::Point objects as values
126
5
27
        return $self;
127}
128
129 - 145
=head2 geocode

Performs the geocoding operation by matching an input location against the local data and attempting different strategies for parsing and resolving the address.

Handles parsing of addresses based on location-specific rules, e.g., U.S., U.K., or Canada.
Uses various parsers for country-specific address normalization.

    $location = $geocoder->geocode(location => $location);

    print 'Latitude: ', $location->lat(), "\n";
    print 'Longitude: ', $location->long(), "\n";

    # TODO:
    # @locations = $geocoder->geocode('Portland, USA');
    # diag 'There are Portlands in ', join (', ', map { $_->{'state'} } @locations);

=cut
146
147sub geocode {
148
37
1
10820
        my $self = shift;
149
37
29
        my %params;
150
151        # Try hard to support whatever API the user wants to use
152
37
120
        if(!ref($self)) {
153
0
0
                if(scalar(@_)) {
154
0
0
                        return(__PACKAGE__->new()->geocode(@_));
155                } elsif(!defined($self)) {
156                        # Geo::Coder::Free->geocode()
157
0
0
                        Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)');
158                } elsif($self eq __PACKAGE__) {
159
0
0
                        Carp::croak("Usage: $self", '::geocode(location => $location)');
160                }
161
0
0
                return(__PACKAGE__->new()->geocode($self));
162        } elsif(ref($self) eq 'HASH') {
163
0
0
                return(__PACKAGE__->new()->geocode($self));
164        } elsif(ref($_[0]) eq 'HASH') {
165
16
16
13
26
                %params = %{$_[0]};
166        # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) {
167        } elsif(ref($_[0])) {
168
0
0
                Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)');
169        } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) {
170
8
14
                %params = @_;
171        } else {
172
13
16
                $params{'location'} = shift;
173        }
174
175        my $location = $params{location}
176
37
51
                or Carp::croak('Usage: geocode(location => $location)');
177
178        # Only used to geolocate full addresses, not states/provinces
179
36
88
        return if($location !~ /,.+,/);
180
181        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
182
183        # Look for a quick match, we may get lucky
184
36
33
        my $lc = lc($location);
185
36
57
        $lc =~ s/,\susa$/, us/i;
186
187        # Check the cache first
188
36
59
        if(exists $self->{cache}{$lc}) {
189                # ::diag("Found $lc in the cache");
190
16
25
                return $self->{cache}{$lc};
191        }
192
193        # Use the hash-based index for a quick lookup
194
20
34
        if(exists $self->{index}{$lc}) {
195                # Store the result in the cache for future requests
196
5
17
                return $self->{cache}{$lc} = $self->{index}{$lc}; # Geo::Location::Point object
197        }
198        # ::diag("$location: hash search failed");
199
200
15
22
        if(0) {
201                # Old linear search mode, now replaced by the hash-based index
202                foreach my $row(@{$self->{'data'}}) {
203                        my $rc = Geo::Location::Point->new($row);
204                        my $str = lc($rc->as_string());
205
206                        # ::diag("Compare $str->$lc") if(($location =~ /MINSTER CEME/i) && ($str =~ /MINSTER CEME/i));
207                        # ::diag("Compare $str->$lc");
208                        # print "Compare $str->$lc\n";
209                        if($str eq $lc) {
210                                # This looks pointless and I can't recall why I put it in
211                                # foreach my $column ('name', 'state_district') {
212                                        # if((!defined($rc->{$column})) && exists($rc->{$column})) {
213                                                # delete $rc->{$column};
214                                        # }
215                                # }
216                                # ::diag("$location: linear search suceeded");
217                                return $rc;
218                        }
219
220                        if(($str =~ /, us$/) && ("${str}a" eq $lc)) {
221                                return $rc;
222                        }
223
224                        if(($lc =~ /(.+), (England|UK)$/i) && ($str eq "$1, gb")) {
225                                return $rc;
226                        }
227                }
228                # ::diag("$location: linear search failed");
229        }
230
231        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
232
233
15
13
        my $ap;
234
15
84
        if(($location =~ /USA?$/) || ($location =~ /United States$/)) {
235
1
8
                $ap = $self->{'ap'}->{'us'} // Lingua::EN::AddressParse->new(country => 'US', auto_clean => 1, force_case => 1, force_post_code => 0);
236
1
140135
                $self->{'ap'}->{'us'} = $ap;
237        } elsif($location =~ /(England|Scotland|Wales|Northern Ireland|UK|GB)$/i) {
238
13
29
                $ap = $self->{'ap'}->{'gb'} // Lingua::EN::AddressParse->new(country => 'GB', auto_clean => 1, force_case => 1, force_post_code => 0);
239
13
610224
                $self->{'ap'}->{'gb'} = $ap;
240        } elsif($location =~ /Canada$/) {
241                # TODO: no Canadian addresses yet
242
0
0
                return;
243
0
0
                $ap = $self->{'ap'}->{'ca'} // Lingua::EN::AddressParse->new(country => 'CA', auto_clean => 1, force_case => 1, force_post_code => 0);
244
0
0
                $self->{'ap'}->{'ca'} = $ap;
245        } elsif($location =~ /Australia$/) {
246                # TODO: no Australian addresses yet
247
0
0
                return;
248
0
0
                $ap = $self->{'ap'}->{'au'} // Lingua::EN::AddressParse->new(country => 'AU', auto_clean => 1, force_case => 1, force_post_code => 0);
249
0
0
                $self->{'ap'}->{'au'} = $ap;
250        }
251
15
23
        if($ap) {
252                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
253
254
14
15
                my $l = $location;
255
14
85
                if($l =~ /(.+), (England|UK)$/i) {
256
11
22
                        $l = "$1, GB";
257                }
258                # if(my $error = $ap->parse($l)) {
259                        # Carp::croak($ap->report());
260                        # ::diag('Address parse failed: ', $ap->report());
261                # } else {
262
14
31
                if($ap->parse($l) == 0) {
263                        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
264
5
127785
                        my %c = $ap->components();
265                        # ::diag(Data::Dumper->new([\%c])->Dump());
266
5
826
                        my %addr = ('location' => $l);
267
5
9
                        my $street = $c{'street_name'};
268
5
7
                        if(my $type = $c{'street_type'}) {
269
5
14
                                if(my $a = Geo::Coder::Free::_abbreviate($type)) {
270
5
6
                                        $street .= " $a";
271                                } else {
272
0
0
                                        $street .= " $type";
273                                }
274
5
9
                                if(my $suffix = $c{'street_direction_suffix'}) {
275
0
0
                                        $street .= " $suffix";
276                                }
277
5
6
                                $street =~ s/^0+//;     # Turn 04th St into 4th St
278
5
8
                                $addr{'road'} = $street;
279                        }
280
5
6
                        if(length($c{'subcountry'}) == 2) {
281
0
0
                                $addr{'state'} = $c{'subcountry'};
282                        } else {
283
5
18
                                if($c{'country'} =~ /Canada/i) {
284
0
0
                                        $addr{'country'} = 'CA';
285
0
0
                                        if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($c{'subcountry'})}) {
286
0
0
                                                $addr{'state'} = $twoletterstate;
287                                        }
288                                } elsif($c{'country'} =~ /^(United States|USA|US)$/i) {
289
0
0
                                        $addr{'country'} = 'US';
290
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($c{'subcountry'})}) {
291
0
0
                                                $addr{'state'} = $twoletterstate;
292                                        }
293                                } elsif($c{'country'}) {
294
5
7
                                        $addr{'country'} = $c{'country'};
295
5
5
                                        if($c{'subcountry'}) {
296
5
7
                                                $addr{'state'} = $c{'subcountry'};
297                                        }
298                                }
299                        }
300
5
6
                        $addr{'number'} = $c{'property_identifier'};
301
5
6
                        $addr{'city'} = $c{'suburb'};
302                        # ::diag(Data::Dumper->new([\%addr])->Dump());
303                        # print Data::Dumper->new([\%addr])->Dump(), "\n";
304
5
12
                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
305                                # Store the result in the cache for future requests
306
1
38
                                $self->{cache}{$lc} = $rc;
307
308
1
4
                                return $rc;
309                        }
310
4
6
                        if($addr{'number'}) {
311
1
2
                                if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
312                                        # Store the result in the cache for future requests
313
0
0
                                        $self->{cache}{$lc} = $rc;
314
315
0
0
                                        return $rc;
316                                }
317                        }
318
319                        # Decide if it's worth continuing to search
320
4
4
                        my $found = 0;
321
4
6
                        if(!defined($addr{'country'})) {
322
0
0
                                if($l =~ /(United States|USA|US)$/i) {
323
0
0
                                        $addr{'country'} = 'US';
324                                } else {
325
0
0
                                        die "TODO: extract country from $l";
326                                }
327                        }
328
4
4
4
3
                        foreach my $row(@{$self->{'data'}}) {
329
8
22
                                if((uc($row->{'state'}) eq uc($addr{'state'})) &&
330                                   (uc($row->{'country'}) eq uc($addr{'country'}))) {
331
4
3
                                        $found = 1;
332
4
3
                                        last;
333                                }
334                        }
335
4
13
                        if(!$found) {
336                                # Nothing at all in this state/country,
337                                #       so let's give up looking
338
0
0
                                return;
339                        }
340                }
341        }
342
343
14
41212
        if($location =~ /^(.+?)[,\s]+(United States|USA|USA?)$/i) {
344                # Try Geo::StreetAddress::US, which is rather buggy
345
346
2
2
                my $l = $1;
347
2
3
                $l =~ tr/,/ /;
348
2
7
                $l =~ s/\s\s+/ /g;
349
350                # ::diag(__PACKAGE__, ': ', __LINE__, ": $location ($l)");
351
352                # Work around for RT#122617
353
2
15
                if(($location !~ /\sCounty,/i) && (my $href = (Geo::StreetAddress::US->parse_location($l) || Geo::StreetAddress::US->parse_address($l)))) {
354                        # ::diag(Data::Dumper->new([$href])->Dump());
355
2
261
                        if(my $state = $href->{'state'}) {
356
2
3
                                if(length($state) > 2) {
357
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) {
358
0
0
                                                $state = $twoletterstate;
359                                        }
360                                }
361
2
2
                                my $city;
362
2
4
                                if($href->{city}) {
363
2
6
                                        $city = uc($href->{city});
364                                }
365
2
3
                                if(my $street = $href->{street}) {
366
2
6
                                        if($href->{'type'} && (my $type = Geo::Coder::Free::_abbreviate($href->{'type'}))) {
367
1
1
                                                $street .= " $type";
368                                        }
369
2
4
                                        if($href->{suffix}) {
370
1
2
                                                $street .= ' ' . $href->{suffix};
371                                        }
372
2
3
                                        if(my $prefix = $href->{prefix}) {
373
1
1
                                                $street = "$prefix $street";
374                                        }
375                                        my %addr = (
376
2
6
                                                number => $href->{'number'},
377                                                road => $street,
378                                                city => $city,
379                                                state => $state,
380                                                country => 'US'
381                                        );
382
2
4
                                        if($href->{'number'}) {
383
0
0
                                                if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
384
0
0
                                                        $rc->{'country'} = 'US';
385
386                                                        # Store the result in the cache for future requests
387
0
0
                                                        $self->{cache}{$lc} = $rc;
388
389
0
0
                                                        return $rc;
390                                                }
391                                        }
392
2
6
                                        if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
393
1
35
                                                $rc->{'country'} = 'US';
394
395                                                # Store the result in the cache for future requests
396
1
2
                                                $self->{cache}{$lc} = $rc;
397
398
1
3
                                                return $rc;
399                                        }
400                                        # ::diag(__PACKAGE__, ': ', __LINE__, ": $location");
401
1
4
                                        if($street && !$href->{'number'}) {
402                                                # If you give a building with
403                                                # no street to G:S:US it puts
404                                                # the building name into the
405                                                # street field
406
1
1
                                                $addr{'name'} = $street;
407
1
2
                                                delete $addr{'road'};
408
409
1
1
                                                if(my $rc = $self->_search(\%addr, ('name', 'city', 'state', 'country'))) {
410
0
0
                                                        $rc->{'country'} = 'US';
411
412                                                        # Store the result in the cache for future requests
413
0
0
                                                        $self->{cache}{$lc} = $rc;
414
415
0
0
                                                        return $rc;
416                                                }
417                                        }
418                                }
419                        }
420                }
421
422                # Hack to find "name, street, town, state, US"
423
1
4
                my @addr = split(/,\s*/, $location);
424                # ::diag(__PACKAGE__, ': ', __LINE__, ' ', scalar(@addr));
425
1
3
                if(scalar(@addr) == 5) {
426                        # ::diag(__PACKAGE__, ': ', __LINE__, ": $location");
427                        # ::diag(Data::Dumper->new([\@addr])->Dump());
428
1
2
                        my $state = $addr[3];
429
1
2
                        if(length($state) > 2) {
430
1
6
                                if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) {
431
1
289
                                        $state = $twoletterstate;
432                                }
433                        }
434
1
10
                        if(length($state) == 2) {
435
1
2
                                my %addr = (
436                                        city => $addr[2],
437                                        state => $state,
438                                        country => 'US'
439                                );
440                                # ::diag(__PACKAGE__, ': ', __LINE__);
441
1
2
                                if($addr[0] !~ /^\d/) {
442                                        # ::diag(__PACKAGE__, ': ', __LINE__);
443
1
1
                                        $addr{'name'} = $addr[0];
444
1
2
                                        if($addr[1] =~ /^(\d+)\s+(.+)/) {
445                                                # ::diag(__PACKAGE__, ': ', __LINE__);
446
1
2
                                                $addr{'number'} = $1;
447
1
4
                                                $addr{'road'} = Geo::Coder::Free::_normalize($2);
448
1
4
                                                if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) {
449                                                        # ::diag(Data::Dumper->new([$rc])->Dump());
450
1
39
                                                        $rc->{'country'} = 'US';
451
452                                                        # Store the result in the cache for future requests
453
1
1
                                                        $self->{cache}{$lc} = $rc;
454
455
1
7
                                                        return $rc;
456                                                }
457                                        } else {
458
0
0
                                                $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]);
459
0
0
                                                if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
460                                                        # ::diag(Data::Dumper->new([$rc])->Dump());
461
0
0
                                                        $rc->{'country'} = 'US';
462
463                                                        # Store the result in the cache for future requests
464
0
0
                                                        $self->{cache}{$lc} = $rc;
465
466
0
0
                                                        return $rc;
467                                                }
468                                        }
469                                } else {
470
0
0
                                        $addr{'number'} = $addr[0];
471
0
0
                                        $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]);
472
0
0
                                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
473                                                # ::diag(Data::Dumper->new([$rc])->Dump());
474
0
0
                                                $rc->{'country'} = 'US';
475
476                                                # Store the result in the cache for future requests
477
0
0
                                                $self->{cache}{$lc} = $rc;
478
479
0
0
                                                return $rc;
480                                        }
481                                }
482                        }
483                }
484        }
485
486
12
45
        if(($location =~ /.+,.+,.*England$/) &&
487           ($location !~ /.+,.+,.+,.*England$/)) {
488                # Simple "Town, County, England"
489                # If we're here, it's not going to be found because the
490                # above parsers will have worked
491
1
4
                return;
492        }
493
494
11
266
        require Geo::Address::Parser && Geo::Address::Parser->import() unless Geo::Address::Parser->can('parse');
495
496
11
1597
        my $addr_parser = Geo::Address::Parser->new(country => 'UK');
497
11
37215
        if(my $fields = $addr_parser->parse($location)) {
498
11
11
3140
14
                for my $key (keys %{$fields}) {
499
36
35
                        delete $fields->{$key} unless defined $fields->{$key};
500                }
501
11
11
14
22
                if(my $rc = $self->_search($fields, keys %{$fields})) {
502
0
0
                        $rc->{'country'} = 'UK';
503
504                        # Store the result in the cache for future requests
505
0
0
                        $self->{cache}{$lc} = $rc;
506
507
0
0
                        return $rc;
508                }
509        }
510
511        # Finally try libpostal,
512        # which is good but uses a lot of memory and can take a very long time to parse data
513
11
21
        if($libpostal_is_installed == LIBPOSTAL_UNKNOWN) {
514
1
1
1
32
                if(eval { require Geo::libpostal; } ) {
515
0
0
                        Geo::libpostal->import();
516
0
0
                        $libpostal_is_installed = LIBPOSTAL_INSTALLED;
517                } else {
518
1
304
                        $libpostal_is_installed = LIBPOSTAL_NOT_INSTALLED;
519                }
520        }
521
522        # ::diag(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)");
523        # print(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)\n");
524
525        # TODO: cache calls to this
526
11
22
        if(($libpostal_is_installed == LIBPOSTAL_INSTALLED) && (my %addr = Geo::libpostal::parse_address($location))) {
527
0
0
                if($addr{'house_number'} && !$addr{'number'}) {
528
0
0
                        $addr{'number'} = delete $addr{'house_number'};
529                }
530
0
0
                if($addr{'house'} && !$addr{'name'}) {
531
0
0
                        $addr{'name'} = delete $addr{'house'};
532                }
533
0
0
                $addr{'location'} = $location;
534
0
0
                if(my $street = $addr{'road'}) {
535
0
0
                        $addr{'road'} = Geo::Coder::Free::_normalize($street);
536                }
537
0
0
                if(defined($addr{'state'}) && !defined($addr{'country'}) && ($addr{'state'} eq 'england')) {
538
0
0
                        delete $addr{'state'};
539
0
0
                        $addr{'country'} = 'GB';
540                }
541                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([\%addr])->Dump());
542
0
0
                if($addr{'country'} && ($addr{'state'} || $addr{'state_district'})) {
543
0
0
                        if($addr{'country'} =~ /Canada/i) {
544
0
0
                                $addr{'country'} = 'Canada';
545
0
0
                                if(length($addr{'state'}) > 2) {
546
0
0
                                        if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($addr{'state'})}) {
547
0
0
                                                $addr{'state'} = $twoletterstate;
548                                        }
549                                }
550                        } elsif($addr{'country'} =~ /^(United States|USA|US)$/i) {
551
0
0
                                $addr{'country'} = 'US';
552
0
0
                                if(length($addr{'state'}) > 2) {
553
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($addr{'state'})}) {
554
0
0
                                                $addr{'state'} = $twoletterstate;
555                                        }
556                                }
557                        }
558
0
0
                        if($addr{'state_district'}) {
559
0
0
                                $addr{'state_district'} =~ s/^(.+)\s+COUNTY/$1/i;
560
0
0
                                if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state_district', 'state', 'country'))) {
561
562                                        # Store the result in the cache for future requests
563
0
0
                                        $self->{cache}{$lc} = $rc;
564
565
0
0
                                        return $rc;
566                                }
567                        }
568
0
0
                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
569                                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([$rc])->Dump());
570
571                                # Store the result in the cache for future requests
572
0
0
                                $self->{cache}{$lc} = $rc;
573
574
0
0
                                return $rc;
575                        }
576
0
0
                        if($addr{'number'}) {
577
0
0
                                if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
578
579                                        # Store the result in the cache for future requests
580
0
0
                                        $self->{cache}{$lc} = $rc;
581
582
0
0
                                        return $rc;
583                                }
584                        }
585                }
586        }
587
11
64
        if($location =~ /^(.+?),\s*([\s\w]+),\s*([\s\w]+),\s*([\w\s]+)$/) {
588                # >= 5.14 could say:
589                # my %addr = (
590                #       road => $1,
591                #       city => $2,
592                #       state => $3 =~ s/\s+$//r,
593                #       country => $4 =~ s/\s+$//r
594                # );
595
11
29
                my %addr = (
596                        road => $1,
597                        city => $2,
598                        state => $3,
599                        country => $4,
600                );
601
11
20
                $addr{'state'} =~ s/\s$//g;
602
11
16
                $addr{'country'} =~ s/\s$//g;
603
11
39
                if($addr{'road'} =~ /([\w\s]+),*\s+(.+)/) {
604
11
14
                        $addr{'name'} = $1;
605
11
11
                        $addr{'road'} = $2;
606                }
607
11
22
                if($addr{'road'} =~ /^(\d+)\s+(.+)/) {
608
2
3
                        $addr{'number'} = $1;
609
2
2
                        $addr{'road'} = $2;
610                        # ::diag(__LINE__, ': ', Data::Dumper->new([\%addr])->Dump());
611
2
3
                        if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) {
612
613                                # Store the result in the cache for future requests
614
0
0
                                $self->{cache}{$lc} = $rc;
615
616
0
0
                                return $rc;
617                        }
618                } elsif(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
619
620                        # Store the result in the cache for future requests
621
1
31
                        $self->{cache}{$lc} = $rc;
622
623
1
3
                        return $rc;
624                }
625
10
32
                if($addr{'name'} && !defined($addr{'number'})) {
626                        # We know the name of the building but not the street number
627                        # ::diag(__LINE__, ': ', $addr{'name'});
628
8
12
                        if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
629                                # ::diag(__PACKAGE__, ': ', __LINE__);
630
631                                # Store the result in the cache for future requests
632
0
0
                                $self->{cache}{$lc} = $rc;
633
634
0
0
                                return $rc;
635                        }
636                }
637        }
638
639
10
19
        $location = uc($location);
640
10
18
        foreach my $left(keys %alternatives) {
641                # ::diag("$location/$left");
642
36
153
                if($location =~ $left) {
643                        # ::diag($left, '=>', $alternatives{$left});
644
5
23
                        $location =~ s/$left/$alternatives{$left}/;
645
5
6
                        $params{'location'} = $location;
646                        # ::diag(__LINE__, ": found alternative '$location'");
647
5
20
                        if(my $rc = $self->geocode(\%params)) {
648                                # ::diag(__LINE__, ": $location");
649
650                                # Store the result in the cache for future requests
651
1
3
                                $self->{cache}{$lc} = $rc;
652
653
1
6
                                return $rc;
654                        }
655
4
43
                        if($location =~ /(.+), (England|UK)$/i) {
656
4
8
                                $params{'location'} = "$1, GB";
657
4
6
                                if(my $rc = $self->geocode(\%params)) {
658                                        # ::diag(__LINE__, ": $location");
659
660                                        # Store the result in the cache for future requests
661
3
15
                                        $self->{cache}{$lc} = $rc;
662
663
3
9
                                        return $rc;
664                                }
665                        }
666                }
667        }
668
6
19
        return;
669}
670
671# Match parsed address components against the locally loaded dataset.
672
673# $data is a hashref to data such as returned by Geo::libpostal::parse_address
674# @columns is the key names to use in $data
675sub _search
676{
677
40
56
        my ($self, $data, @columns) = @_;
678
679        # FIXME: linear search is slow
680        # ::diag(__LINE__, ': ', Data::Dumper->new([\@columns, $data])->Dump());
681        # print Data::Dumper->new([\@columns, $data])->Dump();
682        # my @call_details = caller(0);
683        # ::diag(__LINE__, ': called from ', $call_details[2]);
684
40
40
27
39
        foreach my $row(@{$self->{'data'}}) {
685
3434
1551
                my $match = 1;
686
3434
1533
                my $number_of_columns_matched;
687
688                # ::diag(Data::Dumper->new([$self->{data}])->Dump());
689                # print Data::Dumper->new([$self->{data}])->Dump();
690
691
3434
1611
                foreach my $column(@columns) {
692
3486
1971
                        if(defined($data->{$column})) {
693
3486
2481
                                if(!defined($row->{$column})) {
694
1195
544
                                        $match = 0;
695
1195
569
                                        last;
696                                }
697                                # ::diag("$column: ", $row->{$column}, '/', $data->{$column});
698                                # print "$column: ", $row->{$column}, '/', $data->{$column}, "\n";
699
2291
1744
                                if(uc($row->{$column}) ne uc($data->{$column})) {
700
2235
1031
                                        $match = 0;
701
2235
1033
                                        last;
702                                }
703
56
35
                                $number_of_columns_matched++;
704                        } elsif(exists $data->{$column}) {
705
0
0
                                delete $data->{$column};
706                        }
707                }
708                # ::diag("match: $match");
709
3434
2330
                if($match && ($number_of_columns_matched >= 3)) {
710
4
16
                        my $confidence;
711
4
7
                        if($number_of_columns_matched == scalar(@columns)) {
712
4
5
                                $confidence = 1.0;
713                        } elsif($number_of_columns_matched >= 4) {
714
0
0
                                $confidence = 0.7;
715                        } else {
716
0
0
                                $confidence = 0.5;
717                        }
718                        # ::diag("$number_of_columns_matched -> $confidence");
719                        return Geo::Location::Point->new(
720                                # 'latitude' => $row->{'latitude'},
721                                # 'longitude' => $row->{'longitude'},
722                                'location' => $data->{'location'},
723                                'confidence' => $confidence,
724                                'database' => __PACKAGE__,
725
4
4
6
25
                                %{$row}
726                        );
727                }
728        }
729
36
67
        return;
730}
731
732 - 736
=head2  reverse_geocode

    $location = $geocoder->reverse_geocode(latlng => '37.778907,-122.39732');

=cut
737
738sub reverse_geocode {
739
37
1
9149
        my $self = shift;
740
37
16
        my %params;
741
742        # Try hard to support whatever API that the user wants to use
743
37
93
        if(!ref($self)) {
744
0
0
                if(scalar(@_)) {
745
0
0
                        return(__PACKAGE__->new()->reverse_geocode(@_));
746                } elsif(!defined($self)) {
747                        # Geo::Coder::Free->reverse_geocode()
748
0
0
                        Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
749                } elsif($self eq __PACKAGE__) {
750
0
0
                        Carp::croak("Usage: $self", '::reverse_geocode(latlng => "$lat,$long")');
751                }
752
0
0
                return(__PACKAGE__->new()->reverse_geocode($self));
753        } elsif(ref($self) eq 'HASH') {
754
0
0
                return(__PACKAGE__->new()->reverse_geocode($self));
755        } elsif(ref($_[0]) eq 'HASH') {
756
14
14
8
18
                %params = %{$_[0]};
757        # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) {
758        } elsif(ref($_[0])) {
759
0
0
                Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
760        } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) {
761
14
21
                %params = @_;
762        } else {
763
9
9
                $params{'latlng'} = shift;
764        }
765
766
37
27
        my $latlng = $params{'latlng'};
767
768
37
26
        my $latitude;
769        my $longitude;
770
771
37
31
        if($latlng) {
772
22
25
                ($latitude, $longitude) = split(/,/, $latlng);
773        } else {
774
15
30
                $latitude //= $params{'lat'};
775
15
29
                $longitude //= $params{'lon'};
776
15
24
                $longitude //= $params{'long'};
777        }
778
779
37
57
        if((!defined($latitude)) || !defined($longitude)) {
780
1
2
                Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
781        }
782
783        # ::diag(__LINE__, ": $latitude,$longitude");
784
36
18
        my @rc;
785
36
36
19
29
        foreach my $row(@{$self->{'data'}}) {
786
3310
3415
                if(defined($row->{'latitude'}) && defined($row->{'longitude'})) {
787                        # ::diag(__LINE__, ': ', $row->{'latitude'}, ', ', $latitude);
788
3310
1889
                        if(_equal($row->{'latitude'}, $latitude, 4) &&
789                           _equal($row->{'longitude'}, $longitude, 4)) {
790                                # ::diag('match');
791
71
72
                                my $location = uc($row->as_string());        # Geo::Location::Point object
792
71
176
                                if(wantarray) {
793
70
45
                                        push @rc, $location;
794
70
68
                                        while(my($left, $right) = each %alternatives) {
795                                                # ::diag("$location/$left");
796
280
759
                                                if($location =~ $right) {
797                                                        # ::diag($right, '=>', $left);
798
100
49
                                                        my $l = $location;
799
100
129
                                                        $l =~ s/$right/$left/;
800                                                        # ::diag(__LINE__, ": $location");
801
100
136
                                                        push @rc, $l;
802                                                        # Don't add last here
803                                                }
804                                        }
805                                } else {
806
1
5
                                        return $location;
807                                }
808                        }
809                }
810        }
811
35
69
        return @rc;
812}
813
814# https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch02s03.html
815# equal(NUM1, NUM2, ACCURACY) : returns true if NUM1 and NUM2 are
816# equal to ACCURACY number of decimal places
817sub _equal {
818
3461
2091
        my ($A, $B, $dp) = @_;
819
820
3461
6300
        return sprintf("%.${dp}g", $A) eq sprintf("%.${dp}g", $B);
821}
822
823 - 827
=head2  ua

Does nothing, here for compatibility with other geocoders

=cut
828
829
1
sub ua {
830}
831
832# find_geographic_centres($csv_data)
833#
834# Helper function that processes CSV geographic data to find centres of location clusters.
835# Takes a string containing CSV data with headers and analyzes it to find groups of
836# 3 or more locations in the same city/state/country combination. For each qualifying
837# group, it calculates the geographic centre and prints the results.
838#
839# Parameters:
840#   $csv_data - String containing complete CSV data including header row
841#
842# Processing steps:
843#   1. Parses CSV header to get field names
844#   2. Parses each data row into location hash objects
845#   3. Validates that coordinates are numeric
846#   4. Groups locations by city/state/country key
847#   5. For groups with 3+ locations, calculates and prints centre coordinates
848#
849sub _find_geographic_centres
850{
851
5
5
        my $csv_data = $_[0];
852
853        # Parse CSV data into an array of hashes
854        # my @lines = split /\n/, $csv_data;
855
5
5
4
20
        my @lines = @{$csv_data};
856
857
5
27
        return if(scalar(@lines) == 0);
858
859
2
2
        my $header = shift @lines;
860
861        # Remove quotes from header and split
862
2
23
        $header =~ s/"//g;
863
2
3
        chomp $header;
864
2
10
        my @fields = split /,/, $header;
865
866
2
2
        my @locations = ();
867
868        # Parse each data line
869
2
3
        foreach my $line (@lines) {
870
176
126
                next if $line =~ /^\s*$/;       # Skip empty lines
871
176
106
                chomp $line;
872
873                # Simple CSV parsing - handles quoted fields
874
176
71
                my @values = ();
875
176
87
                my $current_field = '';
876
176
77
                my $in_quotes = 0;
877
878
176
463
                for my $char (split //, $line) {
879
15084
10268
                        if ($char eq '"') {
880
2020
1023
                                $in_quotes = !$in_quotes;
881                        } elsif ($char eq ',' && !$in_quotes) {
882
1410
714
                                push @values, $current_field;
883
1410
710
                                $current_field = '';
884                        } else {
885
11654
5692
                                $current_field .= $char;
886                        }
887                }
888
176
356
                push @values, $current_field;   # Don't forget the last field
889
890                # Create location hash
891
176
67
                my %location = ();
892
176
125
                for my $i (0..$#fields) {
893
1584
1231
                        $location{$fields[$i]} = $values[$i] || '';
894                }
895
896                # Only include locations with valid coordinates
897
176
442
                if($location{latitude} && $location{longitude} &&
898                   ($location{latitude} =~ /^-?\d+\.?\d*$/) &&
899                   ($location{longitude} =~ /^-?\d+\.?\d*$/)) {
900
172
184
                        push @locations, \%location;
901                }
902        }
903
904        # Group locations by city, state, country
905
2
2
        my %groups = ();
906
907
2
2
        foreach my $loc (@locations) {
908
172
107
                my $key = join('|', $loc->{city}, $loc->{state}, $loc->{country});
909
172
172
69
137
                push @{$groups{$key}}, $loc;
910        }
911
912
2
2
        my $rc;
913
914        # Process groups with 3 or more locations
915
2
7
        foreach my $group_key (keys %groups) {
916
84
35
                my $locations_ref = $groups{$group_key};
917
918
84
54
                if (@$locations_ref >= 3) {
919
12
14
                        my ($city, $state, $country) = split /\|/, $group_key;
920
921                        # Calculate geographic centre
922
12
8
                        my ($centre_lat, $centre_lon) = _calculate_centre($locations_ref);
923
924                        # printf("Center of %d locations in %s, %s, %s: %.6f, %.6f\n",
925                                 # scalar(@$locations_ref), $city, $state, $country,
926                                 # $centre_lat, $centre_lon);
927
928
12
12
6
26
                        push @{$rc}, {
929                                'city' => $city,
930                                'state' => $state,
931                                'country' => $country,
932                                'lat' => $centre_lat,
933                                'latitude' => $centre_lat,
934                                'longitude' => $centre_lon,
935                                'long' => $centre_lon,
936                                'lng' => $centre_lon
937                        };
938                }
939        }
940
941
2
65
        return $rc;
942}
943
944# _calculate_centre($locations_ref)
945#
946# Helper funcation that calculates the geographic centre (centroid) of a group of locations using
947# the arithmetic mean method to 6 decimal places. This works well for small geographic areas but
948# may be less accurate for locations spread over large distances due to
949# Earth's curvature.
950#
951# Parameters:
952#   $locations_ref - Reference to array of location hash objects, each containing
953#                   latitude and longitude fields
954#
955# Returns:
956#   ($centre_lat, $centre_lon) - Two-element list containing the calculated
957#                               centre coordinates as decimal degrees
958#
959# Algorithm:
960#   - Sums all latitude values and divides by count
961#   - Sums all longitude values and divides by count
962#   - Returns the arithmetic mean of both coordinates
963sub _calculate_centre
964{
965
12
7
        my $locations_ref = $_[0];
966
967
12
7
        my $total_lat = 0;
968
12
5
        my $total_lon = 0;
969
12
5
        my $count = 0;
970
971
12
9
        foreach my $loc (@$locations_ref) {
972
90
78
                $total_lat += $loc->{latitude};
973
90
64
                $total_lon += $loc->{longitude};
974
90
41
                $count++;
975        }
976
977        # Round to 6 decimal places
978
12
28
        my $centre_lat = sprintf('%.6f', $total_lat / $count);
979
12
13
        my $centre_lon = sprintf('%.6f', $total_lon / $count);
980
981
12
12
        return ($centre_lat, $centre_lon);
982}
983
984 - 1006
=head1 AUTHOR

Nigel Horne <njh@bandsman.co.uk>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 BUGS

The data are stored in the source,
they should be read in from somewhere else to make it easier for non-authors to add data.

=head1 SEE ALSO

=head1 LICENSE AND COPYRIGHT

Copyright 2020-2024 Nigel Horne.

The program code is released under the following licence: GPL2 for personal use on a single computer.
All other users (including Commercial, Charity, Educational, and Government)
must apply in writing for a licence for use from Nigel Horne at `<njh at nigelhorne.com>`.

=cut
1007
10081;
1009
1010# Ensure you use abbreviations, e.g., RD not ROAD