File Coverage

File:blib/lib/Geo/Coder/Free/Local.pm
Coverage:66.5%

linestmtbrancondsubpodtimecode
1package Geo::Coder::Free::Local;
2
3
8
8
8
65591
9
116
use strict;
4
8
8
8
11
8
133
use warnings;
5
6
8
8
8
12
4
261
use Carp;
7
8
8
8
1320
46076
109
use Geo::Location::Point 0.14;
8
8
8
8
235
8
75
use Geo::Coder::Free;
9
8
8
8
1431
172428
255
use Geo::StreetAddress::US;
10
8
8
8
1972
411774
163
use Lingua::EN::AddressParse;
11
8
8
8
1203
5352
110
use Locale::CA;
12
8
8
8
1103
1116
108
use Locale::US;
13
8
8
8
1402
239143
142
use Object::Configure;
14
8
8
8
23
7
119
use Params::Get;
15
8
8
8
1647
74683
298
use Text::xSV::Slurp;
16
17 - 32
=head1 NAME

Geo::Coder::Free::Local -
Provides an interface to locations that you know yourself,
based on locally known data,
thereby giving a way to geocode locations using self-curated data instead of relying on external APIs.
For example, I have found locations by using GPS apps on a smartphone and by
inspecting GeoTagged photographs using
L<https://github.com/nigelhorne/NJH-Snippets/blob/master/bin/geotag>
or by using the app GPSCF which are included here.

=head1 VERSION

Version 0.41

=cut
33
34our $VERSION = '0.41';
35
36
8
8
8
23
6
188
use constant    LIBPOSTAL_UNKNOWN => 0;
37
8
8
8
15
7
130
use constant    LIBPOSTAL_INSTALLED => 1;
38
8
8
8
15
5
18369
use constant    LIBPOSTAL_NOT_INSTALLED => -1;
39our $libpostal_is_installed = LIBPOSTAL_UNKNOWN;
40
41# Alternative mappings for ambiguous or inconsistent place names
42# See also lib/Geo/Coder/Free.pm
43our %alternatives = (
44        'ST LAWRENCE, THANET, KENT' => 'RAMSGATE, KENT',
45        'ST PETERS, THANET, KENT' => 'ST PETERS, KENT',
46        'MINSTER, THANET, KENT' => 'RAMSGATE, KENT',
47        'TYNE AND WEAR' => 'BOROUGH OF NORTH TYNESIDE',
48);
49
50 - 69
=head1 SYNOPSIS

    use Geo::Coder::Free::Local;

    my $geocoder = Geo::Coder::Free::Local->new();
    my $location = $geocoder->geocode(location => 'Ramsgate, Kent, UK');

=head1 DESCRIPTION

Geo::Coder::Free::Local provides an interface to your own location data.

=head1 METHODS

=head2 new

Initializes a geocoder object, loading the local data.

    $geocoder = Geo::Coder::Free::Local->new();

=cut
70
71sub new
72{
73
7
1
116300
        my $class = shift;
74
7
16
        my $params = Params::Get::get_params(undef, \@_) || {};
75
76
7
82
        if(!defined($class)) {
77                # Geo::Coder::Free::Local->new not Geo::Coder::Free::Local::new
78                # carp(__PACKAGE__, ' use ->new() not ::new() to instantiate');
79                # return;
80
81                # FIXME: this only works when no arguments are given
82
1
1
                $class = __PACKAGE__;
83        } elsif(ref($class)) {
84                # clone the given object
85
2
2
2
2
2
9
                return bless { %{$class}, %{$params} }, ref($class);
86        }
87
88
5
11
        $params = Object::Configure::configure($class, $params);
89
90
5
19224
        my @data = <DATA>;
91
92        # TODO: since 'hoh' doesn't allow a CODEREF as a key,
93        #       I could build an hoh manually from this aoh,
94        #       it would make searching much quicker
95        my $self = bless {
96                data => xsv_slurp(
97                        shape => 'aoh',
98                        text_csv => {
99                                allow_loose_quotes => 1,
100                                blank_is_undef => 1,
101                                empty_is_undef => 1,
102                                binary => 1,
103                                escape_char => '\\',
104                        },
105                        string => \join('', grep(!/^\s*(#|$)/, @data))
106                ),
107
5
5
67
3352
                %{$params}
108        }, $class;
109
110        # Process the data to find geographic centres of location clusters.
111        # This will identify groups with 3+ locations in the same city/state/country,
112        #       thus adding towns/cities to the local database
113
5
15
        my $towns = _find_geographic_centres(\@data);
114
5
5
5
5
        foreach my $town (@{$towns}) {
115
12
12
7
12
                push @{$self->{data}}, $town;
116        }
117
118        # Build the hash-based index
119
5
5
0
7
        foreach my $row (@{$self->{data} }) {
120
188
147
                my $key = lc(Geo::Location::Point->new($row)->as_string());
121
188
12993
                $self->{'index'}{$key} = $row;
122        }
123
124        # TODO:  Perhaps the cache can be prepopulated, or stored in a less volatile location?
125        # The cache attribute stores normalized location strings as keys and Geo::Location::Point objects as values
126
5
27
        return $self;
127}
128
129 - 145
=head2 geocode

Performs the geocoding operation by matching an input location against the local data and attempting different strategies for parsing and resolving the address.

Handles parsing of addresses based on location-specific rules, e.g., U.S., U.K., or Canada.
Uses various parsers for country-specific address normalization.

    $location = $geocoder->geocode(location => $location);

    print 'Latitude: ', $location->lat(), "\n";
    print 'Longitude: ', $location->long(), "\n";

    # TODO:
    # @locations = $geocoder->geocode('Portland, USA');
    # diag 'There are Portlands in ', join (', ', map { $_->{'state'} } @locations);

=cut
146
147sub geocode {
148
37
1
11804
        my $self = shift;
149
37
36
        my %params;
150
151        # Try hard to support whatever API the user wants to use
152
37
143
        if(!ref($self)) {
153
0
0
                if(scalar(@_)) {
154
0
0
                        return(__PACKAGE__->new()->geocode(@_));
155                } elsif(!defined($self)) {
156                        # Geo::Coder::Free->geocode()
157
0
0
                        Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)');
158                } elsif($self eq __PACKAGE__) {
159
0
0
                        Carp::croak("Usage: $self", '::geocode(location => $location)');
160                }
161
0
0
                return(__PACKAGE__->new()->geocode($self));
162        } elsif(ref($self) eq 'HASH') {
163
0
0
                return(__PACKAGE__->new()->geocode($self));
164        } elsif(ref($_[0]) eq 'HASH') {
165
16
16
16
23
                %params = %{$_[0]};
166        # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) {
167        } elsif(ref($_[0])) {
168
0
0
                Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)');
169        } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) {
170
8
11
                %params = @_;
171        } else {
172
13
14
                $params{'location'} = shift;
173        }
174
175        my $location = $params{location}
176
37
50
                or Carp::croak('Usage: geocode(location => $location)');
177
178        # Only used to geolocate full addresses, not states/provinces
179
36
94
        return if($location !~ /,.+,/);
180
181        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
182
183        # Look for a quick match, we may get lucky
184
36
40
        my $lc = lc($location);
185
36
54
        $lc =~ s/,\susa$/, us/i;
186
187        # Check the cache first
188
36
56
        if(exists $self->{cache}{$lc}) {
189                # ::diag("Found $lc in the cache");
190
16
24
                return $self->{cache}{$lc};
191        }
192
193        # Use the hash-based index for a quick lookup
194
20
33
        if(exists $self->{index}{$lc}) {
195                # Store the result in the cache for future requests
196
5
18
                return $self->{cache}{$lc} = $self->{index}{$lc}; # Geo::Location::Point object
197        }
198        # ::diag("$location: hash search failed");
199
200
15
14
        if(0) {
201                # Old linear search mode, now replaced by the hash-based index
202                foreach my $row(@{$self->{'data'}}) {
203                        my $rc = Geo::Location::Point->new($row);
204                        my $str = lc($rc->as_string());
205
206                        # ::diag("Compare $str->$lc") if(($location =~ /MINSTER CEME/i) && ($str =~ /MINSTER CEME/i));
207                        # ::diag("Compare $str->$lc");
208                        # print "Compare $str->$lc\n";
209                        if($str eq $lc) {
210                                # This looks pointless and I can't recall why I put it in
211                                # foreach my $column ('name', 'state_district') {
212                                        # if((!defined($rc->{$column})) && exists($rc->{$column})) {
213                                                # delete $rc->{$column};
214                                        # }
215                                # }
216                                # ::diag("$location: linear search suceeded");
217                                return $rc;
218                        }
219
220                        if(($str =~ /, us$/) && ("${str}a" eq $lc)) {
221                                return $rc;
222                        }
223
224                        if(($lc =~ /(.+), (England|UK)$/i) && ($str eq "$1, gb")) {
225                                return $rc;
226                        }
227                }
228                # ::diag("$location: linear search failed");
229        }
230
231        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
232
233
15
14
        my $ap;
234
15
100
        if(($location =~ /USA?$/) || ($location =~ /United States$/)) {
235
1
9
                $ap = $self->{'ap'}->{'us'} // Lingua::EN::AddressParse->new(country => 'US', auto_clean => 1, force_case => 1, force_post_code => 0);
236
1
159667
                $self->{'ap'}->{'us'} = $ap;
237        } elsif($location =~ /(England|Scotland|Wales|Northern Ireland|UK|GB)$/i) {
238
13
36
                $ap = $self->{'ap'}->{'gb'} // Lingua::EN::AddressParse->new(country => 'GB', auto_clean => 1, force_case => 1, force_post_code => 0);
239
13
697197
                $self->{'ap'}->{'gb'} = $ap;
240        } elsif($location =~ /Canada$/) {
241                # TODO: no Canadian addresses yet
242
0
0
                return;
243
0
0
                $ap = $self->{'ap'}->{'ca'} // Lingua::EN::AddressParse->new(country => 'CA', auto_clean => 1, force_case => 1, force_post_code => 0);
244
0
0
                $self->{'ap'}->{'ca'} = $ap;
245        } elsif($location =~ /Australia$/) {
246                # TODO: no Australian addresses yet
247
0
0
                return;
248
0
0
                $ap = $self->{'ap'}->{'au'} // Lingua::EN::AddressParse->new(country => 'AU', auto_clean => 1, force_case => 1, force_post_code => 0);
249
0
0
                $self->{'ap'}->{'au'} = $ap;
250        }
251
15
25
        if($ap) {
252                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
253
254
14
16
                my $l = $location;
255
14
93
                if($l =~ /(.+), (England|UK)$/i) {
256
11
20
                        $l = "$1, GB";
257                }
258                # if(my $error = $ap->parse($l)) {
259                        # Carp::croak($ap->report());
260                        # ::diag('Address parse failed: ', $ap->report());
261                # } else {
262
14
37
                if($ap->parse($l) == 0) {
263                        # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location);
264
5
150381
                        my %c = $ap->components();
265                        # ::diag(Data::Dumper->new([\%c])->Dump());
266
5
887
                        my %addr = ('location' => $l);
267
5
8
                        my $street = $c{'street_name'};
268
5
11
                        if(my $type = $c{'street_type'}) {
269
5
18
                                if(my $a = Geo::Coder::Free::_abbreviate($type)) {
270
5
8
                                        $street .= " $a";
271                                } else {
272
0
0
                                        $street .= " $type";
273                                }
274
5
11
                                if(my $suffix = $c{'street_direction_suffix'}) {
275
0
0
                                        $street .= " $suffix";
276                                }
277
5
8
                                $street =~ s/^0+//;     # Turn 04th St into 4th St
278
5
6
                                $addr{'road'} = $street;
279                        }
280
5
10
                        if(length($c{'subcountry'}) == 2) {
281
0
0
                                $addr{'state'} = $c{'subcountry'};
282                        } else {
283
5
23
                                if($c{'country'} =~ /Canada/i) {
284
0
0
                                        $addr{'country'} = 'CA';
285
0
0
                                        if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($c{'subcountry'})}) {
286
0
0
                                                $addr{'state'} = $twoletterstate;
287                                        }
288                                } elsif($c{'country'} =~ /^(United States|USA|US)$/i) {
289
0
0
                                        $addr{'country'} = 'US';
290
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($c{'subcountry'})}) {
291
0
0
                                                $addr{'state'} = $twoletterstate;
292                                        }
293                                } elsif($c{'country'}) {
294
5
5
                                        $addr{'country'} = $c{'country'};
295
5
6
                                        if($c{'subcountry'}) {
296
5
7
                                                $addr{'state'} = $c{'subcountry'};
297                                        }
298                                }
299                        }
300
5
7
                        $addr{'number'} = $c{'property_identifier'};
301
5
7
                        $addr{'city'} = $c{'suburb'};
302                        # ::diag(Data::Dumper->new([\%addr])->Dump());
303                        # print Data::Dumper->new([\%addr])->Dump(), "\n";
304
5
18
                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
305                                # Store the result in the cache for future requests
306
1
43
                                $self->{cache}{$lc} = $rc;
307
308
1
5
                                return $rc;
309                        }
310
4
8
                        if($addr{'number'}) {
311
1
2
                                if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
312                                        # Store the result in the cache for future requests
313
0
0
                                        $self->{cache}{$lc} = $rc;
314
315
0
0
                                        return $rc;
316                                }
317                        }
318
319                        # Decide if it's worth continuing to search
320
4
5
                        my $found = 0;
321
4
8
                        if(!defined($addr{'country'})) {
322
0
0
                                if($l =~ /(United States|USA|US)$/i) {
323
0
0
                                        $addr{'country'} = 'US';
324                                } else {
325
0
0
                                        die "TODO: extract country from $l";
326                                }
327                        }
328
4
4
4
6
                        foreach my $row(@{$self->{'data'}}) {
329
8
25
                                if((uc($row->{'state'}) eq uc($addr{'state'})) &&
330                                   (uc($row->{'country'}) eq uc($addr{'country'}))) {
331
4
4
                                        $found = 1;
332
4
3
                                        last;
333                                }
334                        }
335
4
15
                        if(!$found) {
336                                # Nothing at all in this state/country,
337                                #       so let's give up looking
338
0
0
                                return;
339                        }
340                }
341        }
342
343
14
45819
        if($location =~ /^(.+?)[,\s]+(United States|USA|USA?)$/i) {
344                # Try Geo::StreetAddress::US, which is rather buggy
345
346
2
3
                my $l = $1;
347
2
3
                $l =~ tr/,/ /;
348
2
7
                $l =~ s/\s\s+/ /g;
349
350                # ::diag(__PACKAGE__, ': ', __LINE__, ": $location ($l)");
351
352                # Work around for RT#122617
353
2
16
                if(($location !~ /\sCounty,/i) && (my $href = (Geo::StreetAddress::US->parse_location($l) || Geo::StreetAddress::US->parse_address($l)))) {
354                        # ::diag(Data::Dumper->new([$href])->Dump());
355
2
261
                        if(my $state = $href->{'state'}) {
356
2
3
                                if(length($state) > 2) {
357
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) {
358
0
0
                                                $state = $twoletterstate;
359                                        }
360                                }
361
2
2
                                my $city;
362
2
4
                                if($href->{city}) {
363
2
2
                                        $city = uc($href->{city});
364                                }
365
2
2
                                if(my $street = $href->{street}) {
366
2
7
                                        if($href->{'type'} && (my $type = Geo::Coder::Free::_abbreviate($href->{'type'}))) {
367
1
2
                                                $street .= " $type";
368                                        }
369
2
4
                                        if($href->{suffix}) {
370
1
1
                                                $street .= ' ' . $href->{suffix};
371                                        }
372
2
3
                                        if(my $prefix = $href->{prefix}) {
373
1
2
                                                $street = "$prefix $street";
374                                        }
375                                        my %addr = (
376
2
7
                                                number => $href->{'number'},
377                                                road => $street,
378                                                city => $city,
379                                                state => $state,
380                                                country => 'US'
381                                        );
382
2
3
                                        if($href->{'number'}) {
383
0
0
                                                if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
384
0
0
                                                        $rc->{'country'} = 'US';
385
386                                                        # Store the result in the cache for future requests
387
0
0
                                                        $self->{cache}{$lc} = $rc;
388
389
0
0
                                                        return $rc;
390                                                }
391                                        }
392
2
4
                                        if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
393
1
27
                                                $rc->{'country'} = 'US';
394
395                                                # Store the result in the cache for future requests
396
1
2
                                                $self->{cache}{$lc} = $rc;
397
398
1
3
                                                return $rc;
399                                        }
400                                        # ::diag(__PACKAGE__, ': ', __LINE__, ": $location");
401
1
3
                                        if($street && !$href->{'number'}) {
402                                                # If you give a building with
403                                                # no street to G:S:US it puts
404                                                # the building name into the
405                                                # street field
406
1
2
                                                $addr{'name'} = $street;
407
1
2
                                                delete $addr{'road'};
408
409
1
2
                                                if(my $rc = $self->_search(\%addr, ('name', 'city', 'state', 'country'))) {
410
0
0
                                                        $rc->{'country'} = 'US';
411
412                                                        # Store the result in the cache for future requests
413
0
0
                                                        $self->{cache}{$lc} = $rc;
414
415
0
0
                                                        return $rc;
416                                                }
417                                        }
418                                }
419                        }
420                }
421
422                # Hack to find "name, street, town, state, US"
423
1
4
                my @addr = split(/,\s*/, $location);
424                # ::diag(__PACKAGE__, ': ', __LINE__, ' ', scalar(@addr));
425
1
2
                if(scalar(@addr) == 5) {
426                        # ::diag(__PACKAGE__, ': ', __LINE__, ": $location");
427                        # ::diag(Data::Dumper->new([\@addr])->Dump());
428
1
1
                        my $state = $addr[3];
429
1
2
                        if(length($state) > 2) {
430
1
7
                                if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) {
431
1
284
                                        $state = $twoletterstate;
432                                }
433                        }
434
1
10
                        if(length($state) == 2) {
435
1
3
                                my %addr = (
436                                        city => $addr[2],
437                                        state => $state,
438                                        country => 'US'
439                                );
440                                # ::diag(__PACKAGE__, ': ', __LINE__);
441
1
3
                                if($addr[0] !~ /^\d/) {
442                                        # ::diag(__PACKAGE__, ': ', __LINE__);
443
1
1
                                        $addr{'name'} = $addr[0];
444
1
3
                                        if($addr[1] =~ /^(\d+)\s+(.+)/) {
445                                                # ::diag(__PACKAGE__, ': ', __LINE__);
446
1
2
                                                $addr{'number'} = $1;
447
1
4
                                                $addr{'road'} = Geo::Coder::Free::_normalize($2);
448
1
2
                                                if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) {
449                                                        # ::diag(Data::Dumper->new([$rc])->Dump());
450
1
40
                                                        $rc->{'country'} = 'US';
451
452                                                        # Store the result in the cache for future requests
453
1
2
                                                        $self->{cache}{$lc} = $rc;
454
455
1
90
                                                        return $rc;
456                                                }
457                                        } else {
458
0
0
                                                $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]);
459
0
0
                                                if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
460                                                        # ::diag(Data::Dumper->new([$rc])->Dump());
461
0
0
                                                        $rc->{'country'} = 'US';
462
463                                                        # Store the result in the cache for future requests
464
0
0
                                                        $self->{cache}{$lc} = $rc;
465
466
0
0
                                                        return $rc;
467                                                }
468                                        }
469                                } else {
470
0
0
                                        $addr{'number'} = $addr[0];
471
0
0
                                        $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]);
472
0
0
                                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
473                                                # ::diag(Data::Dumper->new([$rc])->Dump());
474
0
0
                                                $rc->{'country'} = 'US';
475
476                                                # Store the result in the cache for future requests
477
0
0
                                                $self->{cache}{$lc} = $rc;
478
479
0
0
                                                return $rc;
480                                        }
481                                }
482                        }
483                }
484        }
485
486
12
48
        if(($location =~ /.+,.+,.*England$/) &&
487           ($location !~ /.+,.+,.+,.*England$/)) {
488                # Simple "Town, County, England"
489                # If we're here, it's not going to be found because the
490                # above parsers will have worked
491
1
3
                return;
492        }
493
494
11
342
        require Geo::Address::Parser && Geo::Address::Parser->import() unless Geo::Address::Parser->can('parse');
495
496
11
1808
        my $addr_parser = Geo::Address::Parser->new(country => 'UK');
497
11
42342
        if(my $fields = $addr_parser->parse($location)) {
498
11
11
3417
20
                for my $key (keys %{$fields}) {
499
36
33
                        delete $fields->{$key} unless defined $fields->{$key};
500                }
501
11
11
12
19
                if(my $rc = $self->_search($fields, keys %{$fields})) {
502
0
0
                        $rc->{'country'} = 'UK';
503
504                        # Store the result in the cache for future requests
505
0
0
                        $self->{cache}{$lc} = $rc;
506
507
0
0
                        return $rc;
508                }
509        }
510
511        # Finally try libpostal,
512        # which is good but uses a lot of memory and can take a very long time to parse data
513
11
22
        if($libpostal_is_installed == LIBPOSTAL_UNKNOWN) {
514
1
1
1
45
                if(eval { require Geo::libpostal; } ) {
515
0
0
                        Geo::libpostal->import();
516
0
0
                        $libpostal_is_installed = LIBPOSTAL_INSTALLED;
517                } else {
518
1
314
                        $libpostal_is_installed = LIBPOSTAL_NOT_INSTALLED;
519                }
520        }
521
522        # ::diag(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)");
523        # print(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)\n");
524
525        # TODO: cache calls to this
526
11
23
        if(($libpostal_is_installed == LIBPOSTAL_INSTALLED) && (my %addr = Geo::libpostal::parse_address($location))) {
527
0
0
                if($addr{'house_number'} && !$addr{'number'}) {
528
0
0
                        $addr{'number'} = delete $addr{'house_number'};
529                }
530
0
0
                if($addr{'house'} && !$addr{'name'}) {
531
0
0
                        $addr{'name'} = delete $addr{'house'};
532                }
533
0
0
                $addr{'location'} = $location;
534
0
0
                if(my $street = $addr{'road'}) {
535
0
0
                        $addr{'road'} = Geo::Coder::Free::_normalize($street);
536                }
537
0
0
                if(defined($addr{'state'}) && !defined($addr{'country'}) && ($addr{'state'} eq 'england')) {
538
0
0
                        delete $addr{'state'};
539
0
0
                        $addr{'country'} = 'GB';
540                }
541                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([\%addr])->Dump());
542
0
0
                if($addr{'country'} && ($addr{'state'} || $addr{'state_district'})) {
543
0
0
                        if($addr{'country'} =~ /Canada/i) {
544
0
0
                                $addr{'country'} = 'Canada';
545
0
0
                                if(length($addr{'state'}) > 2) {
546
0
0
                                        if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($addr{'state'})}) {
547
0
0
                                                $addr{'state'} = $twoletterstate;
548                                        }
549                                }
550                        } elsif($addr{'country'} =~ /^(United States|USA|US)$/i) {
551
0
0
                                $addr{'country'} = 'US';
552
0
0
                                if(length($addr{'state'}) > 2) {
553
0
0
                                        if(my $twoletterstate = Locale::US->new()->{state2code}{uc($addr{'state'})}) {
554
0
0
                                                $addr{'state'} = $twoletterstate;
555                                        }
556                                }
557                        }
558
0
0
                        if($addr{'state_district'}) {
559
0
0
                                $addr{'state_district'} =~ s/^(.+)\s+COUNTY/$1/i;
560
0
0
                                if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state_district', 'state', 'country'))) {
561
562                                        # Store the result in the cache for future requests
563
0
0
                                        $self->{cache}{$lc} = $rc;
564
565
0
0
                                        return $rc;
566                                }
567                        }
568
0
0
                        if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) {
569                                # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([$rc])->Dump());
570
571                                # Store the result in the cache for future requests
572
0
0
                                $self->{cache}{$lc} = $rc;
573
574
0
0
                                return $rc;
575                        }
576
0
0
                        if($addr{'number'}) {
577
0
0
                                if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) {
578
579                                        # Store the result in the cache for future requests
580
0
0
                                        $self->{cache}{$lc} = $rc;
581
582
0
0
                                        return $rc;
583                                }
584                        }
585                }
586        }
587
11
72
        if($location =~ /^(.+?),\s*([\s\w]+),\s*([\s\w]+),\s*([\w\s]+)$/) {
588                # >= 5.14 could say:
589                # my %addr = (
590                #       road => $1,
591                #       city => $2,
592                #       state => $3 =~ s/\s+$//r,
593                #       country => $4 =~ s/\s+$//r
594                # );
595
11
37
                my %addr = (
596                        road => $1,
597                        city => $2,
598                        state => $3,
599                        country => $4,
600                );
601
11
18
                $addr{'state'} =~ s/\s$//g;
602
11
17
                $addr{'country'} =~ s/\s$//g;
603
11
30
                if($addr{'road'} =~ /([\w\s]+),*\s+(.+)/) {
604
11
15
                        $addr{'name'} = $1;
605
11
14
                        $addr{'road'} = $2;
606                }
607
11
25
                if($addr{'road'} =~ /^(\d+)\s+(.+)/) {
608
2
2
                        $addr{'number'} = $1;
609
2
3
                        $addr{'road'} = $2;
610                        # ::diag(__LINE__, ': ', Data::Dumper->new([\%addr])->Dump());
611
2
3
                        if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) {
612
613                                # Store the result in the cache for future requests
614
0
0
                                $self->{cache}{$lc} = $rc;
615
616
0
0
                                return $rc;
617                        }
618                } elsif(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
619
620                        # Store the result in the cache for future requests
621
1
35
                        $self->{cache}{$lc} = $rc;
622
623
1
5
                        return $rc;
624                }
625
10
35
                if($addr{'name'} && !defined($addr{'number'})) {
626                        # We know the name of the building but not the street number
627                        # ::diag(__LINE__, ': ', $addr{'name'});
628
8
9
                        if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) {
629                                # ::diag(__PACKAGE__, ': ', __LINE__);
630
631                                # Store the result in the cache for future requests
632
0
0
                                $self->{cache}{$lc} = $rc;
633
634
0
0
                                return $rc;
635                        }
636                }
637        }
638
639
10
19
        $location = uc($location);
640
10
17
        foreach my $left(keys %alternatives) {
641                # ::diag("$location/$left");
642
36
166
                if($location =~ $left) {
643                        # ::diag($left, '=>', $alternatives{$left});
644
5
20
                        $location =~ s/$left/$alternatives{$left}/;
645
5
9
                        $params{'location'} = $location;
646                        # ::diag(__LINE__, ": found alternative '$location'");
647
5
16
                        if(my $rc = $self->geocode(\%params)) {
648                                # ::diag(__LINE__, ": $location");
649
650                                # Store the result in the cache for future requests
651
1
6
                                $self->{cache}{$lc} = $rc;
652
653
1
13
                                return $rc;
654                        }
655
4
37
                        if($location =~ /(.+), (England|UK)$/i) {
656
4
9
                                $params{'location'} = "$1, GB";
657
4
9
                                if(my $rc = $self->geocode(\%params)) {
658                                        # ::diag(__LINE__, ": $location");
659
660                                        # Store the result in the cache for future requests
661
3
16
                                        $self->{cache}{$lc} = $rc;
662
663
3
16
                                        return $rc;
664                                }
665                        }
666                }
667        }
668
6
32
        return;
669}
670
671# Match parsed address components against the locally loaded dataset.
672
673# $data is a hashref to data such as returned by Geo::libpostal::parse_address
674# @columns is the key names to use in $data
675sub _search
676{
677
40
61
        my ($self, $data, @columns) = @_;
678
679        # FIXME: linear search is slow
680        # ::diag(__LINE__, ': ', Data::Dumper->new([\@columns, $data])->Dump());
681        # print Data::Dumper->new([\@columns, $data])->Dump();
682        # my @call_details = caller(0);
683        # ::diag(__LINE__, ': called from ', $call_details[2]);
684
40
40
24
42
        foreach my $row(@{$self->{'data'}}) {
685
3434
1666
                my $match = 1;
686
3434
1602
                my $number_of_columns_matched;
687
688                # ::diag(Data::Dumper->new([$self->{data}])->Dump());
689                # print Data::Dumper->new([$self->{data}])->Dump();
690
691
3434
1782
                foreach my $column(@columns) {
692
3486
2053
                        if(defined($data->{$column})) {
693
3486
2669
                                if(!defined($row->{$column})) {
694
1189
567
                                        $match = 0;
695
1189
598
                                        last;
696                                }
697                                # ::diag("$column: ", $row->{$column}, '/', $data->{$column});
698                                # print "$column: ", $row->{$column}, '/', $data->{$column}, "\n";
699
2297
1860
                                if(uc($row->{$column}) ne uc($data->{$column})) {
700
2241
1035
                                        $match = 0;
701
2241
1143
                                        last;
702                                }
703
56
35
                                $number_of_columns_matched++;
704                        } elsif(exists $data->{$column}) {
705
0
0
                                delete $data->{$column};
706                        }
707                }
708                # ::diag("match: $match");
709
3434
2417
                if($match && ($number_of_columns_matched >= 3)) {
710
4
5
                        my $confidence;
711
4
6
                        if($number_of_columns_matched == scalar(@columns)) {
712
4
4
                                $confidence = 1.0;
713                        } elsif($number_of_columns_matched >= 4) {
714
0
0
                                $confidence = 0.7;
715                        } else {
716
0
0
                                $confidence = 0.5;
717                        }
718                        # ::diag("$number_of_columns_matched -> $confidence");
719                        return Geo::Location::Point->new(
720                                # 'latitude' => $row->{'latitude'},
721                                # 'longitude' => $row->{'longitude'},
722                                'location' => $data->{'location'},
723                                'confidence' => $confidence,
724                                'database' => __PACKAGE__,
725
4
4
9
24
                                %{$row}
726                        );
727                }
728        }
729
36
60
        return;
730}
731
732 - 736
=head2  reverse_geocode

    $location = $geocoder->reverse_geocode(latlng => '37.778907,-122.39732');

=cut
737
738sub reverse_geocode {
739
37
1
11019
        my $self = shift;
740
37
27
        my %params;
741
742        # Try hard to support whatever API that the user wants to use
743
37
111
        if(!ref($self)) {
744
0
0
                if(scalar(@_)) {
745
0
0
                        return(__PACKAGE__->new()->reverse_geocode(@_));
746                } elsif(!defined($self)) {
747                        # Geo::Coder::Free->reverse_geocode()
748
0
0
                        Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
749                } elsif($self eq __PACKAGE__) {
750
0
0
                        Carp::croak("Usage: $self", '::reverse_geocode(latlng => "$lat,$long")');
751                }
752
0
0
                return(__PACKAGE__->new()->reverse_geocode($self));
753        } elsif(ref($self) eq 'HASH') {
754
0
0
                return(__PACKAGE__->new()->reverse_geocode($self));
755        } elsif(ref($_[0]) eq 'HASH') {
756
14
14
10
20
                %params = %{$_[0]};
757        # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) {
758        } elsif(ref($_[0])) {
759
0
0
                Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
760        } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) {
761
14
20
                %params = @_;
762        } else {
763
9
11
                $params{'latlng'} = shift;
764        }
765
766
37
27
        my $latlng = $params{'latlng'};
767
768
37
30
        my $latitude;
769        my $longitude;
770
771
37
33
        if($latlng) {
772
22
29
                ($latitude, $longitude) = split(/,/, $latlng);
773        } else {
774
15
36
                $latitude //= $params{'lat'};
775
15
34
                $longitude //= $params{'lon'};
776
15
19
                $longitude //= $params{'long'};
777        }
778
779
37
62
        if((!defined($latitude)) || !defined($longitude)) {
780
1
2
                Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")');
781        }
782
783        # ::diag(__LINE__, ": $latitude,$longitude");
784
36
39
        my @rc;
785
36
36
32
36
        foreach my $row(@{$self->{'data'}}) {
786
3310
3350
                if(defined($row->{'latitude'}) && defined($row->{'longitude'})) {
787                        # ::diag(__LINE__, ': ', $row->{'latitude'}, ', ', $latitude);
788
3310
2051
                        if(_equal($row->{'latitude'}, $latitude, 4) &&
789                           _equal($row->{'longitude'}, $longitude, 4)) {
790                                # ::diag('match');
791
71
75
                                my $location = uc($row->as_string());        # Geo::Location::Point object
792
71
203
                                if(wantarray) {
793
70
55
                                        push @rc, $location;
794
70
73
                                        while(my($left, $right) = each %alternatives) {
795                                                # ::diag("$location/$left");
796
280
825
                                                if($location =~ $right) {
797                                                        # ::diag($right, '=>', $left);
798
100
55
                                                        my $l = $location;
799
100
150
                                                        $l =~ s/$right/$left/;
800                                                        # ::diag(__LINE__, ": $location");
801
100
137
                                                        push @rc, $l;
802                                                        # Don't add last here
803                                                }
804                                        }
805                                } else {
806
1
4
                                        return $location;
807                                }
808                        }
809                }
810        }
811
35
79
        return @rc;
812}
813
814# https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch02s03.html
815# equal(NUM1, NUM2, ACCURACY) : returns true if NUM1 and NUM2 are
816# equal to ACCURACY number of decimal places
817sub _equal {
818
3461
2005
        my ($A, $B, $dp) = @_;
819
820
3461
6605
        return sprintf("%.${dp}g", $A) eq sprintf("%.${dp}g", $B);
821}
822
823 - 827
=head2  ua

Does nothing, here for compatibility with other geocoders

=cut
828
829
1
sub ua {
830}
831
832# find_geographic_centres($csv_data)
833#
834# Helper function that processes CSV geographic data to find centres of location clusters.
835# Takes a string containing CSV data with headers and analyzes it to find groups of
836# 3 or more locations in the same city/state/country combination. For each qualifying
837# group, it calculates the geographic centre and prints the results.
838#
839# Parameters:
840#   $csv_data - String containing complete CSV data including header row
841#
842# Processing steps:
843#   1. Parses CSV header to get field names
844#   2. Parses each data row into location hash objects
845#   3. Validates that coordinates are numeric
846#   4. Groups locations by city/state/country key
847#   5. For groups with 3+ locations, calculates and prints centre coordinates
848#
849sub _find_geographic_centres
850{
851
5
5
        my $csv_data = $_[0];
852
853        # Parse CSV data into an array of hashes
854        # my @lines = split /\n/, $csv_data;
855
5
5
4
42
        my @lines = @{$csv_data};
856
857
5
11
        return if(scalar(@lines) == 0);
858
859
2
2
        my $header = shift @lines;
860
861        # Remove quotes from header and split
862
2
26
        $header =~ s/"//g;
863
2
2
        chomp $header;
864
2
4
        my @fields = split /,/, $header;
865
866
2
2
        my @locations = ();
867
868        # Parse each data line
869
2
3
        foreach my $line (@lines) {
870
176
141
                next if $line =~ /^\s*$/;       # Skip empty lines
871
176
87
                chomp $line;
872
873                # Simple CSV parsing - handles quoted fields
874
176
90
                my @values = ();
875
176
80
                my $current_field = '';
876
176
87
                my $in_quotes = 0;
877
878
176
462
                for my $char (split //, $line) {
879
15084
10440
                        if ($char eq '"') {
880
2020
1033
                                $in_quotes = !$in_quotes;
881                        } elsif ($char eq ',' && !$in_quotes) {
882
1410
738
                                push @values, $current_field;
883
1410
704
                                $current_field = '';
884                        } else {
885
11654
5751
                                $current_field .= $char;
886                        }
887                }
888
176
384
                push @values, $current_field;   # Don't forget the last field
889
890                # Create location hash
891
176
79
                my %location = ();
892
176
109
                for my $i (0..$#fields) {
893
1584
1261
                        $location{$fields[$i]} = $values[$i] || '';
894                }
895
896                # Only include locations with valid coordinates
897
176
434
                if($location{latitude} && $location{longitude} &&
898                   ($location{latitude} =~ /^-?\d+\.?\d*$/) &&
899                   ($location{longitude} =~ /^-?\d+\.?\d*$/)) {
900
172
177
                        push @locations, \%location;
901                }
902        }
903
904        # Group locations by city, state, country
905
2
2
        my %groups = ();
906
907
2
2
        foreach my $loc (@locations) {
908
172
116
                my $key = join('|', $loc->{city}, $loc->{state}, $loc->{country});
909
172
172
74
144
                push @{$groups{$key}}, $loc;
910        }
911
912
2
2
        my $rc;
913
914        # Process groups with 3 or more locations
915
2
4
        foreach my $group_key (keys %groups) {
916
84
30
                my $locations_ref = $groups{$group_key};
917
918
84
61
                if (@$locations_ref >= 3) {
919
12
16
                        my ($city, $state, $country) = split /\|/, $group_key;
920
921                        # Calculate geographic centre
922
12
14
                        my ($centre_lat, $centre_lon) = _calculate_centre($locations_ref);
923
924                        # printf("Center of %d locations in %s, %s, %s: %.6f, %.6f\n",
925                                 # scalar(@$locations_ref), $city, $state, $country,
926                                 # $centre_lat, $centre_lon);
927
928
12
12
9
28
                        push @{$rc}, {
929                                'city' => $city,
930                                'state' => $state,
931                                'country' => $country,
932                                'lat' => $centre_lat,
933                                'latitude' => $centre_lat,
934                                'longitude' => $centre_lon,
935                                'long' => $centre_lon,
936                                'lng' => $centre_lon
937                        };
938                }
939        }
940
941
2
74
        return $rc;
942}
943
944# _calculate_centre($locations_ref)
945#
946# Helper funcation that calculates the geographic centre (centroid) of a group of locations using
947# the arithmetic mean method to 6 decimal places. This works well for small geographic areas but
948# may be less accurate for locations spread over large distances due to
949# Earth's curvature.
950#
951# Parameters:
952#   $locations_ref - Reference to array of location hash objects, each containing
953#                   latitude and longitude fields
954#
955# Returns:
956#   ($centre_lat, $centre_lon) - Two-element list containing the calculated
957#                               centre coordinates as decimal degrees
958#
959# Algorithm:
960#   - Sums all latitude values and divides by count
961#   - Sums all longitude values and divides by count
962#   - Returns the arithmetic mean of both coordinates
963sub _calculate_centre
964{
965
12
4
        my $locations_ref = $_[0];
966
967
12
7
        my $total_lat = 0;
968
12
7
        my $total_lon = 0;
969
12
4
        my $count = 0;
970
971
12
8
        foreach my $loc (@$locations_ref) {
972
90
74
                $total_lat += $loc->{latitude};
973
90
95
                $total_lon += $loc->{longitude};
974
90
42
                $count++;
975        }
976
977        # Round to 6 decimal places
978
12
28
        my $centre_lat = sprintf('%.6f', $total_lat / $count);
979
12
14
        my $centre_lon = sprintf('%.6f', $total_lon / $count);
980
981
12
8
        return ($centre_lat, $centre_lon);
982}
983
984 - 1006
=head1 AUTHOR

Nigel Horne <njh@bandsman.co.uk>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 BUGS

The data are stored in the source,
they should be read in from somewhere else to make it easier for non-authors to add data.

=head1 SEE ALSO

=head1 LICENSE AND COPYRIGHT

Copyright 2020-2024 Nigel Horne.

The program code is released under the following licence: GPL2 for personal use on a single computer.
All other users (including Commercial, Charity, Educational, and Government)
must apply in writing for a licence for use from Nigel Horne at `<njh at nigelhorne.com>`.

=cut
1007
10081;
1009
1010# Ensure you use abbreviations, e.g., RD not ROAD