| File: | blib/lib/Geo/Coder/Free/Local.pm |
| Coverage: | 66.5% |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package Geo::Coder::Free::Local; | ||||||
| 2 | |||||||
| 3 | 8 8 8 | 53888 7 83 | use strict; | ||||
| 4 | 8 8 8 | 11 7 133 | use warnings; | ||||
| 5 | |||||||
| 6 | 8 8 8 | 12 3 195 | use Carp; | ||||
| 7 | 8 8 8 | 901 38419 99 | use Geo::Location::Point 0.14; | ||||
| 8 | 8 8 8 | 202 8 71 | use Geo::Coder::Free; | ||||
| 9 | 8 8 8 | 1185 151625 291 | use Geo::StreetAddress::US; | ||||
| 10 | 8 8 8 | 1645 374342 154 | use Lingua::EN::AddressParse; | ||||
| 11 | 8 8 8 | 989 4364 102 | use Locale::CA; | ||||
| 12 | 8 8 8 | 789 937 74 | use Locale::US; | ||||
| 13 | 8 8 8 | 965 206572 115 | use Object::Configure; | ||||
| 14 | 8 8 8 | 21 5 106 | use Params::Get; | ||||
| 15 | 8 8 8 | 1311 65206 262 | use Text::xSV::Slurp; | ||||
| 16 | |||||||
| 17 - 32 | =head1 NAME Geo::Coder::Free::Local - Provides an interface to locations that you know yourself, based on locally known data, thereby giving a way to geocode locations using self-curated data instead of relying on external APIs. For example, I have found locations by using GPS apps on a smartphone and by inspecting GeoTagged photographs using L<https://github.com/nigelhorne/NJH-Snippets/blob/master/bin/geotag> or by using the app GPSCF which are included here. =head1 VERSION Version 0.41 =cut | ||||||
| 33 | |||||||
| 34 | our $VERSION = '0.41'; | ||||||
| 35 | |||||||
| 36 | 8 8 8 | 25 7 172 | use constant LIBPOSTAL_UNKNOWN => 0; | ||||
| 37 | 8 8 8 | 9 7 103 | use constant LIBPOSTAL_INSTALLED => 1; | ||||
| 38 | 8 8 8 | 10 8 16362 | use constant LIBPOSTAL_NOT_INSTALLED => -1; | ||||
| 39 | our $libpostal_is_installed = LIBPOSTAL_UNKNOWN; | ||||||
| 40 | |||||||
| 41 | # Alternative mappings for ambiguous or inconsistent place names | ||||||
| 42 | # See also lib/Geo/Coder/Free.pm | ||||||
| 43 | our %alternatives = ( | ||||||
| 44 | 'ST LAWRENCE, THANET, KENT' => 'RAMSGATE, KENT', | ||||||
| 45 | 'ST PETERS, THANET, KENT' => 'ST PETERS, KENT', | ||||||
| 46 | 'MINSTER, THANET, KENT' => 'RAMSGATE, KENT', | ||||||
| 47 | 'TYNE AND WEAR' => 'BOROUGH OF NORTH TYNESIDE', | ||||||
| 48 | ); | ||||||
| 49 | |||||||
| 50 - 69 | =head1 SYNOPSIS use Geo::Coder::Free::Local; my $geocoder = Geo::Coder::Free::Local->new(); my $location = $geocoder->geocode(location => 'Ramsgate, Kent, UK'); =head1 DESCRIPTION Geo::Coder::Free::Local provides an interface to your own location data. =head1 METHODS =head2 new Initializes a geocoder object, loading the local data. $geocoder = Geo::Coder::Free::Local->new(); =cut | ||||||
| 70 | |||||||
| 71 | sub new | ||||||
| 72 | { | ||||||
| 73 | 7 | 1 | 101785 | my $class = shift; | |||
| 74 | 7 | 16 | my $params = Params::Get::get_params(undef, \@_) || {}; | ||||
| 75 | |||||||
| 76 | 7 | 94 | if(!defined($class)) { | ||||
| 77 | # Geo::Coder::Free::Local->new not Geo::Coder::Free::Local::new | ||||||
| 78 | # carp(__PACKAGE__, ' use ->new() not ::new() to instantiate'); | ||||||
| 79 | # return; | ||||||
| 80 | |||||||
| 81 | # FIXME: this only works when no arguments are given | ||||||
| 82 | 1 | 1 | $class = __PACKAGE__; | ||||
| 83 | } elsif(ref($class)) { | ||||||
| 84 | # clone the given object | ||||||
| 85 | 2 2 2 | 2 3 8 | return bless { %{$class}, %{$params} }, ref($class); | ||||
| 86 | } | ||||||
| 87 | |||||||
| 88 | 5 | 11 | $params = Object::Configure::configure($class, $params); | ||||
| 89 | |||||||
| 90 | 5 | 17277 | my @data = <DATA>; | ||||
| 91 | |||||||
| 92 | # TODO: since 'hoh' doesn't allow a CODEREF as a key, | ||||||
| 93 | # I could build an hoh manually from this aoh, | ||||||
| 94 | # it would make searching much quicker | ||||||
| 95 | my $self = bless { | ||||||
| 96 | data => xsv_slurp( | ||||||
| 97 | shape => 'aoh', | ||||||
| 98 | text_csv => { | ||||||
| 99 | allow_loose_quotes => 1, | ||||||
| 100 | blank_is_undef => 1, | ||||||
| 101 | empty_is_undef => 1, | ||||||
| 102 | binary => 1, | ||||||
| 103 | escape_char => '\\', | ||||||
| 104 | }, | ||||||
| 105 | string => \join('', grep(!/^\s*(#|$)/, @data)) | ||||||
| 106 | ), | ||||||
| 107 | 5 5 | 70 3204 | %{$params} | ||||
| 108 | }, $class; | ||||||
| 109 | |||||||
| 110 | # Process the data to find geographic centres of location clusters. | ||||||
| 111 | # This will identify groups with 3+ locations in the same city/state/country, | ||||||
| 112 | # thus adding towns/cities to the local database | ||||||
| 113 | 5 | 16 | my $towns = _find_geographic_centres(\@data); | ||||
| 114 | 5 5 | 7 4 | foreach my $town (@{$towns}) { | ||||
| 115 | 12 12 | 2 15 | push @{$self->{data}}, $town; | ||||
| 116 | } | ||||||
| 117 | |||||||
| 118 | # Build the hash-based index | ||||||
| 119 | 5 5 | 6 4 | foreach my $row (@{$self->{data} }) { | ||||
| 120 | 188 | 135 | my $key = lc(Geo::Location::Point->new($row)->as_string()); | ||||
| 121 | 188 | 12382 | $self->{'index'}{$key} = $row; | ||||
| 122 | } | ||||||
| 123 | |||||||
| 124 | # TODO: Perhaps the cache can be prepopulated, or stored in a less volatile location? | ||||||
| 125 | # The cache attribute stores normalized location strings as keys and Geo::Location::Point objects as values | ||||||
| 126 | 5 | 27 | return $self; | ||||
| 127 | } | ||||||
| 128 | |||||||
| 129 - 145 | =head2 geocode
Performs the geocoding operation by matching an input location against the local data and attempting different strategies for parsing and resolving the address.
Handles parsing of addresses based on location-specific rules, e.g., U.S., U.K., or Canada.
Uses various parsers for country-specific address normalization.
$location = $geocoder->geocode(location => $location);
print 'Latitude: ', $location->lat(), "\n";
print 'Longitude: ', $location->long(), "\n";
# TODO:
# @locations = $geocoder->geocode('Portland, USA');
# diag 'There are Portlands in ', join (', ', map { $_->{'state'} } @locations);
=cut | ||||||
| 146 | |||||||
| 147 | sub geocode { | ||||||
| 148 | 37 | 1 | 10820 | my $self = shift; | |||
| 149 | 37 | 29 | my %params; | ||||
| 150 | |||||||
| 151 | # Try hard to support whatever API the user wants to use | ||||||
| 152 | 37 | 120 | if(!ref($self)) { | ||||
| 153 | 0 | 0 | if(scalar(@_)) { | ||||
| 154 | 0 | 0 | return(__PACKAGE__->new()->geocode(@_)); | ||||
| 155 | } elsif(!defined($self)) { | ||||||
| 156 | # Geo::Coder::Free->geocode() | ||||||
| 157 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)'); | ||||
| 158 | } elsif($self eq __PACKAGE__) { | ||||||
| 159 | 0 | 0 | Carp::croak("Usage: $self", '::geocode(location => $location)'); | ||||
| 160 | } | ||||||
| 161 | 0 | 0 | return(__PACKAGE__->new()->geocode($self)); | ||||
| 162 | } elsif(ref($self) eq 'HASH') { | ||||||
| 163 | 0 | 0 | return(__PACKAGE__->new()->geocode($self)); | ||||
| 164 | } elsif(ref($_[0]) eq 'HASH') { | ||||||
| 165 | 16 16 | 13 26 | %params = %{$_[0]}; | ||||
| 166 | # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) { | ||||||
| 167 | } elsif(ref($_[0])) { | ||||||
| 168 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location)'); | ||||
| 169 | } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) { | ||||||
| 170 | 8 | 14 | %params = @_; | ||||
| 171 | } else { | ||||||
| 172 | 13 | 16 | $params{'location'} = shift; | ||||
| 173 | } | ||||||
| 174 | |||||||
| 175 | my $location = $params{location} | ||||||
| 176 | 37 | 51 | or Carp::croak('Usage: geocode(location => $location)'); | ||||
| 177 | |||||||
| 178 | # Only used to geolocate full addresses, not states/provinces | ||||||
| 179 | 36 | 88 | return if($location !~ /,.+,/); | ||||
| 180 | |||||||
| 181 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location); | ||||||
| 182 | |||||||
| 183 | # Look for a quick match, we may get lucky | ||||||
| 184 | 36 | 33 | my $lc = lc($location); | ||||
| 185 | 36 | 57 | $lc =~ s/,\susa$/, us/i; | ||||
| 186 | |||||||
| 187 | # Check the cache first | ||||||
| 188 | 36 | 59 | if(exists $self->{cache}{$lc}) { | ||||
| 189 | # ::diag("Found $lc in the cache"); | ||||||
| 190 | 16 | 25 | return $self->{cache}{$lc}; | ||||
| 191 | } | ||||||
| 192 | |||||||
| 193 | # Use the hash-based index for a quick lookup | ||||||
| 194 | 20 | 34 | if(exists $self->{index}{$lc}) { | ||||
| 195 | # Store the result in the cache for future requests | ||||||
| 196 | 5 | 17 | return $self->{cache}{$lc} = $self->{index}{$lc}; # Geo::Location::Point object | ||||
| 197 | } | ||||||
| 198 | # ::diag("$location: hash search failed"); | ||||||
| 199 | |||||||
| 200 | 15 | 22 | if(0) { | ||||
| 201 | # Old linear search mode, now replaced by the hash-based index | ||||||
| 202 | foreach my $row(@{$self->{'data'}}) { | ||||||
| 203 | my $rc = Geo::Location::Point->new($row); | ||||||
| 204 | my $str = lc($rc->as_string()); | ||||||
| 205 | |||||||
| 206 | # ::diag("Compare $str->$lc") if(($location =~ /MINSTER CEME/i) && ($str =~ /MINSTER CEME/i)); | ||||||
| 207 | # ::diag("Compare $str->$lc"); | ||||||
| 208 | # print "Compare $str->$lc\n"; | ||||||
| 209 | if($str eq $lc) { | ||||||
| 210 | # This looks pointless and I can't recall why I put it in | ||||||
| 211 | # foreach my $column ('name', 'state_district') { | ||||||
| 212 | # if((!defined($rc->{$column})) && exists($rc->{$column})) { | ||||||
| 213 | # delete $rc->{$column}; | ||||||
| 214 | # } | ||||||
| 215 | # } | ||||||
| 216 | # ::diag("$location: linear search suceeded"); | ||||||
| 217 | return $rc; | ||||||
| 218 | } | ||||||
| 219 | |||||||
| 220 | if(($str =~ /, us$/) && ("${str}a" eq $lc)) { | ||||||
| 221 | return $rc; | ||||||
| 222 | } | ||||||
| 223 | |||||||
| 224 | if(($lc =~ /(.+), (England|UK)$/i) && ($str eq "$1, gb")) { | ||||||
| 225 | return $rc; | ||||||
| 226 | } | ||||||
| 227 | } | ||||||
| 228 | # ::diag("$location: linear search failed"); | ||||||
| 229 | } | ||||||
| 230 | |||||||
| 231 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location); | ||||||
| 232 | |||||||
| 233 | 15 | 13 | my $ap; | ||||
| 234 | 15 | 84 | if(($location =~ /USA?$/) || ($location =~ /United States$/)) { | ||||
| 235 | 1 | 8 | $ap = $self->{'ap'}->{'us'} // Lingua::EN::AddressParse->new(country => 'US', auto_clean => 1, force_case => 1, force_post_code => 0); | ||||
| 236 | 1 | 140135 | $self->{'ap'}->{'us'} = $ap; | ||||
| 237 | } elsif($location =~ /(England|Scotland|Wales|Northern Ireland|UK|GB)$/i) { | ||||||
| 238 | 13 | 29 | $ap = $self->{'ap'}->{'gb'} // Lingua::EN::AddressParse->new(country => 'GB', auto_clean => 1, force_case => 1, force_post_code => 0); | ||||
| 239 | 13 | 610224 | $self->{'ap'}->{'gb'} = $ap; | ||||
| 240 | } elsif($location =~ /Canada$/) { | ||||||
| 241 | # TODO: no Canadian addresses yet | ||||||
| 242 | 0 | 0 | return; | ||||
| 243 | 0 | 0 | $ap = $self->{'ap'}->{'ca'} // Lingua::EN::AddressParse->new(country => 'CA', auto_clean => 1, force_case => 1, force_post_code => 0); | ||||
| 244 | 0 | 0 | $self->{'ap'}->{'ca'} = $ap; | ||||
| 245 | } elsif($location =~ /Australia$/) { | ||||||
| 246 | # TODO: no Australian addresses yet | ||||||
| 247 | 0 | 0 | return; | ||||
| 248 | 0 | 0 | $ap = $self->{'ap'}->{'au'} // Lingua::EN::AddressParse->new(country => 'AU', auto_clean => 1, force_case => 1, force_post_code => 0); | ||||
| 249 | 0 | 0 | $self->{'ap'}->{'au'} = $ap; | ||||
| 250 | } | ||||||
| 251 | 15 | 23 | if($ap) { | ||||
| 252 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location); | ||||||
| 253 | |||||||
| 254 | 14 | 15 | my $l = $location; | ||||
| 255 | 14 | 85 | if($l =~ /(.+), (England|UK)$/i) { | ||||
| 256 | 11 | 22 | $l = "$1, GB"; | ||||
| 257 | } | ||||||
| 258 | # if(my $error = $ap->parse($l)) { | ||||||
| 259 | # Carp::croak($ap->report()); | ||||||
| 260 | # ::diag('Address parse failed: ', $ap->report()); | ||||||
| 261 | # } else { | ||||||
| 262 | 14 | 31 | if($ap->parse($l) == 0) { | ||||
| 263 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', $location); | ||||||
| 264 | 5 | 127785 | my %c = $ap->components(); | ||||
| 265 | # ::diag(Data::Dumper->new([\%c])->Dump()); | ||||||
| 266 | 5 | 826 | my %addr = ('location' => $l); | ||||
| 267 | 5 | 9 | my $street = $c{'street_name'}; | ||||
| 268 | 5 | 7 | if(my $type = $c{'street_type'}) { | ||||
| 269 | 5 | 14 | if(my $a = Geo::Coder::Free::_abbreviate($type)) { | ||||
| 270 | 5 | 6 | $street .= " $a"; | ||||
| 271 | } else { | ||||||
| 272 | 0 | 0 | $street .= " $type"; | ||||
| 273 | } | ||||||
| 274 | 5 | 9 | if(my $suffix = $c{'street_direction_suffix'}) { | ||||
| 275 | 0 | 0 | $street .= " $suffix"; | ||||
| 276 | } | ||||||
| 277 | 5 | 6 | $street =~ s/^0+//; # Turn 04th St into 4th St | ||||
| 278 | 5 | 8 | $addr{'road'} = $street; | ||||
| 279 | } | ||||||
| 280 | 5 | 6 | if(length($c{'subcountry'}) == 2) { | ||||
| 281 | 0 | 0 | $addr{'state'} = $c{'subcountry'}; | ||||
| 282 | } else { | ||||||
| 283 | 5 | 18 | if($c{'country'} =~ /Canada/i) { | ||||
| 284 | 0 | 0 | $addr{'country'} = 'CA'; | ||||
| 285 | 0 | 0 | if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($c{'subcountry'})}) { | ||||
| 286 | 0 | 0 | $addr{'state'} = $twoletterstate; | ||||
| 287 | } | ||||||
| 288 | } elsif($c{'country'} =~ /^(United States|USA|US)$/i) { | ||||||
| 289 | 0 | 0 | $addr{'country'} = 'US'; | ||||
| 290 | 0 | 0 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($c{'subcountry'})}) { | ||||
| 291 | 0 | 0 | $addr{'state'} = $twoletterstate; | ||||
| 292 | } | ||||||
| 293 | } elsif($c{'country'}) { | ||||||
| 294 | 5 | 7 | $addr{'country'} = $c{'country'}; | ||||
| 295 | 5 | 5 | if($c{'subcountry'}) { | ||||
| 296 | 5 | 7 | $addr{'state'} = $c{'subcountry'}; | ||||
| 297 | } | ||||||
| 298 | } | ||||||
| 299 | } | ||||||
| 300 | 5 | 6 | $addr{'number'} = $c{'property_identifier'}; | ||||
| 301 | 5 | 6 | $addr{'city'} = $c{'suburb'}; | ||||
| 302 | # ::diag(Data::Dumper->new([\%addr])->Dump()); | ||||||
| 303 | # print Data::Dumper->new([\%addr])->Dump(), "\n"; | ||||||
| 304 | 5 | 12 | if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) { | ||||
| 305 | # Store the result in the cache for future requests | ||||||
| 306 | 1 | 38 | $self->{cache}{$lc} = $rc; | ||||
| 307 | |||||||
| 308 | 1 | 4 | return $rc; | ||||
| 309 | } | ||||||
| 310 | 4 | 6 | if($addr{'number'}) { | ||||
| 311 | 1 | 2 | if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) { | ||||
| 312 | # Store the result in the cache for future requests | ||||||
| 313 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 314 | |||||||
| 315 | 0 | 0 | return $rc; | ||||
| 316 | } | ||||||
| 317 | } | ||||||
| 318 | |||||||
| 319 | # Decide if it's worth continuing to search | ||||||
| 320 | 4 | 4 | my $found = 0; | ||||
| 321 | 4 | 6 | if(!defined($addr{'country'})) { | ||||
| 322 | 0 | 0 | if($l =~ /(United States|USA|US)$/i) { | ||||
| 323 | 0 | 0 | $addr{'country'} = 'US'; | ||||
| 324 | } else { | ||||||
| 325 | 0 | 0 | die "TODO: extract country from $l"; | ||||
| 326 | } | ||||||
| 327 | } | ||||||
| 328 | 4 4 | 4 3 | foreach my $row(@{$self->{'data'}}) { | ||||
| 329 | 8 | 22 | if((uc($row->{'state'}) eq uc($addr{'state'})) && | ||||
| 330 | (uc($row->{'country'}) eq uc($addr{'country'}))) { | ||||||
| 331 | 4 | 3 | $found = 1; | ||||
| 332 | 4 | 3 | last; | ||||
| 333 | } | ||||||
| 334 | } | ||||||
| 335 | 4 | 13 | if(!$found) { | ||||
| 336 | # Nothing at all in this state/country, | ||||||
| 337 | # so let's give up looking | ||||||
| 338 | 0 | 0 | return; | ||||
| 339 | } | ||||||
| 340 | } | ||||||
| 341 | } | ||||||
| 342 | |||||||
| 343 | 14 | 41212 | if($location =~ /^(.+?)[,\s]+(United States|USA|USA?)$/i) { | ||||
| 344 | # Try Geo::StreetAddress::US, which is rather buggy | ||||||
| 345 | |||||||
| 346 | 2 | 2 | my $l = $1; | ||||
| 347 | 2 | 3 | $l =~ tr/,/ /; | ||||
| 348 | 2 | 7 | $l =~ s/\s\s+/ /g; | ||||
| 349 | |||||||
| 350 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $location ($l)"); | ||||||
| 351 | |||||||
| 352 | # Work around for RT#122617 | ||||||
| 353 | 2 | 15 | if(($location !~ /\sCounty,/i) && (my $href = (Geo::StreetAddress::US->parse_location($l) || Geo::StreetAddress::US->parse_address($l)))) { | ||||
| 354 | # ::diag(Data::Dumper->new([$href])->Dump()); | ||||||
| 355 | 2 | 261 | if(my $state = $href->{'state'}) { | ||||
| 356 | 2 | 3 | if(length($state) > 2) { | ||||
| 357 | 0 | 0 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) { | ||||
| 358 | 0 | 0 | $state = $twoletterstate; | ||||
| 359 | } | ||||||
| 360 | } | ||||||
| 361 | 2 | 2 | my $city; | ||||
| 362 | 2 | 4 | if($href->{city}) { | ||||
| 363 | 2 | 6 | $city = uc($href->{city}); | ||||
| 364 | } | ||||||
| 365 | 2 | 3 | if(my $street = $href->{street}) { | ||||
| 366 | 2 | 6 | if($href->{'type'} && (my $type = Geo::Coder::Free::_abbreviate($href->{'type'}))) { | ||||
| 367 | 1 | 1 | $street .= " $type"; | ||||
| 368 | } | ||||||
| 369 | 2 | 4 | if($href->{suffix}) { | ||||
| 370 | 1 | 2 | $street .= ' ' . $href->{suffix}; | ||||
| 371 | } | ||||||
| 372 | 2 | 3 | if(my $prefix = $href->{prefix}) { | ||||
| 373 | 1 | 1 | $street = "$prefix $street"; | ||||
| 374 | } | ||||||
| 375 | my %addr = ( | ||||||
| 376 | 2 | 6 | number => $href->{'number'}, | ||||
| 377 | road => $street, | ||||||
| 378 | city => $city, | ||||||
| 379 | state => $state, | ||||||
| 380 | country => 'US' | ||||||
| 381 | ); | ||||||
| 382 | 2 | 4 | if($href->{'number'}) { | ||||
| 383 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) { | ||||
| 384 | 0 | 0 | $rc->{'country'} = 'US'; | ||||
| 385 | |||||||
| 386 | # Store the result in the cache for future requests | ||||||
| 387 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 388 | |||||||
| 389 | 0 | 0 | return $rc; | ||||
| 390 | } | ||||||
| 391 | } | ||||||
| 392 | 2 | 6 | if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) { | ||||
| 393 | 1 | 35 | $rc->{'country'} = 'US'; | ||||
| 394 | |||||||
| 395 | # Store the result in the cache for future requests | ||||||
| 396 | 1 | 2 | $self->{cache}{$lc} = $rc; | ||||
| 397 | |||||||
| 398 | 1 | 3 | return $rc; | ||||
| 399 | } | ||||||
| 400 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $location"); | ||||||
| 401 | 1 | 4 | if($street && !$href->{'number'}) { | ||||
| 402 | # If you give a building with | ||||||
| 403 | # no street to G:S:US it puts | ||||||
| 404 | # the building name into the | ||||||
| 405 | # street field | ||||||
| 406 | 1 | 1 | $addr{'name'} = $street; | ||||
| 407 | 1 | 2 | delete $addr{'road'}; | ||||
| 408 | |||||||
| 409 | 1 | 1 | if(my $rc = $self->_search(\%addr, ('name', 'city', 'state', 'country'))) { | ||||
| 410 | 0 | 0 | $rc->{'country'} = 'US'; | ||||
| 411 | |||||||
| 412 | # Store the result in the cache for future requests | ||||||
| 413 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 414 | |||||||
| 415 | 0 | 0 | return $rc; | ||||
| 416 | } | ||||||
| 417 | } | ||||||
| 418 | } | ||||||
| 419 | } | ||||||
| 420 | } | ||||||
| 421 | |||||||
| 422 | # Hack to find "name, street, town, state, US" | ||||||
| 423 | 1 | 4 | my @addr = split(/,\s*/, $location); | ||||
| 424 | # ::diag(__PACKAGE__, ': ', __LINE__, ' ', scalar(@addr)); | ||||||
| 425 | 1 | 3 | if(scalar(@addr) == 5) { | ||||
| 426 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $location"); | ||||||
| 427 | # ::diag(Data::Dumper->new([\@addr])->Dump()); | ||||||
| 428 | 1 | 2 | my $state = $addr[3]; | ||||
| 429 | 1 | 2 | if(length($state) > 2) { | ||||
| 430 | 1 | 6 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) { | ||||
| 431 | 1 | 289 | $state = $twoletterstate; | ||||
| 432 | } | ||||||
| 433 | } | ||||||
| 434 | 1 | 10 | if(length($state) == 2) { | ||||
| 435 | 1 | 2 | my %addr = ( | ||||
| 436 | city => $addr[2], | ||||||
| 437 | state => $state, | ||||||
| 438 | country => 'US' | ||||||
| 439 | ); | ||||||
| 440 | # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 441 | 1 | 2 | if($addr[0] !~ /^\d/) { | ||||
| 442 | # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 443 | 1 | 1 | $addr{'name'} = $addr[0]; | ||||
| 444 | 1 | 2 | if($addr[1] =~ /^(\d+)\s+(.+)/) { | ||||
| 445 | # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 446 | 1 | 2 | $addr{'number'} = $1; | ||||
| 447 | 1 | 4 | $addr{'road'} = Geo::Coder::Free::_normalize($2); | ||||
| 448 | 1 | 4 | if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) { | ||||
| 449 | # ::diag(Data::Dumper->new([$rc])->Dump()); | ||||||
| 450 | 1 | 39 | $rc->{'country'} = 'US'; | ||||
| 451 | |||||||
| 452 | # Store the result in the cache for future requests | ||||||
| 453 | 1 | 1 | $self->{cache}{$lc} = $rc; | ||||
| 454 | |||||||
| 455 | 1 | 7 | return $rc; | ||||
| 456 | } | ||||||
| 457 | } else { | ||||||
| 458 | 0 | 0 | $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]); | ||||
| 459 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) { | ||||
| 460 | # ::diag(Data::Dumper->new([$rc])->Dump()); | ||||||
| 461 | 0 | 0 | $rc->{'country'} = 'US'; | ||||
| 462 | |||||||
| 463 | # Store the result in the cache for future requests | ||||||
| 464 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 465 | |||||||
| 466 | 0 | 0 | return $rc; | ||||
| 467 | } | ||||||
| 468 | } | ||||||
| 469 | } else { | ||||||
| 470 | 0 | 0 | $addr{'number'} = $addr[0]; | ||||
| 471 | 0 | 0 | $addr{'road'} = Geo::Coder::Free::_normalize($addr[1]); | ||||
| 472 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) { | ||||
| 473 | # ::diag(Data::Dumper->new([$rc])->Dump()); | ||||||
| 474 | 0 | 0 | $rc->{'country'} = 'US'; | ||||
| 475 | |||||||
| 476 | # Store the result in the cache for future requests | ||||||
| 477 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 478 | |||||||
| 479 | 0 | 0 | return $rc; | ||||
| 480 | } | ||||||
| 481 | } | ||||||
| 482 | } | ||||||
| 483 | } | ||||||
| 484 | } | ||||||
| 485 | |||||||
| 486 | 12 | 45 | if(($location =~ /.+,.+,.*England$/) && | ||||
| 487 | ($location !~ /.+,.+,.+,.*England$/)) { | ||||||
| 488 | # Simple "Town, County, England" | ||||||
| 489 | # If we're here, it's not going to be found because the | ||||||
| 490 | # above parsers will have worked | ||||||
| 491 | 1 | 4 | return; | ||||
| 492 | } | ||||||
| 493 | |||||||
| 494 | 11 | 266 | require Geo::Address::Parser && Geo::Address::Parser->import() unless Geo::Address::Parser->can('parse'); | ||||
| 495 | |||||||
| 496 | 11 | 1597 | my $addr_parser = Geo::Address::Parser->new(country => 'UK'); | ||||
| 497 | 11 | 37215 | if(my $fields = $addr_parser->parse($location)) { | ||||
| 498 | 11 11 | 3140 14 | for my $key (keys %{$fields}) { | ||||
| 499 | 36 | 35 | delete $fields->{$key} unless defined $fields->{$key}; | ||||
| 500 | } | ||||||
| 501 | 11 11 | 14 22 | if(my $rc = $self->_search($fields, keys %{$fields})) { | ||||
| 502 | 0 | 0 | $rc->{'country'} = 'UK'; | ||||
| 503 | |||||||
| 504 | # Store the result in the cache for future requests | ||||||
| 505 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 506 | |||||||
| 507 | 0 | 0 | return $rc; | ||||
| 508 | } | ||||||
| 509 | } | ||||||
| 510 | |||||||
| 511 | # Finally try libpostal, | ||||||
| 512 | # which is good but uses a lot of memory and can take a very long time to parse data | ||||||
| 513 | 11 | 21 | if($libpostal_is_installed == LIBPOSTAL_UNKNOWN) { | ||||
| 514 | 1 1 | 1 32 | if(eval { require Geo::libpostal; } ) { | ||||
| 515 | 0 | 0 | Geo::libpostal->import(); | ||||
| 516 | 0 | 0 | $libpostal_is_installed = LIBPOSTAL_INSTALLED; | ||||
| 517 | } else { | ||||||
| 518 | 1 | 304 | $libpostal_is_installed = LIBPOSTAL_NOT_INSTALLED; | ||||
| 519 | } | ||||||
| 520 | } | ||||||
| 521 | |||||||
| 522 | # ::diag(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)"); | ||||||
| 523 | # print(__PACKAGE__, ': ', __LINE__, ": libpostal_is_installed = $libpostal_is_installed ($location)\n"); | ||||||
| 524 | |||||||
| 525 | # TODO: cache calls to this | ||||||
| 526 | 11 | 22 | if(($libpostal_is_installed == LIBPOSTAL_INSTALLED) && (my %addr = Geo::libpostal::parse_address($location))) { | ||||
| 527 | 0 | 0 | if($addr{'house_number'} && !$addr{'number'}) { | ||||
| 528 | 0 | 0 | $addr{'number'} = delete $addr{'house_number'}; | ||||
| 529 | } | ||||||
| 530 | 0 | 0 | if($addr{'house'} && !$addr{'name'}) { | ||||
| 531 | 0 | 0 | $addr{'name'} = delete $addr{'house'}; | ||||
| 532 | } | ||||||
| 533 | 0 | 0 | $addr{'location'} = $location; | ||||
| 534 | 0 | 0 | if(my $street = $addr{'road'}) { | ||||
| 535 | 0 | 0 | $addr{'road'} = Geo::Coder::Free::_normalize($street); | ||||
| 536 | } | ||||||
| 537 | 0 | 0 | if(defined($addr{'state'}) && !defined($addr{'country'}) && ($addr{'state'} eq 'england')) { | ||||
| 538 | 0 | 0 | delete $addr{'state'}; | ||||
| 539 | 0 | 0 | $addr{'country'} = 'GB'; | ||||
| 540 | } | ||||||
| 541 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([\%addr])->Dump()); | ||||||
| 542 | 0 | 0 | if($addr{'country'} && ($addr{'state'} || $addr{'state_district'})) { | ||||
| 543 | 0 | 0 | if($addr{'country'} =~ /Canada/i) { | ||||
| 544 | 0 | 0 | $addr{'country'} = 'Canada'; | ||||
| 545 | 0 | 0 | if(length($addr{'state'}) > 2) { | ||||
| 546 | 0 | 0 | if(my $twoletterstate = Locale::CA->new()->{province2code}{uc($addr{'state'})}) { | ||||
| 547 | 0 | 0 | $addr{'state'} = $twoletterstate; | ||||
| 548 | } | ||||||
| 549 | } | ||||||
| 550 | } elsif($addr{'country'} =~ /^(United States|USA|US)$/i) { | ||||||
| 551 | 0 | 0 | $addr{'country'} = 'US'; | ||||
| 552 | 0 | 0 | if(length($addr{'state'}) > 2) { | ||||
| 553 | 0 | 0 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($addr{'state'})}) { | ||||
| 554 | 0 | 0 | $addr{'state'} = $twoletterstate; | ||||
| 555 | } | ||||||
| 556 | } | ||||||
| 557 | } | ||||||
| 558 | 0 | 0 | if($addr{'state_district'}) { | ||||
| 559 | 0 | 0 | $addr{'state_district'} =~ s/^(.+)\s+COUNTY/$1/i; | ||||
| 560 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state_district', 'state', 'country'))) { | ||||
| 561 | |||||||
| 562 | # Store the result in the cache for future requests | ||||||
| 563 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 564 | |||||||
| 565 | 0 | 0 | return $rc; | ||||
| 566 | } | ||||||
| 567 | } | ||||||
| 568 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('number', 'road', 'city', 'state', 'country'))) { | ||||
| 569 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([$rc])->Dump()); | ||||||
| 570 | |||||||
| 571 | # Store the result in the cache for future requests | ||||||
| 572 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 573 | |||||||
| 574 | 0 | 0 | return $rc; | ||||
| 575 | } | ||||||
| 576 | 0 | 0 | if($addr{'number'}) { | ||||
| 577 | 0 | 0 | if(my $rc = $self->_search(\%addr, ('road', 'city', 'state', 'country'))) { | ||||
| 578 | |||||||
| 579 | # Store the result in the cache for future requests | ||||||
| 580 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 581 | |||||||
| 582 | 0 | 0 | return $rc; | ||||
| 583 | } | ||||||
| 584 | } | ||||||
| 585 | } | ||||||
| 586 | } | ||||||
| 587 | 11 | 64 | if($location =~ /^(.+?),\s*([\s\w]+),\s*([\s\w]+),\s*([\w\s]+)$/) { | ||||
| 588 | # >= 5.14 could say: | ||||||
| 589 | # my %addr = ( | ||||||
| 590 | # road => $1, | ||||||
| 591 | # city => $2, | ||||||
| 592 | # state => $3 =~ s/\s+$//r, | ||||||
| 593 | # country => $4 =~ s/\s+$//r | ||||||
| 594 | # ); | ||||||
| 595 | 11 | 29 | my %addr = ( | ||||
| 596 | road => $1, | ||||||
| 597 | city => $2, | ||||||
| 598 | state => $3, | ||||||
| 599 | country => $4, | ||||||
| 600 | ); | ||||||
| 601 | 11 | 20 | $addr{'state'} =~ s/\s$//g; | ||||
| 602 | 11 | 16 | $addr{'country'} =~ s/\s$//g; | ||||
| 603 | 11 | 39 | if($addr{'road'} =~ /([\w\s]+),*\s+(.+)/) { | ||||
| 604 | 11 | 14 | $addr{'name'} = $1; | ||||
| 605 | 11 | 11 | $addr{'road'} = $2; | ||||
| 606 | } | ||||||
| 607 | 11 | 22 | if($addr{'road'} =~ /^(\d+)\s+(.+)/) { | ||||
| 608 | 2 | 3 | $addr{'number'} = $1; | ||||
| 609 | 2 | 2 | $addr{'road'} = $2; | ||||
| 610 | # ::diag(__LINE__, ': ', Data::Dumper->new([\%addr])->Dump()); | ||||||
| 611 | 2 | 3 | if(my $rc = $self->_search(\%addr, ('name', 'number', 'road', 'city', 'state', 'country'))) { | ||||
| 612 | |||||||
| 613 | # Store the result in the cache for future requests | ||||||
| 614 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 615 | |||||||
| 616 | 0 | 0 | return $rc; | ||||
| 617 | } | ||||||
| 618 | } elsif(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) { | ||||||
| 619 | |||||||
| 620 | # Store the result in the cache for future requests | ||||||
| 621 | 1 | 31 | $self->{cache}{$lc} = $rc; | ||||
| 622 | |||||||
| 623 | 1 | 3 | return $rc; | ||||
| 624 | } | ||||||
| 625 | 10 | 32 | if($addr{'name'} && !defined($addr{'number'})) { | ||||
| 626 | # We know the name of the building but not the street number | ||||||
| 627 | # ::diag(__LINE__, ': ', $addr{'name'}); | ||||||
| 628 | 8 | 12 | if(my $rc = $self->_search(\%addr, ('name', 'road', 'city', 'state', 'country'))) { | ||||
| 629 | # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 630 | |||||||
| 631 | # Store the result in the cache for future requests | ||||||
| 632 | 0 | 0 | $self->{cache}{$lc} = $rc; | ||||
| 633 | |||||||
| 634 | 0 | 0 | return $rc; | ||||
| 635 | } | ||||||
| 636 | } | ||||||
| 637 | } | ||||||
| 638 | |||||||
| 639 | 10 | 19 | $location = uc($location); | ||||
| 640 | 10 | 18 | foreach my $left(keys %alternatives) { | ||||
| 641 | # ::diag("$location/$left"); | ||||||
| 642 | 36 | 153 | if($location =~ $left) { | ||||
| 643 | # ::diag($left, '=>', $alternatives{$left}); | ||||||
| 644 | 5 | 23 | $location =~ s/$left/$alternatives{$left}/; | ||||
| 645 | 5 | 6 | $params{'location'} = $location; | ||||
| 646 | # ::diag(__LINE__, ": found alternative '$location'"); | ||||||
| 647 | 5 | 20 | if(my $rc = $self->geocode(\%params)) { | ||||
| 648 | # ::diag(__LINE__, ": $location"); | ||||||
| 649 | |||||||
| 650 | # Store the result in the cache for future requests | ||||||
| 651 | 1 | 3 | $self->{cache}{$lc} = $rc; | ||||
| 652 | |||||||
| 653 | 1 | 6 | return $rc; | ||||
| 654 | } | ||||||
| 655 | 4 | 43 | if($location =~ /(.+), (England|UK)$/i) { | ||||
| 656 | 4 | 8 | $params{'location'} = "$1, GB"; | ||||
| 657 | 4 | 6 | if(my $rc = $self->geocode(\%params)) { | ||||
| 658 | # ::diag(__LINE__, ": $location"); | ||||||
| 659 | |||||||
| 660 | # Store the result in the cache for future requests | ||||||
| 661 | 3 | 15 | $self->{cache}{$lc} = $rc; | ||||
| 662 | |||||||
| 663 | 3 | 9 | return $rc; | ||||
| 664 | } | ||||||
| 665 | } | ||||||
| 666 | } | ||||||
| 667 | } | ||||||
| 668 | 6 | 19 | return; | ||||
| 669 | } | ||||||
| 670 | |||||||
| 671 | # Match parsed address components against the locally loaded dataset. | ||||||
| 672 | |||||||
| 673 | # $data is a hashref to data such as returned by Geo::libpostal::parse_address | ||||||
| 674 | # @columns is the key names to use in $data | ||||||
| 675 | sub _search | ||||||
| 676 | { | ||||||
| 677 | 40 | 56 | my ($self, $data, @columns) = @_; | ||||
| 678 | |||||||
| 679 | # FIXME: linear search is slow | ||||||
| 680 | # ::diag(__LINE__, ': ', Data::Dumper->new([\@columns, $data])->Dump()); | ||||||
| 681 | # print Data::Dumper->new([\@columns, $data])->Dump(); | ||||||
| 682 | # my @call_details = caller(0); | ||||||
| 683 | # ::diag(__LINE__, ': called from ', $call_details[2]); | ||||||
| 684 | 40 40 | 27 39 | foreach my $row(@{$self->{'data'}}) { | ||||
| 685 | 3434 | 1551 | my $match = 1; | ||||
| 686 | 3434 | 1533 | my $number_of_columns_matched; | ||||
| 687 | |||||||
| 688 | # ::diag(Data::Dumper->new([$self->{data}])->Dump()); | ||||||
| 689 | # print Data::Dumper->new([$self->{data}])->Dump(); | ||||||
| 690 | |||||||
| 691 | 3434 | 1611 | foreach my $column(@columns) { | ||||
| 692 | 3486 | 1971 | if(defined($data->{$column})) { | ||||
| 693 | 3486 | 2481 | if(!defined($row->{$column})) { | ||||
| 694 | 1195 | 544 | $match = 0; | ||||
| 695 | 1195 | 569 | last; | ||||
| 696 | } | ||||||
| 697 | # ::diag("$column: ", $row->{$column}, '/', $data->{$column}); | ||||||
| 698 | # print "$column: ", $row->{$column}, '/', $data->{$column}, "\n"; | ||||||
| 699 | 2291 | 1744 | if(uc($row->{$column}) ne uc($data->{$column})) { | ||||
| 700 | 2235 | 1031 | $match = 0; | ||||
| 701 | 2235 | 1033 | last; | ||||
| 702 | } | ||||||
| 703 | 56 | 35 | $number_of_columns_matched++; | ||||
| 704 | } elsif(exists $data->{$column}) { | ||||||
| 705 | 0 | 0 | delete $data->{$column}; | ||||
| 706 | } | ||||||
| 707 | } | ||||||
| 708 | # ::diag("match: $match"); | ||||||
| 709 | 3434 | 2330 | if($match && ($number_of_columns_matched >= 3)) { | ||||
| 710 | 4 | 16 | my $confidence; | ||||
| 711 | 4 | 7 | if($number_of_columns_matched == scalar(@columns)) { | ||||
| 712 | 4 | 5 | $confidence = 1.0; | ||||
| 713 | } elsif($number_of_columns_matched >= 4) { | ||||||
| 714 | 0 | 0 | $confidence = 0.7; | ||||
| 715 | } else { | ||||||
| 716 | 0 | 0 | $confidence = 0.5; | ||||
| 717 | } | ||||||
| 718 | # ::diag("$number_of_columns_matched -> $confidence"); | ||||||
| 719 | return Geo::Location::Point->new( | ||||||
| 720 | # 'latitude' => $row->{'latitude'}, | ||||||
| 721 | # 'longitude' => $row->{'longitude'}, | ||||||
| 722 | 'location' => $data->{'location'}, | ||||||
| 723 | 'confidence' => $confidence, | ||||||
| 724 | 'database' => __PACKAGE__, | ||||||
| 725 | 4 4 | 6 25 | %{$row} | ||||
| 726 | ); | ||||||
| 727 | } | ||||||
| 728 | } | ||||||
| 729 | 36 | 67 | return; | ||||
| 730 | } | ||||||
| 731 | |||||||
| 732 - 736 | =head2 reverse_geocode $location = $geocoder->reverse_geocode(latlng => '37.778907,-122.39732'); =cut | ||||||
| 737 | |||||||
| 738 | sub reverse_geocode { | ||||||
| 739 | 37 | 1 | 9149 | my $self = shift; | |||
| 740 | 37 | 16 | my %params; | ||||
| 741 | |||||||
| 742 | # Try hard to support whatever API that the user wants to use | ||||||
| 743 | 37 | 93 | if(!ref($self)) { | ||||
| 744 | 0 | 0 | if(scalar(@_)) { | ||||
| 745 | 0 | 0 | return(__PACKAGE__->new()->reverse_geocode(@_)); | ||||
| 746 | } elsif(!defined($self)) { | ||||||
| 747 | # Geo::Coder::Free->reverse_geocode() | ||||||
| 748 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | ||||
| 749 | } elsif($self eq __PACKAGE__) { | ||||||
| 750 | 0 | 0 | Carp::croak("Usage: $self", '::reverse_geocode(latlng => "$lat,$long")'); | ||||
| 751 | } | ||||||
| 752 | 0 | 0 | return(__PACKAGE__->new()->reverse_geocode($self)); | ||||
| 753 | } elsif(ref($self) eq 'HASH') { | ||||||
| 754 | 0 | 0 | return(__PACKAGE__->new()->reverse_geocode($self)); | ||||
| 755 | } elsif(ref($_[0]) eq 'HASH') { | ||||||
| 756 | 14 14 | 8 18 | %params = %{$_[0]}; | ||||
| 757 | # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) { | ||||||
| 758 | } elsif(ref($_[0])) { | ||||||
| 759 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | ||||
| 760 | } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) { | ||||||
| 761 | 14 | 21 | %params = @_; | ||||
| 762 | } else { | ||||||
| 763 | 9 | 9 | $params{'latlng'} = shift; | ||||
| 764 | } | ||||||
| 765 | |||||||
| 766 | 37 | 27 | my $latlng = $params{'latlng'}; | ||||
| 767 | |||||||
| 768 | 37 | 26 | my $latitude; | ||||
| 769 | my $longitude; | ||||||
| 770 | |||||||
| 771 | 37 | 31 | if($latlng) { | ||||
| 772 | 22 | 25 | ($latitude, $longitude) = split(/,/, $latlng); | ||||
| 773 | } else { | ||||||
| 774 | 15 | 30 | $latitude //= $params{'lat'}; | ||||
| 775 | 15 | 29 | $longitude //= $params{'lon'}; | ||||
| 776 | 15 | 24 | $longitude //= $params{'long'}; | ||||
| 777 | } | ||||||
| 778 | |||||||
| 779 | 37 | 57 | if((!defined($latitude)) || !defined($longitude)) { | ||||
| 780 | 1 | 2 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | ||||
| 781 | } | ||||||
| 782 | |||||||
| 783 | # ::diag(__LINE__, ": $latitude,$longitude"); | ||||||
| 784 | 36 | 18 | my @rc; | ||||
| 785 | 36 36 | 19 29 | foreach my $row(@{$self->{'data'}}) { | ||||
| 786 | 3310 | 3415 | if(defined($row->{'latitude'}) && defined($row->{'longitude'})) { | ||||
| 787 | # ::diag(__LINE__, ': ', $row->{'latitude'}, ', ', $latitude); | ||||||
| 788 | 3310 | 1889 | if(_equal($row->{'latitude'}, $latitude, 4) && | ||||
| 789 | _equal($row->{'longitude'}, $longitude, 4)) { | ||||||
| 790 | # ::diag('match'); | ||||||
| 791 | 71 | 72 | my $location = uc($row->as_string()); # Geo::Location::Point object | ||||
| 792 | 71 | 176 | if(wantarray) { | ||||
| 793 | 70 | 45 | push @rc, $location; | ||||
| 794 | 70 | 68 | while(my($left, $right) = each %alternatives) { | ||||
| 795 | # ::diag("$location/$left"); | ||||||
| 796 | 280 | 759 | if($location =~ $right) { | ||||
| 797 | # ::diag($right, '=>', $left); | ||||||
| 798 | 100 | 49 | my $l = $location; | ||||
| 799 | 100 | 129 | $l =~ s/$right/$left/; | ||||
| 800 | # ::diag(__LINE__, ": $location"); | ||||||
| 801 | 100 | 136 | push @rc, $l; | ||||
| 802 | # Don't add last here | ||||||
| 803 | } | ||||||
| 804 | } | ||||||
| 805 | } else { | ||||||
| 806 | 1 | 5 | return $location; | ||||
| 807 | } | ||||||
| 808 | } | ||||||
| 809 | } | ||||||
| 810 | } | ||||||
| 811 | 35 | 69 | return @rc; | ||||
| 812 | } | ||||||
| 813 | |||||||
| 814 | # https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch02s03.html | ||||||
| 815 | # equal(NUM1, NUM2, ACCURACY) : returns true if NUM1 and NUM2 are | ||||||
| 816 | # equal to ACCURACY number of decimal places | ||||||
| 817 | sub _equal { | ||||||
| 818 | 3461 | 2091 | my ($A, $B, $dp) = @_; | ||||
| 819 | |||||||
| 820 | 3461 | 6300 | return sprintf("%.${dp}g", $A) eq sprintf("%.${dp}g", $B); | ||||
| 821 | } | ||||||
| 822 | |||||||
| 823 - 827 | =head2 ua Does nothing, here for compatibility with other geocoders =cut | ||||||
| 828 | |||||||
| 829 | 1 | sub ua { | |||||
| 830 | } | ||||||
| 831 | |||||||
| 832 | # find_geographic_centres($csv_data) | ||||||
| 833 | # | ||||||
| 834 | # Helper function that processes CSV geographic data to find centres of location clusters. | ||||||
| 835 | # Takes a string containing CSV data with headers and analyzes it to find groups of | ||||||
| 836 | # 3 or more locations in the same city/state/country combination. For each qualifying | ||||||
| 837 | # group, it calculates the geographic centre and prints the results. | ||||||
| 838 | # | ||||||
| 839 | # Parameters: | ||||||
| 840 | # $csv_data - String containing complete CSV data including header row | ||||||
| 841 | # | ||||||
| 842 | # Processing steps: | ||||||
| 843 | # 1. Parses CSV header to get field names | ||||||
| 844 | # 2. Parses each data row into location hash objects | ||||||
| 845 | # 3. Validates that coordinates are numeric | ||||||
| 846 | # 4. Groups locations by city/state/country key | ||||||
| 847 | # 5. For groups with 3+ locations, calculates and prints centre coordinates | ||||||
| 848 | # | ||||||
| 849 | sub _find_geographic_centres | ||||||
| 850 | { | ||||||
| 851 | 5 | 5 | my $csv_data = $_[0]; | ||||
| 852 | |||||||
| 853 | # Parse CSV data into an array of hashes | ||||||
| 854 | # my @lines = split /\n/, $csv_data; | ||||||
| 855 | 5 5 | 4 20 | my @lines = @{$csv_data}; | ||||
| 856 | |||||||
| 857 | 5 | 27 | return if(scalar(@lines) == 0); | ||||
| 858 | |||||||
| 859 | 2 | 2 | my $header = shift @lines; | ||||
| 860 | |||||||
| 861 | # Remove quotes from header and split | ||||||
| 862 | 2 | 23 | $header =~ s/"//g; | ||||
| 863 | 2 | 3 | chomp $header; | ||||
| 864 | 2 | 10 | my @fields = split /,/, $header; | ||||
| 865 | |||||||
| 866 | 2 | 2 | my @locations = (); | ||||
| 867 | |||||||
| 868 | # Parse each data line | ||||||
| 869 | 2 | 3 | foreach my $line (@lines) { | ||||
| 870 | 176 | 126 | next if $line =~ /^\s*$/; # Skip empty lines | ||||
| 871 | 176 | 106 | chomp $line; | ||||
| 872 | |||||||
| 873 | # Simple CSV parsing - handles quoted fields | ||||||
| 874 | 176 | 71 | my @values = (); | ||||
| 875 | 176 | 87 | my $current_field = ''; | ||||
| 876 | 176 | 77 | my $in_quotes = 0; | ||||
| 877 | |||||||
| 878 | 176 | 463 | for my $char (split //, $line) { | ||||
| 879 | 15084 | 10268 | if ($char eq '"') { | ||||
| 880 | 2020 | 1023 | $in_quotes = !$in_quotes; | ||||
| 881 | } elsif ($char eq ',' && !$in_quotes) { | ||||||
| 882 | 1410 | 714 | push @values, $current_field; | ||||
| 883 | 1410 | 710 | $current_field = ''; | ||||
| 884 | } else { | ||||||
| 885 | 11654 | 5692 | $current_field .= $char; | ||||
| 886 | } | ||||||
| 887 | } | ||||||
| 888 | 176 | 356 | push @values, $current_field; # Don't forget the last field | ||||
| 889 | |||||||
| 890 | # Create location hash | ||||||
| 891 | 176 | 67 | my %location = (); | ||||
| 892 | 176 | 125 | for my $i (0..$#fields) { | ||||
| 893 | 1584 | 1231 | $location{$fields[$i]} = $values[$i] || ''; | ||||
| 894 | } | ||||||
| 895 | |||||||
| 896 | # Only include locations with valid coordinates | ||||||
| 897 | 176 | 442 | if($location{latitude} && $location{longitude} && | ||||
| 898 | ($location{latitude} =~ /^-?\d+\.?\d*$/) && | ||||||
| 899 | ($location{longitude} =~ /^-?\d+\.?\d*$/)) { | ||||||
| 900 | 172 | 184 | push @locations, \%location; | ||||
| 901 | } | ||||||
| 902 | } | ||||||
| 903 | |||||||
| 904 | # Group locations by city, state, country | ||||||
| 905 | 2 | 2 | my %groups = (); | ||||
| 906 | |||||||
| 907 | 2 | 2 | foreach my $loc (@locations) { | ||||
| 908 | 172 | 107 | my $key = join('|', $loc->{city}, $loc->{state}, $loc->{country}); | ||||
| 909 | 172 172 | 69 137 | push @{$groups{$key}}, $loc; | ||||
| 910 | } | ||||||
| 911 | |||||||
| 912 | 2 | 2 | my $rc; | ||||
| 913 | |||||||
| 914 | # Process groups with 3 or more locations | ||||||
| 915 | 2 | 7 | foreach my $group_key (keys %groups) { | ||||
| 916 | 84 | 35 | my $locations_ref = $groups{$group_key}; | ||||
| 917 | |||||||
| 918 | 84 | 54 | if (@$locations_ref >= 3) { | ||||
| 919 | 12 | 14 | my ($city, $state, $country) = split /\|/, $group_key; | ||||
| 920 | |||||||
| 921 | # Calculate geographic centre | ||||||
| 922 | 12 | 8 | my ($centre_lat, $centre_lon) = _calculate_centre($locations_ref); | ||||
| 923 | |||||||
| 924 | # printf("Center of %d locations in %s, %s, %s: %.6f, %.6f\n", | ||||||
| 925 | # scalar(@$locations_ref), $city, $state, $country, | ||||||
| 926 | # $centre_lat, $centre_lon); | ||||||
| 927 | |||||||
| 928 | 12 12 | 6 26 | push @{$rc}, { | ||||
| 929 | 'city' => $city, | ||||||
| 930 | 'state' => $state, | ||||||
| 931 | 'country' => $country, | ||||||
| 932 | 'lat' => $centre_lat, | ||||||
| 933 | 'latitude' => $centre_lat, | ||||||
| 934 | 'longitude' => $centre_lon, | ||||||
| 935 | 'long' => $centre_lon, | ||||||
| 936 | 'lng' => $centre_lon | ||||||
| 937 | }; | ||||||
| 938 | } | ||||||
| 939 | } | ||||||
| 940 | |||||||
| 941 | 2 | 65 | return $rc; | ||||
| 942 | } | ||||||
| 943 | |||||||
| 944 | # _calculate_centre($locations_ref) | ||||||
| 945 | # | ||||||
| 946 | # Helper funcation that calculates the geographic centre (centroid) of a group of locations using | ||||||
| 947 | # the arithmetic mean method to 6 decimal places. This works well for small geographic areas but | ||||||
| 948 | # may be less accurate for locations spread over large distances due to | ||||||
| 949 | # Earth's curvature. | ||||||
| 950 | # | ||||||
| 951 | # Parameters: | ||||||
| 952 | # $locations_ref - Reference to array of location hash objects, each containing | ||||||
| 953 | # latitude and longitude fields | ||||||
| 954 | # | ||||||
| 955 | # Returns: | ||||||
| 956 | # ($centre_lat, $centre_lon) - Two-element list containing the calculated | ||||||
| 957 | # centre coordinates as decimal degrees | ||||||
| 958 | # | ||||||
| 959 | # Algorithm: | ||||||
| 960 | # - Sums all latitude values and divides by count | ||||||
| 961 | # - Sums all longitude values and divides by count | ||||||
| 962 | # - Returns the arithmetic mean of both coordinates | ||||||
| 963 | sub _calculate_centre | ||||||
| 964 | { | ||||||
| 965 | 12 | 7 | my $locations_ref = $_[0]; | ||||
| 966 | |||||||
| 967 | 12 | 7 | my $total_lat = 0; | ||||
| 968 | 12 | 5 | my $total_lon = 0; | ||||
| 969 | 12 | 5 | my $count = 0; | ||||
| 970 | |||||||
| 971 | 12 | 9 | foreach my $loc (@$locations_ref) { | ||||
| 972 | 90 | 78 | $total_lat += $loc->{latitude}; | ||||
| 973 | 90 | 64 | $total_lon += $loc->{longitude}; | ||||
| 974 | 90 | 41 | $count++; | ||||
| 975 | } | ||||||
| 976 | |||||||
| 977 | # Round to 6 decimal places | ||||||
| 978 | 12 | 28 | my $centre_lat = sprintf('%.6f', $total_lat / $count); | ||||
| 979 | 12 | 13 | my $centre_lon = sprintf('%.6f', $total_lon / $count); | ||||
| 980 | |||||||
| 981 | 12 | 12 | return ($centre_lat, $centre_lon); | ||||
| 982 | } | ||||||
| 983 | |||||||
| 984 - 1006 | =head1 AUTHOR Nigel Horne <njh@bandsman.co.uk> This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 BUGS The data are stored in the source, they should be read in from somewhere else to make it easier for non-authors to add data. =head1 SEE ALSO =head1 LICENSE AND COPYRIGHT Copyright 2020-2024 Nigel Horne. The program code is released under the following licence: GPL2 for personal use on a single computer. All other users (including Commercial, Charity, Educational, and Government) must apply in writing for a licence for use from Nigel Horne at `<njh at nigelhorne.com>`. =cut | ||||||
| 1007 | |||||||
| 1008 | 1; | ||||||
| 1009 | |||||||
| 1010 | # Ensure you use abbreviations, e.g., RD not ROAD | ||||||