| File: | blib/lib/Geo/Coder/Free/MaxMind.pm |
| Coverage: | 14.8% |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package Geo::Coder::Free::MaxMind; | ||||||
| 2 | |||||||
| 3 | # sqlite3 cities.sql | ||||||
| 4 | # select * from cities where City like '%north shields%'; | ||||||
| 5 | # - note 'J5' | ||||||
| 6 | # grep 'GB.ENG.J5' admin2.db | ||||||
| 7 | |||||||
| 8 | # FIXME: If you search for something like "Sheppy, Kent, England" in list | ||||||
| 9 | # context, it returns them all. That's a lot! Should limit to, say | ||||||
| 10 | # 10 results (that number should be tuneable, and be a LIMIT in DB.pm) | ||||||
| 11 | # And as the correct spelling in Sheppey, arguably it should return nothing | ||||||
| 12 | |||||||
| 13 | 8 8 8 | 15 6 79 | use strict; | ||||
| 14 | 8 8 8 | 11 7 139 | use warnings; | ||||
| 15 | |||||||
| 16 | 8 8 8 | 731 15 35 | use Geo::Coder::Free::DB::MaxMind::admin1; | ||||
| 17 | 8 8 8 | 36212 14 29 | use Geo::Coder::Free::DB::MaxMind::admin2; | ||||
| 18 | 8 8 8 | 35482 10 27 | use Geo::Coder::Free::DB::MaxMind::cities; | ||||
| 19 | 8 8 8 | 29460 8 89 | use Geo::Location::Point; | ||||
| 20 | 8 8 8 | 1147 13111 90 | use Module::Info; | ||||
| 21 | 8 8 8 | 17 6 179 | use Carp; | ||||
| 22 | 8 8 8 | 13 8 51 | use File::Spec; | ||||
| 23 | 8 8 8 | 120 625 64 | use Locale::CA; | ||||
| 24 | 8 8 8 | 114 130 56 | use Locale::US; | ||||
| 25 | 8 8 8 | 1548 203714 115 | use CHI; | ||||
| 26 | 8 8 8 | 904 139130 501 | use Locale::Country; | ||||
| 27 | 8 8 8 | 26 7 13740 | use Scalar::Util; | ||||
| 28 | |||||||
| 29 | our %admin1cache; | ||||||
| 30 | our %admin2cache; # e.g. maps 'Kent' => 'P5' | ||||||
| 31 | |||||||
| 32 | sub _prepare($$); | ||||||
| 33 | |||||||
| 34 | # Some locations aren't found because of inconsistencies in the way things are stored - these are some values I know | ||||||
| 35 | # FIXME: Should be in a configuration file | ||||||
| 36 | my %known_locations = ( | ||||||
| 37 | 'Newport Pagnell, Buckinghamshire, England' => { | ||||||
| 38 | 'latitude' => 52.08675, | ||||||
| 39 | 'longitude' => -0.72270 | ||||||
| 40 | }, | ||||||
| 41 | ); | ||||||
| 42 | |||||||
| 43 - 51 | =head1 NAME Geo::Coder::Free::MaxMind - Provides a geocoding functionality using the MaxMind and GeoNames databases =head1 VERSION Version 0.41 =cut | ||||||
| 52 | |||||||
| 53 | our $VERSION = '0.41'; | ||||||
| 54 | |||||||
| 55 - 106 | =head1 SYNOPSIS use Geo::Coder::Free::MaxMind; my $geocoder = Geo::Coder::Free::MaxMind->new(); my $location = $geocoder->geocode(location => 'Ramsgate, Kent, UK'); =head1 DESCRIPTION Geo::Coder::Free::MaxMind provides an interface to free databases. Refer to the source URL for licencing information for these files: cities.csv is from L<https://www.maxmind.com/en/free-world-cities-database>; admin1.db is from L<http://download.geonames.org/export/dump/admin1CodesASCII.txt>; admin2.db is from L<http://download.geonames.org/export/dump/admin2Codes.txt>; See also L<http://download.geonames.org/export/dump/allCountries.zip> To significantly speed this up, gunzip cities.csv and run it through the db2sql script to create an SQLite file. =head1 METHODS =head2 new $geocoder = Geo::Coder::Free::MaxMind->new(); Takes one optional parameter, directory, which tells the library where to find the files admin1db, admin2.db and cities.[sql|csv.gz]. If that parameter isn't given, the module will attempt to find the databases, but that can't be guaranteed There are 3 levels to the Maxmind database. Here's the method to find the location of Sittingbourne, Kent, England: 1) admin1.db contains admin areas such as counties, states and provinces A typical line is: US.MD Maryland Maryland 4361885 So a look-up of 'Maryland' will get the concatenated code 'US.MD' Note that GB has England, Scotland and Wales at this level, not the counties GB.ENG England England 6269131 So a look-up of England will give the concatenated code of GB.ENG for use in admin2.db 2) admin2.db contains admin areas drilled down from the admin1 database such as US counties Note that GB has counties A typical line is: GB.ENG.G5 Kent Kent 3333158 So a look-up of 'Kent' with a concatenated code to start with 'GB.ENG' will code the region G5 for use in cities.sql 3) cities.sql contains the latitude and longitude of the place we want, so a search for 'sittingbourne' in the region 'g5' will give gb,sittingbourne,Sittingbourne,G5,41148,51.333333,.75 The admin2.db is far from comprehensive, see Makefile.PL for some entries that are added manually. =cut | ||||||
| 107 | |||||||
| 108 | sub new | ||||||
| 109 | { | ||||||
| 110 | 11 | 1 | 10 | my $class = shift; | |||
| 111 | |||||||
| 112 | # Handle hash or hashref arguments | ||||||
| 113 | 11 11 | 18 34 | my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; | ||||
| 114 | |||||||
| 115 | 11 | 33 | if(!defined($class)) { | ||||
| 116 | # Geo::Coder::Free::Local->new not Geo::Coder::Free::Local::new | ||||||
| 117 | # carp(__PACKAGE__, ' use ->new() not ::new() to instantiate'); | ||||||
| 118 | # return; | ||||||
| 119 | |||||||
| 120 | # FIXME: this only works when no arguments are given | ||||||
| 121 | 0 | 0 | $class = __PACKAGE__; | ||||
| 122 | } elsif(Scalar::Util::blessed($class)) { | ||||||
| 123 | # If $class is an object, clone it with new arguments | ||||||
| 124 | 0 0 | 0 0 | return bless { %{$class}, %args }, ref($class); | ||||
| 125 | } | ||||||
| 126 | |||||||
| 127 | 11 | 59 | my $directory = $args{'directory'} || Module::Info->new_from_loaded(__PACKAGE__)->file(); | ||||
| 128 | 11 | 561 | $directory =~ s/\.pm$//; | ||||
| 129 | |||||||
| 130 | 11 | 152 | if(!-d $directory) { | ||||
| 131 | 0 | 0 | Carp::croak(ref($class), ": directory $directory doesn't exist"); | ||||
| 132 | } | ||||||
| 133 | |||||||
| 134 | Database::Abstraction::init({ | ||||||
| 135 | cache_duration => '1 day', | ||||||
| 136 | %args, | ||||||
| 137 | directory => File::Spec->catfile($directory, 'databases'), | ||||||
| 138 | 11 | 99 | cache => $args{cache} || CHI->new(driver => 'Memory', global => 1) | ||||
| 139 | }); | ||||||
| 140 | |||||||
| 141 | # Return the blessed object | ||||||
| 142 | return bless { | ||||||
| 143 | 11 | 79218 | cache => $args{cache} || CHI->new(driver => 'Memory', global => 1), | ||||
| 144 | }, $class; | ||||||
| 145 | } | ||||||
| 146 | |||||||
| 147 - 163 | =head2 geocode
$location = $geocoder->geocode(location => $location);
print 'Latitude: ', $location->lat(), "\n";
print 'Longitude: ', $location->long(), "\n";
# TODO:
# @locations = $geocoder->geocode('Portland, USA');
# diag 'There are Portlands in ', join (', ', map { $_->{'state'} } @locations);
# This will return one place in New Brunwsick, not them all
# TODO: Arguably it should get them all from the database (or at least say the first 100) and return the central location
my @locations = $geocoder->geocode({ location => 'New Brunswick, Canada' });
die if(scalar(@locations) != 1);
=cut | ||||||
| 164 | |||||||
| 165 | sub geocode { | ||||||
| 166 | 1 | 1 | 1 | my $self = shift; | |||
| 167 | 1 | 1 | my %params; | ||||
| 168 | |||||||
| 169 | # Try hard to support whatever API that the user wants to use | ||||||
| 170 | 1 | 4 | if(!ref($self)) { | ||||
| 171 | 0 | 0 | if(scalar(@_)) { | ||||
| 172 | 0 | 0 | return(__PACKAGE__->new()->geocode(@_)); | ||||
| 173 | } elsif(!defined($self)) { | ||||||
| 174 | # Geo::Coder::Free->geocode() | ||||||
| 175 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location|scantext => $text)'); | ||||
| 176 | } elsif($self eq __PACKAGE__) { | ||||||
| 177 | 0 | 0 | Carp::croak("Usage: $self", '::geocode(location => $location|scantext => $text)'); | ||||
| 178 | } | ||||||
| 179 | 0 | 0 | return(__PACKAGE__->new()->geocode($self)); | ||||
| 180 | } elsif(ref($self) eq 'HASH') { | ||||||
| 181 | 0 | 0 | return(__PACKAGE__->new()->geocode($self)); | ||||
| 182 | } elsif(ref($_[0]) eq 'HASH') { | ||||||
| 183 | 1 1 | 1 2 | %params = %{$_[0]}; | ||||
| 184 | # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) { | ||||||
| 185 | } elsif(ref($_[0])) { | ||||||
| 186 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::geocode(location => $location|scantext => $text)'); | ||||
| 187 | } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) { | ||||||
| 188 | 0 | 0 | %params = @_; | ||||
| 189 | } else { | ||||||
| 190 | 0 | 0 | $params{'location'} = shift; | ||||
| 191 | } | ||||||
| 192 | |||||||
| 193 | my $location = $params{location} | ||||||
| 194 | 1 | 2 | or Carp::croak('Usage: geocode(location => $location)'); | ||||
| 195 | |||||||
| 196 | # Fail when the input is just a set of numbers | ||||||
| 197 | 1 | 20 | if($location !~ /\D/) { | ||||
| 198 | 0 | 0 | Carp::croak('Usage: ', __PACKAGE__, ": invalid input to geocode(), $location"); | ||||
| 199 | 0 | 0 | return; | ||||
| 200 | } | ||||||
| 201 | |||||||
| 202 | 1 | 2 | if($location =~ /^(.+),\s*Washington\s*DC,(.+)$/) { | ||||
| 203 | 0 | 0 | $location = "$1, Washington, DC, $2"; | ||||
| 204 | } | ||||||
| 205 | |||||||
| 206 | 1 | 3 | if(my $rc = $known_locations{$location}) { | ||||
| 207 | # return $known_locations{$location}; | ||||||
| 208 | return Geo::Location::Point->new({ | ||||||
| 209 | 'lat' => $rc->{'latitude'}, | ||||||
| 210 | 'long' => $rc->{'longitude'}, | ||||||
| 211 | 'lon' => $rc->{'longitude'}, | ||||||
| 212 | 0 | 0 | 'lng' => $rc->{'longitude'}, | ||||
| 213 | 'location' => $location, | ||||||
| 214 | 'database' => 'MaxMind' | ||||||
| 215 | }); | ||||||
| 216 | } | ||||||
| 217 | |||||||
| 218 | # ::diag(__LINE__, ": $location"); | ||||||
| 219 | 1 | 5 | return unless(($location =~ /,/) || $params{'region'}); # Not well formed, or an attempt to find the location of an entire country | ||||
| 220 | |||||||
| 221 | # Check cache first | ||||||
| 222 | 0 | my $cached_result = $self->{'cache'}->get($location); | |||||
| 223 | 0 | return $cached_result if($cached_result); | |||||
| 224 | |||||||
| 225 | 0 | my $county; | |||||
| 226 | my $state; | ||||||
| 227 | 0 | my $country; | |||||
| 228 | 0 | my $country_code; | |||||
| 229 | 0 | my $concatenated_codes; | |||||
| 230 | 0 | my $region_only; | |||||
| 231 | |||||||
| 232 | 0 | if($location =~ /^([\w\s\-]+),([\w\s]+),([\w\s]+)?$/) { | |||||
| 233 | # Turn 'Ramsgate, Kent, UK' into 'Ramsgate' | ||||||
| 234 | 0 | $location = $1; | |||||
| 235 | 0 | $county = $2; | |||||
| 236 | 0 | $country = $3; | |||||
| 237 | 0 | $location =~ s/\-/ /g; | |||||
| 238 | 0 | $county =~ s/^\s//g; | |||||
| 239 | 0 | $county =~ s/\s$//g; | |||||
| 240 | 0 | $country =~ s/^\s//g; | |||||
| 241 | 0 | $country =~ s/\s$//g; | |||||
| 242 | 0 | if($location =~ /^St\.? (.+)/) { | |||||
| 243 | 0 | $location = "Saint $1"; | |||||
| 244 | } | ||||||
| 245 | 0 | if(($country =~ /^(Canada|United States|USA|US)$/)) { | |||||
| 246 | 0 | $state = $county; | |||||
| 247 | 0 | $county = undef; | |||||
| 248 | } | ||||||
| 249 | } elsif($location =~ /^([\w\s\-]+),([\w\s]+),([\w\s]+),\s*(Canada|United States|USA|US)?$/) { | ||||||
| 250 | 0 | $location = $1; | |||||
| 251 | 0 | $county = $2; | |||||
| 252 | 0 | $state = $3; | |||||
| 253 | 0 | $country = $4; | |||||
| 254 | 0 | $county =~ s/^\s//g; | |||||
| 255 | 0 | $county =~ s/\s$//g; | |||||
| 256 | 0 | $state =~ s/^\s//g; | |||||
| 257 | 0 | $state =~ s/\s$//g; | |||||
| 258 | # $country =~ s/^\s//g; | ||||||
| 259 | 0 | $country =~ s/\s$//g; | |||||
| 260 | } elsif($location =~ /^[\w\s-],[\w\s-]/) { | ||||||
| 261 | 0 | Carp::carp(__PACKAGE__, ": can't parse and handle $location"); | |||||
| 262 | 0 | return; | |||||
| 263 | } elsif(($location =~ /^[\w\s-]+$/) && (my $region = $params{'region'})) { | ||||||
| 264 | 0 | $location =~ s/^\s//g; | |||||
| 265 | 0 | $location =~ s/\s$//g; | |||||
| 266 | 0 | $country = uc($region); | |||||
| 267 | } elsif($location =~ /^([\w\s-]+),\s*(\w+)$/) { | ||||||
| 268 | # e.g. a county in the UK or a state in the US | ||||||
| 269 | 0 | $county = $1; | |||||
| 270 | 0 | $country = $2; | |||||
| 271 | 0 | $county =~ s/^\s//g; | |||||
| 272 | 0 | $county =~ s/\s$//g; | |||||
| 273 | # $country =~ s/^\s//g; | ||||||
| 274 | 0 | $country =~ s/\s$//g; | |||||
| 275 | # ::diag(__LINE__, "$county, $country"); | ||||||
| 276 | 0 | $region_only = 1; # Will only return one match, not every match in the region | |||||
| 277 | } else { | ||||||
| 278 | # Carp::croak(__PACKAGE__, ' only supports towns, not full addresses'); | ||||||
| 279 | 0 | return; | |||||
| 280 | } | ||||||
| 281 | 0 | my $countrycode; | |||||
| 282 | 0 | if($country) { | |||||
| 283 | 0 | if(defined($country) && (($country eq 'UK') || ($country eq 'United Kingdom') || ($country eq 'England'))) { | |||||
| 284 | 0 | $country = 'Great Britain'; | |||||
| 285 | 0 | $concatenated_codes = 'GB'; | |||||
| 286 | } | ||||||
| 287 | 0 | $countrycode = country2code($country); | |||||
| 288 | # ::diag(__LINE__, ": country $countrycode, county $county, state $state, location $location"); | ||||||
| 289 | # if($county && $countrycode) { | ||||||
| 290 | # ::diag(__LINE__, ": country $countrycode, county $county, location $location"); | ||||||
| 291 | # } | ||||||
| 292 | |||||||
| 293 | 0 | if($state && $admin1cache{$state}) { | |||||
| 294 | 0 | $concatenated_codes = $admin1cache{$state}; | |||||
| 295 | } elsif($admin1cache{$country} && !defined($state)) { | ||||||
| 296 | 0 | $concatenated_codes = $admin1cache{$country}; | |||||
| 297 | } else { | ||||||
| 298 | 0 | $self->{'admin1'} //= Geo::Coder::Free::DB::MaxMind::admin1->new(no_entry => 1) or die "Can't open the admin1 database"; | |||||
| 299 | |||||||
| 300 | 0 | if(my $admin1 = $self->{'admin1'}->fetchrow_hashref(asciiname => $country)) { | |||||
| 301 | 0 | $concatenated_codes = $admin1->{'concatenated_codes'}; | |||||
| 302 | 0 | $admin1cache{$country} = $concatenated_codes; | |||||
| 303 | } elsif($state) { | ||||||
| 304 | 0 | $concatenated_codes = uc($countrycode); | |||||
| 305 | 0 | if($state =~ /^[A-Z]{2}$/) { | |||||
| 306 | 0 | $concatenated_codes .= ".$state"; | |||||
| 307 | } else { | ||||||
| 308 | 0 | $country_code = $concatenated_codes; | |||||
| 309 | 0 0 | my @admin1s = @{$self->{'admin1'}->selectall_hashref(asciiname => $state)}; | |||||
| 310 | 0 | foreach my $admin1(@admin1s) { | |||||
| 311 | 0 | if($admin1->{'concatenated_codes'} =~ /^$concatenated_codes\./i) { | |||||
| 312 | 0 | $concatenated_codes = $admin1->{'concatenated_codes'}; | |||||
| 313 | 0 | last; | |||||
| 314 | } | ||||||
| 315 | } | ||||||
| 316 | } | ||||||
| 317 | 0 | $admin1cache{$state} = $concatenated_codes; | |||||
| 318 | } elsif($countrycode) { | ||||||
| 319 | 0 | $concatenated_codes = uc($countrycode); | |||||
| 320 | 0 | $admin1cache{$country} = $concatenated_codes; | |||||
| 321 | } elsif(Locale::Country::code2country($country)) { | ||||||
| 322 | 0 | $concatenated_codes = uc($country); | |||||
| 323 | 0 | $admin1cache{$country} = $concatenated_codes; | |||||
| 324 | } | ||||||
| 325 | } | ||||||
| 326 | } | ||||||
| 327 | 0 | return unless(defined($concatenated_codes)); | |||||
| 328 | # ::diag(__LINE__, ": $concatenated_codes"); | ||||||
| 329 | |||||||
| 330 | 0 | my @admin2s; | |||||
| 331 | my $region; | ||||||
| 332 | 0 | my @regions; | |||||
| 333 | # ::diag(__LINE__, ": $country"); | ||||||
| 334 | 0 | if($country =~ /^(United States|USA|US)$/) { | |||||
| 335 | 0 | if($county && (length($county) > 2)) { | |||||
| 336 | 0 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($county)}) { | |||||
| 337 | 0 | $county = $twoletterstate; | |||||
| 338 | } | ||||||
| 339 | # ::diag(__LINE__, ": $location, $county, $country"); | ||||||
| 340 | } | ||||||
| 341 | 0 | if($state && (length($state) > 2)) { | |||||
| 342 | 0 | if(my $twoletterstate = Locale::US->new()->{state2code}{uc($state)}) { | |||||
| 343 | 0 | $state = $twoletterstate; | |||||
| 344 | } | ||||||
| 345 | # ::diag(__LINE__, ": $location, $state, $country"); | ||||||
| 346 | } | ||||||
| 347 | } elsif(($country eq 'Canada') && $state && (length($state) > 2)) { | ||||||
| 348 | # ::diag(__LINE__, ": $state"); | ||||||
| 349 | 0 | if(Locale::CA->new()->{province2code}{uc($state)}) { | |||||
| 350 | # FIXME: I can't see that province locations are stored in cities.csv | ||||||
| 351 | 0 | return unless(defined($location)); # OK if searching for a city, that works | |||||
| 352 | } | ||||||
| 353 | } | ||||||
| 354 | |||||||
| 355 | 0 | $self->{'admin2'} //= Geo::Coder::Free::DB::MaxMind::admin2->new(no_entry => 1) or die "Can't open the admin2 database"; | |||||
| 356 | |||||||
| 357 | 0 | if(defined($county) && ($county =~ /^[A-Z]{2}$/) && ($country =~ /^(United States|USA|US)$/)) { | |||||
| 358 | # US state. Not Canadian province. | ||||||
| 359 | 0 | $region = $county; | |||||
| 360 | } elsif($county && $admin1cache{$county}) { | ||||||
| 361 | # ::diag(__LINE__); | ||||||
| 362 | 0 | $region = $admin1cache{$county}; | |||||
| 363 | } elsif($county && $admin2cache{$county}) { | ||||||
| 364 | 0 | $region = $admin2cache{$county}; | |||||
| 365 | # ::diag(__LINE__, ": $county"); | ||||||
| 366 | } elsif(defined($state) && $admin2cache{$state} && !defined($county)) { | ||||||
| 367 | # ::diag(__LINE__); | ||||||
| 368 | 0 | $region = $admin2cache{$state}; | |||||
| 369 | } else { | ||||||
| 370 | # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 371 | 0 | if(defined($county) && ($county eq 'London')) { | |||||
| 372 | 0 | @admin2s = $self->{'admin2'}->selectall_hash(asciiname => $location); | |||||
| 373 | } elsif(defined($county)) { | ||||||
| 374 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $county"); | ||||||
| 375 | 0 | @admin2s = $self->{'admin2'}->selectall_hash(asciiname => $county); | |||||
| 376 | } | ||||||
| 377 | # ::diag(__LINE__, Data::Dumper->new([\@admin2s])->Dump()); | ||||||
| 378 | 0 | foreach my $admin2(@admin2s) { | |||||
| 379 | # ::diag(__LINE__, Data::Dumper->new([$admin2])->Dump()); | ||||||
| 380 | 0 | if($admin2->{'concatenated_codes'} =~ $concatenated_codes) { | |||||
| 381 | 0 | $region = $admin2->{'concatenated_codes'}; | |||||
| 382 | 0 | if($region =~ /^[A-Z]{2}\.([A-Z]{2})\./) { | |||||
| 383 | 0 | my $rc = $1; | |||||
| 384 | 0 | if(defined($state) && ($state =~ /^[A-Z]{2}$/)) { | |||||
| 385 | 0 | if($state eq $rc) { | |||||
| 386 | 0 | $region = $rc; | |||||
| 387 | 0 | @regions = (); | |||||
| 388 | 0 | last; | |||||
| 389 | } | ||||||
| 390 | } else { | ||||||
| 391 | 0 | push @regions, $region; | |||||
| 392 | 0 | push @regions, $rc; | |||||
| 393 | } | ||||||
| 394 | } else { | ||||||
| 395 | 0 | push @regions, $region; | |||||
| 396 | } | ||||||
| 397 | } | ||||||
| 398 | } | ||||||
| 399 | 0 | if($state && !defined($region)) { | |||||
| 400 | 0 | if($state =~ /^[A-Z]{2}$/) { | |||||
| 401 | 0 | $region = $state; | |||||
| 402 | 0 | @regions = (); | |||||
| 403 | } else { | ||||||
| 404 | 0 | @admin2s = $self->{'admin2'}->selectall_hash(asciiname => $state); | |||||
| 405 | 0 | foreach my $admin2(@admin2s) { | |||||
| 406 | 0 | if($admin2->{'concatenated_codes'} =~ $concatenated_codes) { | |||||
| 407 | 0 | $region = $admin2->{'concatenated_codes'}; | |||||
| 408 | 0 | last; | |||||
| 409 | } | ||||||
| 410 | } | ||||||
| 411 | } | ||||||
| 412 | } | ||||||
| 413 | } | ||||||
| 414 | |||||||
| 415 | 0 | if((scalar(@regions) == 0) && !defined($region)) { | |||||
| 416 | # e.g. Unitary authorities in the UK | ||||||
| 417 | # admin[12].db columns are labelled ['concatenated_codes', 'name', 'asciiname', 'geonameId'] | ||||||
| 418 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $location"); | ||||||
| 419 | 0 | @admin2s = $self->{'admin2'}->selectall_hash(asciiname => $location); | |||||
| 420 | 0 | if((scalar(@admin2s) == 0) && ($country =~ /^(Canada|United States|USA|US)$/) && ($location !~ /\sCounty/i)) { | |||||
| 421 | 0 | $location .= ' County'; | |||||
| 422 | # ::diag(__PACKAGE__, ': ', __LINE__, ": $location"); | ||||||
| 423 | 0 | @admin2s = $self->{'admin2'}->selectall_hash(asciiname => $location); | |||||
| 424 | } | ||||||
| 425 | 0 | if(scalar(@admin2s) && defined($admin2s[0]->{'concatenated_codes'})) { | |||||
| 426 | 0 | foreach my $admin2(@admin2s) { | |||||
| 427 | 0 | my $concat = $admin2->{'concatenated_codes'}; | |||||
| 428 | 0 | if($concat =~ /^CA\.(\d\d)\./) { | |||||
| 429 | # Canadian provinces are not stored in the same way as US states | ||||||
| 430 | 0 | $region = $1; | |||||
| 431 | 0 | last; | |||||
| 432 | } elsif($concat =~ $concatenated_codes) { | ||||||
| 433 | 0 | $region = $concat; | |||||
| 434 | 0 | last; | |||||
| 435 | } | ||||||
| 436 | } | ||||||
| 437 | } elsif(defined($county)) { | ||||||
| 438 | # ::diag(__PACKAGE__, ': ', __LINE__, ": county $county"); | ||||||
| 439 | # e.g. states in the US | ||||||
| 440 | 0 | if(!defined($self->{'admin1'})) { | |||||
| 441 | 0 | $self->{'admin1'} = Geo::Coder::Free::DB::MaxMind::admin1->new(no_entry => 1) or die "Can't open the admin1 database"; | |||||
| 442 | } | ||||||
| 443 | 0 | my @admin1s = $self->{'admin1'}->selectall_hash(asciiname => $county); | |||||
| 444 | 0 | foreach my $admin1(@admin1s) { | |||||
| 445 | # ::diag(__LINE__, Data::Dumper->new([$admin1])->Dump()); | ||||||
| 446 | 0 | if($admin1->{'concatenated_codes'} =~ /^$concatenated_codes\./i) { | |||||
| 447 | 0 | $region = $admin1->{'concatenated_codes'}; | |||||
| 448 | 0 | if(scalar(@admin1s) == 1) { | |||||
| 449 | 0 | $admin1cache{$county} = $region; | |||||
| 450 | } | ||||||
| 451 | 0 | last; | |||||
| 452 | } | ||||||
| 453 | } | ||||||
| 454 | } | ||||||
| 455 | } | ||||||
| 456 | |||||||
| 457 | 0 | if(!defined($self->{'cities'})) { | |||||
| 458 | $self->{'cities'} = Geo::Coder::Free::DB::MaxMind::cities->new( | ||||||
| 459 | 0 | cache => $self->{cache} || CHI->new(driver => 'Memory', datastore => {}), | |||||
| 460 | no_entry => 1, | ||||||
| 461 | ); | ||||||
| 462 | } | ||||||
| 463 | |||||||
| 464 | 0 | my $options; | |||||
| 465 | 0 | if(defined($county) && ($county =~ /^[A-Z]{2}$/) && ($country =~ /^(United States|USA|US)$/)) { | |||||
| 466 | 0 | $options = { Country => 'us' }; | |||||
| 467 | } else { | ||||||
| 468 | 0 | if($region_only) { | |||||
| 469 | 0 | $options = {}; | |||||
| 470 | } else { | ||||||
| 471 | 0 | $options = { City => lc($location) }; | |||||
| 472 | 0 | $options->{'City'} =~ s/,\s*\w+$//; | |||||
| 473 | } | ||||||
| 474 | } | ||||||
| 475 | 0 | if($region) { | |||||
| 476 | 0 | if($region =~ /^.+\.(.+)$/) { | |||||
| 477 | 0 | $region = $1; | |||||
| 478 | } | ||||||
| 479 | 0 | $options->{'Region'} = $region; | |||||
| 480 | 0 | if($country_code) { | |||||
| 481 | 0 | $options->{'Country'} = lc($country_code); | |||||
| 482 | } | ||||||
| 483 | # If there's more than one match, don't cache as we don't | ||||||
| 484 | # know which one will be matched later | ||||||
| 485 | 0 | if(scalar(@admin2s) == 1) { | |||||
| 486 | 0 | if($state) { | |||||
| 487 | 0 | $admin2cache{$state} = $region; | |||||
| 488 | } elsif($county) { | ||||||
| 489 | 0 | $admin2cache{$county} = $region; | |||||
| 490 | } | ||||||
| 491 | } | ||||||
| 492 | } | ||||||
| 493 | |||||||
| 494 | 0 | my $confidence = 0.5; | |||||
| 495 | 0 | if(my $c = $params{'region'}) { | |||||
| 496 | 0 | $options->{'Country'} = lc($c); | |||||
| 497 | 0 | $confidence = 0.1; | |||||
| 498 | } elsif($countrycode) { | ||||||
| 499 | 0 | $options->{'Country'} = $countrycode; | |||||
| 500 | 0 | $confidence = 0.1; | |||||
| 501 | } | ||||||
| 502 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([$options])->Dump()); | ||||||
| 503 | # This case nonsense is because DBD::CSV changes the columns to lowercase, whereas DBD::SQLite does not | ||||||
| 504 | # if(wantarray && (!$options->{'City'}) && !$region_only) { | ||||||
| 505 | # if(0) { # We don't need to find all the cities in a state, which is what this would do | ||||||
| 506 | # # ::diag(__PACKAGE__, ': ', __LINE__); | ||||||
| 507 | # my @rc = $self->{'cities'}->selectall_hash($options); | ||||||
| 508 | # if(scalar(@rc) == 0) { | ||||||
| 509 | # if((!defined($region)) && !defined($params{'region'})) { | ||||||
| 510 | # # Add code for this area to Makefile.PL and rebuild | ||||||
| 511 | # Carp::carp(__PACKAGE__, ": didn't determine region from $location"); | ||||||
| 512 | # return; | ||||||
| 513 | # } | ||||||
| 514 | # # This would return all of the cities in the wrong region | ||||||
| 515 | # if($countrycode) { | ||||||
| 516 | # @rc = $self->{'cities'}->selectall_hash('Region' => ($region || $params{'region'}), 'Country' => $countrycode); | ||||||
| 517 | # if(scalar(@rc) == 0) { | ||||||
| 518 | # # ::diag(__PACKAGE__, ': ', __LINE__, ': no matches: ', Data::Dumper->new([$options])->Dump()); | ||||||
| 519 | # return; | ||||||
| 520 | # } | ||||||
| 521 | # } | ||||||
| 522 | # # ::diag(__LINE__, ': ', Data::Dumper->new([\@rc])->Dump()); | ||||||
| 523 | # } | ||||||
| 524 | # # ::diag(__LINE__, ': ', Data::Dumper->new([\@rc])->Dump()); | ||||||
| 525 | # foreach my $city(@rc) { | ||||||
| 526 | # if($city->{'Latitude'}) { | ||||||
| 527 | # $city->{'latitude'} = delete $city->{'Latitude'}; | ||||||
| 528 | # $city->{'longitude'} = delete $city->{'Longitude'}; | ||||||
| 529 | # } | ||||||
| 530 | # if($city->{'Country'}) { | ||||||
| 531 | # $city->{'country'} = uc(delete $city->{'Country'}); | ||||||
| 532 | # } | ||||||
| 533 | # if($city->{'Region'}) { | ||||||
| 534 | # $city->{'state'} = uc(delete $city->{'Region'}); | ||||||
| 535 | # } | ||||||
| 536 | # if($city->{'City'}) { | ||||||
| 537 | # $city->{'city'} = uc(delete $city->{'AccentCity'}); | ||||||
| 538 | # delete $city->{'City'}; | ||||||
| 539 | # # Less likely to get false positives with long words | ||||||
| 540 | # if(length($city->{'city'}) > 10) { | ||||||
| 541 | # if($confidence <= 0.8) { | ||||||
| 542 | # $confidence += 0.2; | ||||||
| 543 | # } else { | ||||||
| 544 | # $confidence = 1.0; | ||||||
| 545 | # } | ||||||
| 546 | # } | ||||||
| 547 | # } | ||||||
| 548 | # $city->{'confidence'} = $confidence; | ||||||
| 549 | # my $l = $options->{'City'}; | ||||||
| 550 | # if($options->{'Region'}) { | ||||||
| 551 | # $l .= ', ' . $options->{'Region'}; | ||||||
| 552 | # } | ||||||
| 553 | # if($options->{'Country'}) { | ||||||
| 554 | # $l .= ', ' . ucfirst($options->{'Country'}); | ||||||
| 555 | # } | ||||||
| 556 | # $city->{'location'} = $l; | ||||||
| 557 | # } | ||||||
| 558 | # # return @rc; | ||||||
| 559 | # my @locations; | ||||||
| 560 | # | ||||||
| 561 | # foreach my $l(@rc) { | ||||||
| 562 | # if(exists($l->{'latitude'})) { | ||||||
| 563 | # push @locations, Geo::Location::Point->new({ | ||||||
| 564 | # 'lat' => $l->{'latitude'}, | ||||||
| 565 | # 'long' => $l->{'longitude'}, | ||||||
| 566 | # 'lon' => $l->{'longitude'}, | ||||||
| 567 | # 'location' => $location, | ||||||
| 568 | # 'database' => 'MaxMind', | ||||||
| 569 | # 'maxmind' => $l, | ||||||
| 570 | # }); | ||||||
| 571 | # # } else { | ||||||
| 572 | # # Carp::carp(__PACKAGE__, ": $location has latitude of 0"); | ||||||
| 573 | # # return; | ||||||
| 574 | # } | ||||||
| 575 | # } | ||||||
| 576 | # | ||||||
| 577 | # return @locations; | ||||||
| 578 | # } | ||||||
| 579 | # ::diag(__PACKAGE__, ': ', __LINE__, ': ', Data::Dumper->new([$options])->Dump()); | ||||||
| 580 | 0 | my $city = $self->{'cities'}->fetchrow_hashref($options); | |||||
| 581 | 0 | if(!defined($city)) { | |||||
| 582 | # ::diag(__LINE__, ': ', scalar(@regions)); | ||||||
| 583 | 0 | foreach $region(@regions) { | |||||
| 584 | 0 | if($region =~ /^.+\.(.+)$/) { | |||||
| 585 | 0 | $region = $1; | |||||
| 586 | } | ||||||
| 587 | 0 | if($country =~ /^(United States|USA|US)$/) { | |||||
| 588 | 0 | next unless($region =~ /^[A-Z]{2}$/); # In the US, the regions are the states | |||||
| 589 | } | ||||||
| 590 | 0 | $options->{'Region'} = $region; | |||||
| 591 | 0 | $city = $self->{'cities'}->fetchrow_hashref($options); | |||||
| 592 | 0 | last if(defined($city)); | |||||
| 593 | } | ||||||
| 594 | } | ||||||
| 595 | |||||||
| 596 | # ::diag(__LINE__, ': ', Data::Dumper->new([$city])->Dump()); | ||||||
| 597 | 0 | if(defined($city) && defined($city->{'Latitude'})) { | |||||
| 598 | # Cache and return result | ||||||
| 599 | 0 | delete $city->{'Region'} if(defined($city->{'Region'}) && ($city->{'Region'} =~ /^[A-Z]\d$/)); # E.g. Region = G5 | |||||
| 600 | 0 | delete $city->{'Population'} if(defined($city->{'Population'}) && (length($city->{'Population'}) == 0)); | |||||
| 601 | my $rc = Geo::Location::Point->new({ | ||||||
| 602 | 0 0 | %{$city}, | |||||
| 603 | ('database' => 'MaxMind', 'confidence' => $confidence) | ||||||
| 604 | }); | ||||||
| 605 | 0 | $self->{cache}->set($location, $rc); | |||||
| 606 | 0 | return $rc; | |||||
| 607 | } | ||||||
| 608 | # return $city; | ||||||
| 609 | 0 | return; | |||||
| 610 | } | ||||||
| 611 | |||||||
| 612 - 618 | =head2 reverse_geocode $location = $geocoder->reverse_geocode(latlng => '37.778907,-122.39732'); Returns a string, or undef if it can't be found. =cut | ||||||
| 619 | |||||||
| 620 | sub reverse_geocode | ||||||
| 621 | { | ||||||
| 622 | 0 | 1 | my $self = shift; | ||||
| 623 | 0 | my %params; | |||||
| 624 | |||||||
| 625 | # Try hard to support whatever API that the user wants to use | ||||||
| 626 | 0 | if(!ref($self)) { | |||||
| 627 | 0 | if(scalar(@_)) { | |||||
| 628 | 0 | return(__PACKAGE__->new()->reverse_geocode(@_)); | |||||
| 629 | } elsif(!defined($self)) { | ||||||
| 630 | # Geo::Coder::Free->reverse_geocode() | ||||||
| 631 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | |||||
| 632 | } elsif($self eq __PACKAGE__) { | ||||||
| 633 | 0 | Carp::croak("Usage: $self", '::reverse_geocode(latlng => "$lat,$long")'); | |||||
| 634 | } | ||||||
| 635 | 0 | return(__PACKAGE__->new()->reverse_geocode($self)); | |||||
| 636 | } elsif(ref($self) eq 'HASH') { | ||||||
| 637 | 0 | return(__PACKAGE__->new()->reverse_geocode($self)); | |||||
| 638 | } elsif(ref($_[0]) eq 'HASH') { | ||||||
| 639 | 0 0 | %params = %{$_[0]}; | |||||
| 640 | # } elsif(ref($_[0]) && (ref($_[0] !~ /::/))) { | ||||||
| 641 | } elsif(ref($_[0])) { | ||||||
| 642 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | |||||
| 643 | } elsif(scalar(@_) && (scalar(@_) % 2 == 0)) { | ||||||
| 644 | 0 | %params = @_; | |||||
| 645 | } else { | ||||||
| 646 | 0 | $params{'latlng'} = shift; | |||||
| 647 | } | ||||||
| 648 | |||||||
| 649 | 0 | my $latlng = $params{'latlng'}; | |||||
| 650 | |||||||
| 651 | 0 | my $latitude; | |||||
| 652 | my $longitude; | ||||||
| 653 | |||||||
| 654 | 0 | if($latlng) { | |||||
| 655 | 0 | ($latitude, $longitude) = split(/,/, $latlng); | |||||
| 656 | } else { | ||||||
| 657 | 0 | $latitude //= $params{'lat'}; | |||||
| 658 | 0 | $longitude //= $params{'lon'}; | |||||
| 659 | 0 | $longitude //= $params{'long'}; | |||||
| 660 | } | ||||||
| 661 | |||||||
| 662 | 0 | if((!defined($latitude)) || !defined($longitude)) { | |||||
| 663 | 0 | Carp::croak('Usage: ', __PACKAGE__, '::reverse_geocode(latlng => "$lat,$long")'); | |||||
| 664 | } | ||||||
| 665 | |||||||
| 666 | 0 | if(!defined($self->{'cities'})) { | |||||
| 667 | $self->{'cities'} = Geo::Coder::Free::DB::MaxMind::cities->new( | ||||||
| 668 | 0 | cache => $self->{cache} || CHI->new(driver => 'Memory', datastore => {}), | |||||
| 669 | no_entry => 1, | ||||||
| 670 | ); | ||||||
| 671 | } | ||||||
| 672 | |||||||
| 673 | 0 | if(wantarray) { | |||||
| 674 | 0 | my @locs = $self->{'cities'}->execute("SELECT * FROM cities WHERE ((ABS(Latitude - $latitude)) < 0.01) AND ((ABS(Longitude - $longitude)) < 0.01)"); | |||||
| 675 | 0 | foreach my $loc(@locs) { | |||||
| 676 | 0 | $self->_prepare($loc); | |||||
| 677 | } | ||||||
| 678 | 0 0 | return map { Geo::Location::Point->new($_)->as_string() } @locs; | |||||
| 679 | } | ||||||
| 680 | # Try close in then zoom out, to a reasonable limit | ||||||
| 681 | 0 | foreach my $radius(0.000001, 0.00001, 0.0001, 0.001, 0.01) { | |||||
| 682 | 0 | if(my $rc = $self->{'cities'}->execute("SELECT * FROM cities WHERE ((ABS(Latitude - $latitude)) < $radius) AND ((ABS(Longitude - $longitude)) < $radius) LIMIT 1")) { | |||||
| 683 | 0 | $self->_prepare($rc); | |||||
| 684 | 0 | return Geo::Location::Point->new($rc)->as_string(); | |||||
| 685 | } | ||||||
| 686 | } | ||||||
| 687 | 0 | return; | |||||
| 688 | } | ||||||
| 689 | |||||||
| 690 | # Change 'Charing Cross, P5, Gb' to 'Charing Cross, London, Gb' | ||||||
| 691 | sub _prepare($$) { | ||||||
| 692 | 0 | my ($self, $loc) = @_; | |||||
| 693 | |||||||
| 694 | 0 | if(my $region = $loc->{'Region'}) { | |||||
| 695 | 0 | my $county; | |||||
| 696 | |||||||
| 697 | # Check if region is already cached in admin2cache | ||||||
| 698 | 0 | while(my ($key, $value) = each %admin2cache) { | |||||
| 699 | 0 | if($value eq $region) { | |||||
| 700 | 0 | $county = $key; | |||||
| 701 | 0 | last; | |||||
| 702 | } | ||||||
| 703 | } | ||||||
| 704 | 0 | if($county) { | |||||
| 705 | 0 | $loc->{'Region'} = $county; | |||||
| 706 | } else { | ||||||
| 707 | # Initialize admin2 object if not already initialized | ||||||
| 708 | 0 | $self->{'admin2'} //= Geo::Coder::Free::DB::MaxMind::admin2->new(no_entry => 1) or die "Can't open the admin2 database"; | |||||
| 709 | |||||||
| 710 | # Prepare and execute SQL query | ||||||
| 711 | |||||||
| 712 | 0 | my $row = $self->{'admin2'}->execute("SELECT name FROM admin2 WHERE concatenated_codes LIKE '" . uc($loc->{'Country'}) . '.%.' . uc($region) . "' LIMIT 1"); | |||||
| 713 | 0 | if(ref($row) && $row->{'name'}) { | |||||
| 714 | # Cache the result for future calls and update the location's region | ||||||
| 715 | 0 | $admin2cache{$row->{'name'}} = $region; | |||||
| 716 | 0 | $loc->{'Region'} = $row->{'name'}; | |||||
| 717 | } | ||||||
| 718 | } | ||||||
| 719 | } | ||||||
| 720 | } | ||||||
| 721 | |||||||
| 722 - 726 | =head2 ua Does nothing, here for compatibility with other geocoders =cut | ||||||
| 727 | |||||||
| 728 | 1 | sub ua { | |||||
| 729 | } | ||||||
| 730 | |||||||
| 731 - 778 | =head1 AUTHOR Nigel Horne, C<< <njh@bandsman.co.uk> >> This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 BUGS Lots of lookups fail at the moment. The MaxMind data only contains cities. Can't parse and handle "London, England". The database contains Canadian cities, but not provinces, so a search for "New Brunswick, Canada" won't work The GeoNames admin databases are in this class, they should be in Geo::Coder::GeoNames. The data at L<https://github.com/apache/commons-csv/blob/master/src/test/resources/org/apache/commons/csv/perf/worldcitiespop.txt.gz?raw=true> are 7 years out of date, and are inconsistent with the Geonames database. If you search for something like "Sheppy, Kent, England" in list context, it returns them all. That's a lot! It should be limited to, say 10 results (that number should be tuneable, and be a LIMIT in DB.pm), and as the correct spelling in Sheppey, arguably it should return nothing. =head1 SEE ALSO VWF, MaxMind and geonames. =head1 LICENSE AND COPYRIGHT Copyright 2017-2025 Nigel Horne. The program code is released under the following licence: GPL for personal use on a single computer. All other users (including Commercial, Charity, Educational, Government) must apply in writing for a licence for use from Nigel Horne at `<njh at nigelhorne.com>`. This product includes GeoLite2 data created by MaxMind, available from L<https://www.maxmind.com/en/home>. (Note that this currently gives a 403 error - I need to find the latest URL). =cut | ||||||
| 779 | |||||||
| 780 | 1; | ||||||