File Coverage

File:	blib/lib/SEO/Inspector.pm
Coverage:	54.7%

line	stmt	bran	cond	sub	time	code
1						package SEO::Inspector;
2
3	6 6 6			6	347502 6 77	use strict;
4	6 6 6			6	7 3 119	use warnings;
5
6	6 6 6			6	27 13 124	use Carp;
7	6 6 6			6	1353 1027335 17	use Mojo::UserAgent;
8	6 6 6			6	146 2 9	use Mojo::URL;
9	6 6 6			6	1275 17762 17	use Module::Pluggable require => 1, search_path => 'SEO::Inspector::Plugin';
10	6 6 6			6	1503 152104 109	use Object::Configure 0.14;
11	6 6 6			6	22 24 94	use Params::Get 0.13;
12	6 6 6			6	10 26 11860	use Params::Validate::Strict 0.11;
13
14 - 22						=head1 NAME SEO::Inspector - Run SEO checks on HTML or URLs =head1 VERSION Version 0.02 =cut
23
24						our $VERSION = '0.02';
25
26 - 166						=head1 SYNOPSIS use SEO::Inspector; my $inspector = SEO::Inspector->new(url => 'https://example.com'); # Run plugins my $html = '<html><body>......</body></html>'; my $plugin_results = $inspector->check_html($html); # Run built-in checks my $builtin_results = $inspector->run_all($html); # Check a single URL and get all results my $all_results = $inspector->check_url('https://example.com'); =head1 DESCRIPTION SEO::Inspector provides: =over 4 =item * 14 built-in SEO checks =item * Plugin system: dynamically load modules under SEO::Inspector::Plugin namespace =item * Methods to check HTML strings or fetch and analyze a URL =back =head1 PLUGIN SYSTEM In addition to the built-in SEO checks, C<SEO::Inspector> supports a flexible plugin system. Plugins allow you to extend the checker with new rules or specialized analysis without modifying the core module. =head2 How Plugins Are Found Plugins are loaded dynamically from the C<SEO::Inspector::Plugin> namespace. For example, a module called: package SEO::Inspector::Plugin::MyCheck; will be detected and loaded automatically if it is available in C<@INC>. You can also tell the constructor to search additional directories by passing the C<plugin_dirs> argument: my $inspector = SEO::Inspector->new( plugin_dirs => ['t/lib', '/path/to/custom/plugins'], ); Each directory must contain files under a subpath corresponding to the namespace, for example: /path/to/custom/plugins/SEO/Inspector/Plugin/Foo.pm =head2 Plugin Interface A plugin must provide at least two methods: =over 4 =item * C<new> Constructor, called with no arguments. =item * C<run($html)> Given a string of raw HTML, return a hashref describing the result of the check. The hashref should have at least these keys: { name => 'My Check', status => 'ok' \| 'warn' \| 'error', notes => 'human-readable message', resolution => 'how to resolve' } =back =head2 Running Plugins You can run all loaded plugins against a piece of HTML with: my $results = $inspector->check_html($html); This returns a hashref keyed by plugin name (lowercased), each value being the hashref returned by the plugin's C<run> method. Plugins are also run automatically when you call C<check_url>: my $results = $inspector->check_url('https://example.com'); That result will include both built-in checks and plugin checks. =head2 Example Plugin Here is a minimal example plugin that checks whether the page contains the string "Hello": package SEO::Inspector::Plugin::HelloCheck; use strict; use warnings; sub new { bless {}, shift } sub run { my ($self, $html) = @_; if($html =~ /Hello/) { return { name => 'Hello Check', status => 'ok', notes => 'found Hello' }; } else { return { name => 'Hello Check', status => 'warn', notes => 'no Hello', resolution => 'add a hello field' }; } } 1; Place this file under C<lib/SEO/Inspector/Plugin/HelloCheck.pm> (or another directory listed in C<plugin_dirs>), and it will be discovered automatically. =head2 Naming Conventions The plugin key stored in C<< $inspector->{plugins} >> is derived from the final part of the package name, lowercased. For example: SEO::Inspector::Plugin::HelloCheck -> "hellocheck" This is the key you will see in the hashref returned by C<check_html> or C<check_url>. =head1 METHODS =head2 new(%args) Create a new inspector object. Accepts optional C<ua>, C<plugins>, C<url> and C<plugin_dirs> arguments. If C<plugin_dirs> isn't given, it tries hard to find the right place. =cut
167
168						sub new {
169	5			5	300903	my $class = shift;
170
171	5		100		12	my $params = Params::Validate::Strict::validate_strict({
172						args => Params::Get::get_params(undef, \@_) \|\| {},
173						schema => {
174						'ua' => {
175						'type' => 'object',
176						'can' => 'get',
177						'optional' => 1,
178						}, 'plugins' => {
179						'type' => 'hashref',
180						'optional' => 1,
181						}, 'url' => {
182						'type' => 'string',
183						'matches' => qr{
184						^
185						(?:https?://)? # Optional http or https scheme
186						(?:www\.)? # Optional www subdomain
187						[a-zA-Z0-9-]+ # Domain name part (alphanumeric and hyphens)
188						(?:\.[a-zA-Z0-9-]+)+ # Subdomains or TLDs (e.g., .com, .co.uk)
189						(?::\d{1,5})? # Optional port number
190						(?:/[^\s]*)? # Optional path (any non-whitespace characters)
191						$
192						}x,
193						'optional' => 1,
194						}, 'plugin_dirs' => {
195						'type' => 'arrayref',
196						'optional' => 1,
197						}
198						}
199						});
200
201	5				579	$params = Object::Configure::configure($class, $params);
202
203	5		33		109771	$params->{'ua'} \|\|= Mojo::UserAgent->new();
204	5		50		39	$params->{'plugins'} \|\|= {};
205
206	5 5				8 18	my $self = bless { %{$params} }, $class;
207
208	5				10	$self->load_plugins();
209
210	5				10	return $self;
211						}
212
213 - 217						=head2 load_plugins Loads plugins from the C<SEO::Inspector::Plugin> namespace. =cut
218
219						sub load_plugins
220						{
221	5			5	3	my $self = $_[0];
222
223	5				16	for my $plugin ($self->plugins()) {
224	13				4083	my $key = lc($plugin =~ s/.*:://r);
225	13				27	$self->{plugins}{$key} = $plugin->new();
226						}
227	5	100			10	if($self->{plugin_dirs}) {
228	2 2				1 3	for my $dir (@{$self->{plugin_dirs}}) {
229	2				5	local @INC = ($dir, @INC);
230
231	2				9	my $finder = Module::Pluggable::Object->new(
232						search_path => ['SEO::Inspector::Plugin'],
233						require => 1,
234						instantiate => 'new',
235						);
236
237	2				11	for my $plugin ($finder->plugins) {
238	6				25	my $key = lc(ref($plugin) =~ s/.*:://r);
239	6				26	$self->{plugins}{$key} = $plugin;
240						}
241						}
242						}
243						}
244
245						# Fetch HTML from URL or object default
246						sub _fetch_html {
247	0			0	0	my ($self, $url) = @_;
248
249	0		0		0	$url //= $self->{url};
250	0	0			0	croak 'URL missing' unless $url;
251
252	0				0	my $res = $self->{'ua'}->get($url)->result;
253	0	0			0	if ($res->is_error) {
254	0				0	croak 'Fetch failed: ', $res->message();
255						}
256	0				0	return $res->body;
257						}
258
259 - 263						=head2 check($check_name, $html) Run a single built-in check or plugin on provided HTML (or fetch from object URL if HTML not provided). =cut
264
265						# -------------------------------
266						# Run a single plugin or built-in check
267						# -------------------------------
268						sub check {
269	60			60	38	my ($self, $check_name, $html) = @_;
270	60		33		45	$html //= $self->_fetch_html();
271
272	60				128	my %dispatch = (
273						title => \&_check_title,
274						meta_description => \&_check_meta_description,
275						canonical => \&_check_canonical,
276						robots_meta => \&_check_robots_meta,
277						viewport => \&_check_viewport,
278						h1_presence => \&_check_h1_presence,
279						word_count => \&_check_word_count,
280						links_alt_text => \&_check_links_alt_text,
281						check_structured_data => \&_check_structured_data,
282						check_headings => \&_check_headings,
283						check_links => \&_check_links,
284						open_graph => \&_check_open_graph,
285						twitter_cards => \&_check_twitter_cards,
286						page_size => \&_check_page_size,
287						readability => \&_check_readability,
288						);
289
290						# built-in checks
291	60	50			49	if (exists $dispatch{$check_name}) {
292	60				61	return $dispatch{$check_name}->($self, $html);
293						} else {
294	0				0	croak "Unknown check $check_name";
295						}
296
297						# plugin checks
298	0	0			0	if (exists $self->{plugins}{$check_name}) {
299	0				0	my $plugin = $self->{plugins}{$check_name};
300	0				0	return $plugin->run($html);
301						}
302
303	0				0	return { name => $check_name, status => 'unknown', notes => '' };
304						}
305
306 - 310						=head2 run_all($html) Run all built-in checks on HTML (or object URL). =cut
311
312						# -------------------------------
313						# Run all built-in checks
314						# -------------------------------
315						sub run_all
316						{
317	4			4	793	my ($self, $html) = @_;
318	4		33		7	$html //= $self->_fetch_html();
319
320	4				3	my %results;
321	4				7	for my $check (qw(
322						title meta_description canonical robots_meta viewport h1_presence word_count links_alt_text
323						check_structured_data check_headings check_links
324						open_graph twitter_cards page_size readability
325						)) {
326	60				44	$results{$check} = $self->check($check, $html);
327						}
328
329	4				5	return \%results;
330						}
331
332 - 336						=head2 check_html($html) Run all loaded plugins on HTML. =cut
337
338						# -------------------------------
339						# Run all plugins on HTML
340						# -------------------------------
341						sub check_html {
342	5			5	779	my ($self, $html) = @_;
343	5		33		8	$html //= $self->_fetch_html();
344	5				17	my %results;
345
346	5 5				4 19	for my $key (keys %{ $self->{plugins} }) {
347	15				26	my $plugin = $self->{plugins}{$key};
348	15				18	$results{$key} = $plugin->run($html);
349						}
350
351	5				12	return \%results;
352						}
353
354 - 358						=head2 check_url($url) Fetch the URL and run all plugins and built-in checks. =cut
359
360						# -------------------------------
361						# Run URL: fetch and check
362						# -------------------------------
363						sub check_url {
364	1			1	6	my ($self, $url) = @_;
365
366	1		33		1	$url //= $self->{url};
367
368	1	50			2	croak('URL missing') unless $url;
369
370	1				1	my $html = $self->_fetch_html($url);
371
372	1				31	my $plugin_results = $self->check_html($html);
373	1				1	my $builtin_results = $self->run_all($html);
374
375						# merge all results
376	1				3	my %results = (%$plugin_results, %$builtin_results, _html => $html);
377	1				3	return \%results;
378						}
379
380						# -------------------------------
381						# Built-in check implementations
382						# -------------------------------
383						sub _check_title {
384	4			4	5	my ($self, $html) = @_;
385
386	4	50			14	if ($html =~ /<title>(.*?)<\/title>/is) {
387	4				6	my $title = $1;
388	4				9	$title =~ s/^\s+\|\s+$//g; # trim
389	4				15	$title =~ s/\s{2,}/ /g; # collapse spaces
390
391	4				4	my $len = length($title);
392	4				8	my $status = 'ok';
393	4				4	my $notes = "title present ($len chars)";
394
395	4	50 50 0			9	if ($len == 0) {
396	0				0	$status = 'error';
397	0				0	$notes = 'empty title';
398						} elsif ($len < 10) {
399	4				2	$status = 'warn';
400	4				5	$notes = "title too short ($len chars)";
401						} elsif ($len > 65) {
402	0				0	$status = 'warn';
403	0				0	$notes = "title too long ($len chars)";
404						}
405
406						# Flag really weak titles
407	4	50			14	if ($title =~ /^(home\|untitled\|index)$/i) {
408	0				0	$status = 'warn';
409	0				0	$notes = "generic title: $title";
410						}
411
412	4				14	return { name => 'Title', status => $status, notes => $notes };
413						}
414
415	0				0	return { name => 'Title', status => 'error', notes => 'missing title' };
416						}
417
418
419						sub _check_meta_description {
420	4			4	7	my ($self, $html) = @_;
421	4	100			11	if ($html =~ /<meta\s+name=["']description["']\s+content=["'](.*?)["']/is) {
422	3				3	my $desc = $1;
423	3				9	return { name => 'Meta Description', status => 'ok', notes => 'meta description present' };
424						}
425	1				3	return { name => 'Meta Description', status => 'warn', notes => 'missing meta description' };
426						}
427
428						sub _check_canonical
429						{
430	4			4	4	my ($self, $html) = @_;
431	4	50			8	if ($html =~ /<link\s+rel=["']canonical["']\s+href=["'](.*?)["']/is) {
432	0				0	return { name => 'Canonical', status => 'ok', notes => 'canonical link present' };
433						}
434						return {
435	4				11	name => 'Canonical',
436						status => 'warn',
437						notes => 'missing canonical link',
438						resolution => 'Add canonical link to <head>: <link rel="canonical" href="https://your-domain.com/this-page-url"> - use the preferred URL for this page to prevent duplicate content issues'
439						};
440						}
441
442						sub _check_robots_meta {
443	4			4	4	my ($self, $html) = @_;
444	4	50			11	if ($html =~ /<meta\s+name=["']robots["']\s+content=["'](.*?)["']/is) {
445	0				0	return { name => 'Robots Meta', status => 'ok', notes => 'robots meta present' };
446						}
447						return {
448	4				26	name => 'Robots Meta',
449						status => 'warn',
450						notes => 'missing robots meta',
451						resolution => 'Add robots meta tag to <head>: <meta name="robots" content="index, follow"> for normal indexing, or <meta name="robots" content="noindex, nofollow"> to prevent indexing - controls how search engines crawl and index this page'
452						};
453						}
454
455						sub _check_viewport {
456	4			4	6	my ($self, $html) = @_;
457	4	50			12	if ($html =~ /<meta\s+name=["']viewport["']\s+content=["'](.*?)["']/is) {
458	0				0	return { name => 'Viewport', status => 'ok', notes => 'viewport meta present' };
459						}
460						return {
461	4				13	name => 'Viewport',
462						status => 'warn',
463						notes => 'missing viewport meta',
464						resolution => 'Add viewport meta tag to <head>: <meta name="viewport" content="width=device-width, initial-scale=1.0"> - essential for mobile responsiveness and Google mobile-first indexing'
465						};
466						}
467
468						sub _check_h1_presence {
469	4			4	17	my ($self, $html) = @_;
470	4	100			12	if ($html =~ /<h1\b[^>]>(.?)<\/h1>/is) {
471	3				12	return { name => 'H1 Presence', status => 'ok', notes => 'h1 tag present' };
472						}
473	1				2	return { name => 'H1 Presence', status => 'warn', notes => 'missing h1' };
474						}
475
476						sub _check_word_count {
477	4			4	5	my ($self, $html) = @_;
478	4				2	my $text = $html;
479	4				24	$text =~ s/<[^>]+>//g;
480	4				6	my $words = scalar split /\s+/, $text;
481	4	50			18	return { name => 'Word Count', status => $words > 0 ? 'ok' : 'warn', notes => "$words words" };
482						}
483
484						sub _check_links_alt_text {
485	4			4	5	my ($self, $html) = @_;
486	4				3	my @missing;
487	4				13	while ($html =~ /<img\b(.*?)>/gis) {
488	0				0	my $attr = $1;
489	0	0			0	push @missing, $1 unless $attr =~ /alt=/i;
490						}
491	4	50			5	if(scalar(@missing)) {
492						return {
493	0				0	name => 'Links Alt Text',
494						status => 'warn',
495						notes => scalar(@missing) . ' images missing alt',
496						resolution => 'Add alt attributes to all images: <img src="image.jpg" alt="Descriptive text"> - describe the image content for screen readers and SEO. Use alt="" for decorative images that don\'t add meaning'
497						};
498						}
499
500						return {
501	4				9	name => 'Links Alt Text',
502						status => 'ok',
503						notes => 'all images have alt'
504						};
505						}
506
507						sub _check_structured_data {
508	4			4	5	my ($self, $html) = @_;
509
510	4				6	my @jsonld = ($html =~ /<script\b[^>]type=["']application\/ld\+json["'][^>]>(.*?)<\/script>/gis);
511
512	4	50			8	if(scalar(@jsonld)) {
513						return {
514	0				0	name => 'Structured Data',
515						status => 'ok',
516						notes => scalar(@jsonld) . ' JSON-LD block(s) found'
517						};
518						}
519
520						return {
521	4				12	name => 'Structured Data',
522						status => 'warn',
523						notes => 'no structured data found',
524						resolution => 'Add JSON-LD structured data to <head>: <script type="application/ld+json">{"@context": "https://schema.org", "@type": "WebPage", "name": "Page Title", "description": "Page description"}</script> - helps search engines understand your content better and enables rich snippets'
525						}
526						}
527
528						# _check_headings
529						# ----------------
530						# Analyzes the HTML document for heading structure and returns a structured
531						# SEO/a11y report.
532						#
533						# Checks performed:
534						# - Presence of headings (<h1>â€“<h6>), with counts of each level.
535						# - Ensures exactly one <h1> exists (warns if missing or multiple).
536						# - Validates heading hierarchy (no skipped levels, e.g. <h3> should not appear before an <h2>).
537						# - Flags suspicious heading text lengths (too short < 2 chars, or too long > 120 chars).
538						#
539						# Returns:
540						# {
541						# name => 'Headings',
542						# status => 'ok' \| 'warn',
543						# notes => 'summary of counts and issues'
544						# }
545						#
546						# Notes:
547						# - Status is 'warn' if issues are found, otherwise 'ok'.
548						# - 'error' status is reserved for future use (currently unused).
549
550						sub _check_headings {
551	4			4	2	my ($self, $html) = @_;
552
553	4				4	my %counts;
554						my @headings;
555
556						# Capture all headings and their order
557	4				13	while ($html =~ /<(h[1-6])\b[^>]>(.?)<\/\1>/gi) {
558	3				5	my $tag = lc $1;
559	3		50		6	my $text = $2 // '';
560	3				3	$text =~ s/\s+/ /g; # normalize whitespace
561	3				6	$text =~ s/^\s+\|\s+$//g;
562
563	3				3	$counts{$tag}++;
564	3				11	push @headings, { level => substr($tag, 1), text => $text };
565						}
566
567	4				3	my @issues;
568	4				3	my $status = 'ok';
569
570						# Check for no headings
571	4	100			5	if (!%counts) {
572						return {
573	1				5	name => 'Headings',
574						status => 'warn',
575						notes => 'no headings found',
576						};
577						}
578
579						# Check H1 presence/uniqueness
580	3	50 50			11	if (!$counts{h1}) {
581	0				0	push @issues, 'missing <h1>';
582	0				0	$status = 'warn';
583						}
584						elsif ($counts{h1} > 1) {
585	0				0	push @issues, 'multiple <h1> tags';
586	0				0	$status = 'warn';
587						}
588
589						# Check heading hierarchy (no skipped levels)
590	3				3	my $last_level = 0;
591	3				4	for my $h (@headings) {
592	3				4	my $level = $h->{level};
593	3	50	33		8	if ($last_level && $level > $last_level + 1) {
594	0				0	push @issues, "skipped heading level before <h$level>";
595	0				0	$status = 'warn';
596						}
597	3				4	$last_level = $level;
598						}
599
600						# Check heading text length (too short or too long)
601	3				3	for my $h (@headings) {
602	3				2	my $len = length($h->{text});
603	3	50 50			9	if ($len < 2) {
604	0				0	push @issues, "<h$h->{level}> too short";
605	0				0	$status = 'warn';
606						}
607						elsif ($len > 120) {
608	0				0	push @issues, "<h$h->{level}> too long";
609	0				0	$status = 'warn';
610						}
611						}
612
613						# Summarize counts and issues
614	3 3				5 7	my $summary = join ', ', map { "$_: $counts{$_}" } sort keys %counts;
615	3	50			11	$summary .= @issues ? " \| Issues: " . join('; ', @issues) : '';
616
617						return {
618	3				12	name => 'Headings',
619						status => $status,
620						notes => $summary,
621						};
622						}
623
624						sub _check_links {
625	4			4	3	my ($self, $html) = @_;
626
627	4				4	my $base_host;
628	4	50	33		12	if ($self->{url} && $self->{url} =~ m{^https?://}i) {
629	0				0	$base_host = Mojo::URL->new($self->{url})->host;
630						}
631
632	4				4	my ($total, $internal, $external, $badtext) = (0,0,0,0);
633
634						# common "bad" link text patterns (exact match or just punctuation around)
635	4				9	my $bad_rx = qr/^(?:click\shere\|read\smore\|more\|link\|here\|details)$/i;
636	4				3	my $badtext_content;
637
638	4				9	while ($html =~ m{<a\b([^>])>(.?)</a>}gis) {
639	0				0	my $attrs = $1;
640	0		0		0	my $text = $2 // '';
641
642	0				0	$total++;
643
644						# get href (prefer quoted values)
645	0				0	my ($href) = $attrs =~ /\bhref\s=\s"(.*?)"/i;
646	0	0	0		0	$href //= ($attrs =~ /\bhref\s=\s'(.*?)'/i ? $1 : undef);
647	0	0	0		0	$href //= ($attrs =~ /\bhref\s=\s([^\s>]+)/i ? $1 : undef);
648
649						# classify internal vs external
650	0	0	0		0	if (defined $href && $href =~ m{^\s*https?://}i) {
651						# attempt to compare host
652	0				0	my ($host) = $href =~ m{^\s*https?://([^/:\s]+)}i;
653	0	0	0		0	if (defined $base_host && defined $host) {
654	0	0			0	if (lc $host eq lc $base_host) {
655	0				0	$internal++;
656						} else {
657	0				0	$external++;
658						}
659						} else {
660						# no base host to compare; treat as external if absolute URL
661	0				0	$external++;
662						}
663						} else {
664						# relative URL or fragment or mailto/etc -> treat as internal
665	0				0	$internal++;
666						}
667
668						# normalize visible text: strip tags, trim whitespace, collapse spaces
669	0				0	$text =~ s/<[^>]+>//g;
670	0				0	$text =~ s/^\s+\|\s+$//g;
671	0				0	$text =~ s/\s+/ /g;
672
673						# check for bad link text (exact-ish)
674	0	0			0	if($text =~ $bad_rx) {
675	0				0	$badtext++;
676	0				0	$badtext_content = $text;
677						}
678						}
679
680	4	50 50	33		13	my $status = ($external \|\| $badtext) ? 'warn' : ($total ? 'ok' : 'warn');
681
682	4	50			5	if ($total) {
683	0	0			0	if($badtext == 0) {
684						return {
685	0				0	name => 'Links',
686						status => $status,
687						notes => sprintf('%d total (%d internal, %d external)',
688						$total, $internal, $external),
689						}
690						}
691	0	0			0	if($badtext == 1) {
692						return {
693	0				0	name => 'Links',
694						status => $status,
695						notes => sprintf('%d total (%d internal, %d external). 1 link with poor anchor text',
696						$total, $internal, $external),
697						resolution => "fix link text '$badtext_content'"
698						}
699						}
700						return {
701	0				0	name => 'Links',
702						status => $status,
703						notes => sprintf('%d total (%d internal, %d external). %d links with poor anchor text',
704						$total, $internal, $external, $badtext),
705						};
706						}
707						return {
708	4				15	name => 'Links',
709						status => $status,
710						notes => 'no links found'
711						}
712						}
713
714						# Checks for essential Open Graph tags that improve social media sharing
715						sub _check_open_graph {
716	4			4	4	my ($self, $html) = @_;
717
718	4				3	my %og_tags;
719	4				6	my @required = qw(title description image url);
720
721						# Extract all Open Graph meta tags
722	4				12	while ($html =~ /<meta\s+(?:property\|name)=["']og:([^"']+)["']\s+content=["']([^"']*)["']/gis) {
723	0				0	$og_tags{$1} = $2;
724						}
725
726	4 16		33		8 18	my @missing = grep { !exists $og_tags{$_} \|\| !$og_tags{$_} } @required;
727	4				6	my $found = keys %og_tags;
728
729	4	50			8	my $status = @missing ? 'warn' : 'ok';
730	4				6	my $notes;
731
732	4	50 0			8	if ($found == 0) {
733	4				4	$notes = 'no Open Graph tags found';
734	4				3	$status = 'warn';
735						} elsif (@missing) {
736	0				0	$notes = sprintf('%d OG tags found, missing: %s', $found, join(', ', @missing));
737						} else {
738	0				0	$notes = sprintf('all essential OG tags present (%d total)', $found);
739						}
740
741						return {
742	4				13	name => 'Open Graph',
743						status => $status,
744						notes => $notes,
745						resolution => 'Add missing tags to <head>: <meta property="og:title" content="Your Page Title">, <meta property="og:description" content="Brief page description">'
746						};
747						}
748
749						# Checks for Twitter Card meta tags for better Twitter sharing
750						sub _check_twitter_cards {
751	4			4	4	my ($self, $html) = @_;
752
753	4				4	my %twitter_tags;
754	4				3	my @recommended = qw(card title description);
755
756						# Extract Twitter Card meta tags
757	4				13	while ($html =~ /<meta\s+(?:property\|name)=["']twitter:([^"']+)["']\s+content=["']([^"']*)["']/gis) {
758	0				0	$twitter_tags{$1} = $2;
759						}
760
761	4 12		33		5 12	my @missing = grep { !exists $twitter_tags{$_} \|\| !$twitter_tags{$_} } @recommended;
762	4				5	my $found = keys %twitter_tags;
763
764	4	50			5	my $status = @missing ? 'warn' : 'ok';
765	4				3	my $notes;
766
767	4	50 0			5	if ($found == 0) {
768	4				4	$notes = 'no Twitter Card tags found';
769	4				1	$status = 'warn';
770						} elsif (@missing) {
771	0				0	$notes = sprintf('%d Twitter tags found, missing: %s', $found, join(', ', @missing));
772						} else {
773	0				0	$notes = sprintf('essential Twitter Card tags present (%d total)', $found);
774						}
775
776						return {
777	4				15	name => 'Twitter Cards',
778						status => $status,
779						notes => $notes,
780						resolution => 'Add missing tags to <head>: <meta name="twitter:card" content="summary">, <meta name="twitter:title" content="Your Page Title">'
781						};
782						}
783
784						# Checks HTML size and warns if too large (impacts loading speed)
785						sub _check_page_size {
786	4			4	4	my ($self, $html) = @_;
787
788	4				4	my $size_bytes = length($html);
789	4				7	my $size_kb = int($size_bytes / 1024);
790
791	4				6	my $status = 'ok';
792	4				4	my $notes = "${size_kb}KB HTML size";
793	4				8	my $resolution = '';
794
795	4	50 50 50			15	if ($size_bytes > 1_048_576) { # > 1MB
796	0				0	$status = 'error';
797	0				0	$notes .= ' (too large, over 1MB)';
798	0				0	$resolution = 'Consider optimizing: minify CSS/JS, compress images, remove unused elements, enable server compression';
799						} elsif ($size_bytes > 102_400) { # > 100KB
800	0				0	$status = 'warn';
801	0				0	$notes .= ' (large, consider optimization)';
802						} elsif ($size_bytes < 1024) { # < 1KB
803	4				4	$status = 'warn';
804	4				2	$notes .= ' (suspiciously small)';
805						} else {
806	0				0	$notes .= ' (good size)';
807						}
808
809						return {
810	4				11	name => 'Page Size',
811						status => $status,
812						notes => $notes,
813						resolution => $resolution
814						};
815						}
816
817						# Calculates approximate Flesch Reading Ease score for content readability
818						sub _check_readability {
819	4			4	5	my ($self, $html) = @_;
820
821						# Extract text content (remove scripts, styles, and HTML tags)
822	4				2	my $text = $html;
823	4				7	$text =~ s/<script\b[^>]>.?<\/script>//gis;
824	4				4	$text =~ s/<style\b[^>]>.?<\/style>//gis;
825	4				19	$text =~ s/<[^>]+>//g;
826	4				6	$text =~ s/\s+/ /g;
827	4				9	$text =~ s/^\s+\|\s+$//g;
828
829						return {
830	4	50			16	name => 'Readability',
831						status => 'warn',
832						notes => 'insufficient text for analysis',
833						resolution => 'Add more content to the page - aim for at least 300 words of meaningful text',
834						} if length($text) < 100;
835
836						# Count sentences (approximate)
837	0					my $sentences = () = $text =~ /[.!?]+/g;
838	0	0				$sentences = 1 if $sentences == 0; # avoid division by zero
839
840						# Count words
841	0					my @words = split /\s+/, $text;
842	0					my $word_count = @words;
843
844						return {
845	0	0				name => 'Readability',
846						status => 'warn',
847						notes => 'insufficient content for analysis',
848						resolution => 'Add more substantial content - aim for at least 300 words for proper SEO value',
849						} if $word_count < 50;
850
851						# Count syllables (very basic approximation)
852	0					my $syllables = 0;
853	0					for my $word (@words) {
854	0					$word = lc($word);
855	0					$word =~ s/[^a-z]//g; # remove punctuation
856	0	0				next if length($word) == 0;
857
858						# Simple syllable counting heuristic
859	0					my $vowels = () = $word =~ /[aeiouy]/g;
860	0	0				$syllables += $vowels > 0 ? $vowels : 1;
861	0	0				$syllables-- if $word =~ /e$/; # silent e
862						}
863	0	0				$syllables = $word_count if $syllables < $word_count; # minimum 1 syllable per word
864
865						# Flesch Reading Ease formula
866	0					my $avg_sentence_length = $word_count / $sentences;
867	0					my $avg_syllables_per_word = $syllables / $word_count;
868	0					my $flesch_score = 206.835 - (1.015 * $avg_sentence_length) - (84.6 * $avg_syllables_per_word);
869
870	0					my $status = 'ok';
871	0					my $level;
872						my $notes;
873	0					my $resolution = '';
874
875	0	0 0 0 0 0 0				if ($flesch_score >= 90) {
876	0					$level = 'very easy';
877						} elsif ($flesch_score >= 80) {
878	0					$level = 'easy';
879						} elsif ($flesch_score >= 70) {
880	0					$level = 'fairly easy';
881						} elsif ($flesch_score >= 60) {
882	0					$level = 'standard';
883						} elsif ($flesch_score >= 50) {
884	0					$level = 'fairly difficult';
885	0					$status = 'warn';
886	0					$resolution = 'Consider simplifying: use shorter sentences (aim for 15-20 words), choose simpler words, break up long paragraphs, add bullet points or lists';
887						} elsif ($flesch_score >= 30) {
888	0					$level = 'difficult';
889	0					$status = 'warn';
890	0					$resolution = 'Improve readability: use much shorter sentences (10-15 words), replace complex words with simpler alternatives, add more paragraph breaks, use active voice';
891						} else {
892	0					$level = 'very difficult';
893	0					$status = 'warn';
894	0					$resolution = 'Significantly simplify content: break long sentences into multiple short ones, replace jargon with plain language, add explanations for technical terms, use more white space and formatting';
895						}
896
897	0					$notes = sprintf('Flesch score: %.1f (%s) - %d words, %d sentences',
898						$flesch_score, $level, $word_count, $sentences);
899
900						return {
901	0					name => 'Readability',
902						status => $status,
903						notes => $notes,
904						resolution => $resolution,
905						};
906						}
907
908 - 982						=head1 AUTHOR Nigel Horne, C<< <njh at nigelhorne.com> >> =head1 SEE ALSO =over 4 =item * Test coverage report: L<https://nigelhorne.github.io/SEO-Inspector/coverage/> =item * L<https://github.com/nigelhorne/SEO-Checker> =item * L<https://github.com/sethblack/python-seo-analyzer> =back =head1 REPOSITORY L<https://github.com/nigelhorne/SEO-Inspector> =head1 SUPPORT This module is provided as-is without any warranty. Please report any bugs or feature requests to C<bug-seo-inspector at rt.cpan.org>, or through the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=SEO-Inspector>. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. You can find documentation for this module with the perldoc command. perldoc SEO::Inspector You can also look for information at: =over 4 =item * MetaCPAN L<https://metacpan.org/dist/SEO-Inspector> =item * RT: CPAN's request tracker L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=SEO-Inspector> =item * CPAN Testers' Matrix L<http://matrix.cpantesters.org/?dist=SEO-Inspector> =item * CPAN Testers Dependencies L<http://deps.cpantesters.org/?module=SEO::Inspector> =back =head1 LICENCE AND COPYRIGHT Copyright 2025 Nigel Horne. Usage is subject to licence terms. The licence terms of this software are as follows: =over 4 =item * Personal single user, single computer use: GPL2 =item * All other users (including Commercial, Charity, Educational, Government) must apply in writing for a licence for use from Nigel Horne at the above e-mail. =back =cut
983
984						1;
985