| File: | bin/submit_abuse_report |
| Coverage: | 48.8% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #!/usr/bin/env perl | |||||
| 2 | # ----------------------------------------------------------------------- | |||||
| 3 | # submit_abuse_report -- analyse a spam/phishing email and send abuse | |||||
| 4 | # reports to all relevant parties. | |||||
| 5 | # | |||||
| 6 | # Usage: | |||||
| 7 | # submit_abuse_report [options] spam.eml | |||||
| 8 | # submit_abuse_report [options] < spam.eml | |||||
| 9 | # | |||||
| 10 | # Options: | |||||
| 11 | # --dry-run Show what would be sent and to whom; do not | |||||
| 12 | # actually connect to any mail server. | |||||
| 13 | # -i, --interactive Prompt for confirmation before each send. | |||||
| 14 | # --bcc Send a copy to the --from address for monitoring. | |||||
| 15 | # --bcc-to ADDRESS Send a copy to an explicit address. | |||||
| 16 | # --from ADDRESS Envelope / From: address for the outgoing | |||||
| 17 | # reports (required unless --dry-run). | |||||
| 18 | # --smtp HOST[:PORT] SMTP relay to use (default: localhost:25). | |||||
| 19 | # --trusted CIDR Trusted relay CIDR to skip in Received: chain. | |||||
| 20 | # May be repeated for multiple relays. | |||||
| 21 | # --timeout SECS Network timeout in seconds (default: 15). | |||||
| 22 | # --verbose Print analysis progress to STDERR. | |||||
| 23 | # --help Show this help text. | |||||
| 24 | # | |||||
| 25 | # Examples: | |||||
| 26 | # # Dry run -- see what would be sent without sending anything | |||||
| 27 | # submit_abuse_report --dry-run spam.eml | |||||
| 28 | # | |||||
| 29 | # # Send reports via the local MTA | |||||
| 30 | # submit_abuse_report --from postmaster@myisp.example spam.eml | |||||
| 31 | # | |||||
| 32 | # # Send via a specific SMTP relay, skipping our own outbound IP | |||||
| 33 | # submit_abuse_report \ | |||||
| 34 | # --from abuse-reporter@example.com \ | |||||
| 35 | # --smtp mail.example.com:587 \ | |||||
| 36 | # --trusted 203.0.113.0/24 \ | |||||
| 37 | # spam.eml | |||||
| 38 | # ----------------------------------------------------------------------- | |||||
| 39 | 6 6 | 12283 8 | use 5.010; | |||
| 40 | 6 6 6 | 12 5 53 | use strict; | |||
| 41 | 6 6 6 | 11 4 96 | use warnings; | |||
| 42 | 6 6 6 | 995 627 12 | use utf8; | |||
| 43 | 6 6 6 | 1113 3085 14 | use open qw(:std :encoding(UTF-8)); | |||
| 44 | ||||||
| 45 | 6 6 6 | 49698 27818 10 | use Getopt::Long qw(GetOptions); | |||
| 46 | 6 6 6 | 1719 115836 210 | use Pod::Usage qw(pod2usage); | |||
| 47 | 6 6 6 | 1358 290753 217 | use Net::SMTP; | |||
| 48 | 6 6 6 | 1182 1883 197 | use MIME::Base64 qw(encode_base64); | |||
| 49 | 6 6 6 | 15 11 17 | use POSIX qw(strftime); | |||
| 50 | 6 6 6 | 182 5 156 | use File::Basename qw(basename); | |||
| 51 | 6 6 6 | 1048 1691 17 | use lib '.'; # find Email/Abuse/Investigator.pm in development tree | |||
| 52 | 6 6 6 | 2685 12 321459 | use Email::Abuse::Investigator; | |||
| 53 | ||||||
| 54 | # ----------------------------------------------------------------------- | |||||
| 55 | # Command-line options | |||||
| 56 | # ----------------------------------------------------------------------- | |||||
| 57 | 6 | 179371 | my $dry_run = 0; | |||
| 58 | 6 | 8 | my $interactive = 0; | |||
| 59 | 6 | 5 | my $bcc = 0; # --bcc flag: copy to --from address | |||
| 60 | 6 | 8 | my $bcc_to = ''; # --bcc-to ADDRESS: copy to explicit address | |||
| 61 | 6 | 7 | my $from = ''; | |||
| 62 | 6 | 6 | my $smtp_arg = 'localhost:25'; | |||
| 63 | 6 | 5 | my $timeout = 15; | |||
| 64 | 6 | 6 | my $verbose = 0; | |||
| 65 | 6 | 6 | my $help = 0; | |||
| 66 | 6 | 6 | my @trusted; | |||
| 67 | ||||||
| 68 | 6 | 22 | GetOptions( | |||
| 69 | 'dry-run' => \$dry_run, | |||||
| 70 | 'interactive|i' => \$interactive, | |||||
| 71 | 'bcc' => \$bcc, | |||||
| 72 | 'bcc-to=s' => \$bcc_to, | |||||
| 73 | 'from=s' => \$from, | |||||
| 74 | 'smtp=s' => \$smtp_arg, | |||||
| 75 | 'trusted=s' => \@trusted, | |||||
| 76 | 'timeout=i' => \$timeout, | |||||
| 77 | 'verbose' => \$verbose, | |||||
| 78 | 'help' => \$help, | |||||
| 79 | ) or pod2usage(2); | |||||
| 80 | ||||||
| 81 | 6 | 3236 | pod2usage( -exitval => 0, -verbose => 2 ) if $help; | |||
| 82 | ||||||
| 83 | 4 | 7 | unless ($dry_run || $from) { | |||
| 84 | 0 | 0 | die basename($0) . ": --from ADDRESS is required unless --dry-run is used.\n" | |||
| 85 | . "Run with --help for usage.\n"; | |||||
| 86 | } | |||||
| 87 | ||||||
| 88 | 4 | 7 | if ($from && $from !~ /\@/) { | |||
| 89 | 0 | 0 | die basename($0) . ": --from value '$from' does not look like an email address.\n"; | |||
| 90 | } | |||||
| 91 | ||||||
| 92 | 4 | 7 | my ($smtp_host, $smtp_port) = split /:/, $smtp_arg, 2; | |||
| 93 | ||||||
| 94 | # Resolve BCC address. | |||||
| 95 | # --bcc alone copies to --from; --bcc-to ADDRESS uses an explicit address. | |||||
| 96 | 4 | 4 | my $bcc_addr = undef; | |||
| 97 | 4 | 9 | if ($bcc_to) { | |||
| 98 | 0 | 0 | die basename($0) . ": --bcc-to value '$bcc_to' does not look like an email address.\n" | |||
| 99 | unless $bcc_to =~ /\@/; | |||||
| 100 | 0 | 0 | $bcc_addr = $bcc_to; | |||
| 101 | } elsif ($bcc) { | |||||
| 102 | 0 | 0 | die basename($0) . ": --bcc requires --from to be set.\n" | |||
| 103 | unless $from; | |||||
| 104 | 0 | 0 | $bcc_addr = $from; | |||
| 105 | } | |||||
| 106 | 4 | 6 | $smtp_port //= 25; | |||
| 107 | ||||||
| 108 | # ----------------------------------------------------------------------- | |||||
| 109 | # Read the raw email | |||||
| 110 | # ----------------------------------------------------------------------- | |||||
| 111 | 4 | 4 | my $raw; | |||
| 112 | 4 | 5 | if (@ARGV) { | |||
| 113 | 4 | 66 | open my $fh, '<:raw', $ARGV[0] or die basename($0), ": cannot open '$ARGV[0]': $!"; | |||
| 114 | 4 | 8 | local $/; | |||
| 115 | 4 | 59 | $raw = <$fh>; | |||
| 116 | 4 | 21 | close $fh; | |||
| 117 | } else { | |||||
| 118 | 0 | 0 | binmode STDIN, ':raw'; | |||
| 119 | 0 | 0 | local $/; | |||
| 120 | 0 | 0 | $raw = <STDIN>; | |||
| 121 | } | |||||
| 122 | ||||||
| 123 | 4 | 13 | die basename($0) . ": no email data supplied.\n" | |||
| 124 | unless defined $raw && length $raw; | |||||
| 125 | ||||||
| 126 | # ----------------------------------------------------------------------- | |||||
| 127 | # Analyse | |||||
| 128 | # ----------------------------------------------------------------------- | |||||
| 129 | 4 | 6 | print STDERR "Analysing message...\n" if $verbose; | |||
| 130 | ||||||
| 131 | 4 | 18 | my $inv = Email::Abuse::Investigator->new( | |||
| 132 | timeout => $timeout, | |||||
| 133 | trusted_relays => \@trusted, | |||||
| 134 | verbose => $verbose, | |||||
| 135 | ); | |||||
| 136 | 4 | 11 | $inv->parse_email(\$raw); | |||
| 137 | ||||||
| 138 | 4 | 9 | my @contacts = $inv->abuse_contacts(); | |||
| 139 | ||||||
| 140 | 4 | 9 | unless (@contacts) { | |||
| 141 | 2 | 8 | print STDERR "No abuse contacts could be determined. Nothing to send.\n"; | |||
| 142 | ||||||
| 143 | # List any domains and URL hosts that were found but could not be | |||||
| 144 | # resolved to an abuse contact, so the user knows where to look. | |||||
| 145 | 2 | 7 | _print_unresolved($inv, \*STDERR); | |||
| 146 | 2 | 37 | exit 0; | |||
| 147 | } | |||||
| 148 | ||||||
| 149 | # ----------------------------------------------------------------------- | |||||
| 150 | # _print_unresolved( $inv, $fh ) | |||||
| 151 | # | |||||
| 152 | # Print a list of domains and URL hosts found in the message that could | |||||
| 153 | # not be resolved to an abuse contact, so the user knows where to look. | |||||
| 154 | # ----------------------------------------------------------------------- | |||||
| 155 | sub _print_unresolved { | |||||
| 156 | 4 | 5 | my ($inv, $fh) = @_; | |||
| 157 | ||||||
| 158 | # Delegate to the module's unresolved_contacts() method which handles | |||||
| 159 | # all the filtering logic (spoofable headers, already-covered domains). | |||||
| 160 | 4 | 11 | my @unresolved = $inv->unresolved_contacts(); | |||
| 161 | 4 | 9 | return unless @unresolved; | |||
| 162 | ||||||
| 163 | 4 | 9 | print $fh "\nDomains found in this message with no abuse contact determined\n"; | |||
| 164 | 4 | 5 | print $fh "(consider investigating or reporting manually):\n\n"; | |||
| 165 | 4 | 7 | for my $u (@unresolved) { | |||
| 166 | 4 | 9 | if ($u->{type} eq 'url_host') { | |||
| 167 | 3 | 6 | print $fh " URL host : $u->{domain}\n"; | |||
| 168 | } else { | |||||
| 169 | 1 | 2 | print $fh " Domain : $u->{domain}\n"; | |||
| 170 | } | |||||
| 171 | } | |||||
| 172 | 4 | 9 | print $fh "\n"; | |||
| 173 | } | |||||
| 174 | ||||||
| 175 | 2 | 6 | my $report_text = $inv->abuse_report_text(); | |||
| 176 | ||||||
| 177 | # Append a note to the human-readable part so recipients know the | |||||
| 178 | # original message is attached and why. | |||||
| 179 | 2 | 3 | $report_text .= join("\n", | |||
| 180 | "", | |||||
| 181 | "-" x 72, | |||||
| 182 | "The original spam/phishing message is attached below as a", | |||||
| 183 | "message/rfc822 MIME part. Please use the full Received: headers", | |||||
| 184 | "to locate the relevant SMTP session in your mail logs.", | |||||
| 185 | "-" x 72, | |||||
| 186 | ""); | |||||
| 187 | ||||||
| 188 | 2 | 3 | my $orig = $inv->originating_ip(); | |||
| 189 | 2 | 3 | my $risk = $inv->risk_assessment(); | |||
| 190 | ||||||
| 191 | # Build a consistent subject line for all outgoing reports | |||||
| 192 | 2 | 5 | my $subject = _build_subject($orig, $risk); | |||
| 193 | ||||||
| 194 | # ----------------------------------------------------------------------- | |||||
| 195 | # Dry run: describe what would be sent | |||||
| 196 | # ----------------------------------------------------------------------- | |||||
| 197 | 2 | 2 | if ($dry_run) { | |||
| 198 | 2 | 7 | _dry_run_report(\@contacts, $subject, $report_text, \$raw, $inv, $bcc_addr); | |||
| 199 | 2 | 42 | exit 0; | |||
| 200 | } | |||||
| 201 | ||||||
| 202 | # ----------------------------------------------------------------------- | |||||
| 203 | # Live run: send one email per unique abuse contact | |||||
| 204 | # ----------------------------------------------------------------------- | |||||
| 205 | 0 | 0 | my $sent = 0; | |||
| 206 | 0 | 0 | my $failed = 0; | |||
| 207 | ||||||
| 208 | 0 | 0 | for my $contact (@contacts) { | |||
| 209 | 0 | 0 | my $to = $contact->{address}; | |||
| 210 | 0 | 0 | my $role = $contact->{role}; | |||
| 211 | ||||||
| 212 | 0 | 0 | if ($interactive) { | |||
| 213 | 0 | 0 | printf "\nSend abuse report to: %s\n", $to; | |||
| 214 | 0 | 0 | printf "Reason : %s\n", $role; | |||
| 215 | 0 | 0 | print "Send? [y/N] "; | |||
| 216 | 0 | 0 | my $ans = _read_tty(); | |||
| 217 | 0 | 0 | unless (defined $ans && lc($ans) eq 'y') { | |||
| 218 | 0 | 0 | printf "Skipped: %-45s %s\n", $to, $role; | |||
| 219 | 0 | 0 | next; | |||
| 220 | } | |||||
| 221 | } | |||||
| 222 | ||||||
| 223 | 0 | 0 | print STDERR "Sending to $to ($role)...\n" if $verbose; | |||
| 224 | ||||||
| 225 | 0 | 0 | my $ok = _send_report( | |||
| 226 | smtp_host => $smtp_host, | |||||
| 227 | smtp_port => $smtp_port, | |||||
| 228 | from => $from, | |||||
| 229 | to => $to, | |||||
| 230 | bcc => $bcc_addr, | |||||
| 231 | subject => $subject, | |||||
| 232 | body => $report_text, | |||||
| 233 | original => \$raw, | |||||
| 234 | inv => $inv, | |||||
| 235 | timeout => $timeout, | |||||
| 236 | ); | |||||
| 237 | ||||||
| 238 | 0 | 0 | if ($ok) { | |||
| 239 | 0 | 0 | printf "Sent : %-45s %s\n", $to, $role; | |||
| 240 | 0 | 0 | $sent++; | |||
| 241 | } else { | |||||
| 242 | 0 | 0 | printf "FAILED : %-45s %s\n", $to, $role; | |||
| 243 | 0 | 0 | $failed++; | |||
| 244 | } | |||||
| 245 | } | |||||
| 246 | ||||||
| 247 | 0 | 0 | printf "\nDone: %d sent, %d failed.\n", $sent, $failed; | |||
| 248 | 0 | 0 | _print_unresolved($inv, \*STDOUT); | |||
| 249 | 0 | 0 | exit( $failed ? 1 : 0 ); | |||
| 250 | ||||||
| 251 | # ----------------------------------------------------------------------- | |||||
| 252 | # _dry_run_report( \@contacts, $subject, $report_text, \$raw, $inv ) | |||||
| 253 | # | |||||
| 254 | # Print a full description of what would be sent without sending anything. | |||||
| 255 | # ----------------------------------------------------------------------- | |||||
| 256 | sub _dry_run_report { | |||||
| 257 | 2 | 4 | my ($contacts, $subject, $body, $raw_ref, $inv, $bcc) = @_; | |||
| 258 | ||||||
| 259 | 2 | 2 | my $bar = '=' x 72; | |||
| 260 | 2 | 10 | print "$bar\n"; | |||
| 261 | 2 | 2 | print " DRY RUN -- no email will be sent\n"; | |||
| 262 | 2 | 3 | print "$bar\n\n"; | |||
| 263 | ||||||
| 264 | 2 | 6 | printf "Subject : %s\n\n", $subject; | |||
| 265 | 2 | 2 | print "Envelope sender: <> (null reverse-path per RFC 6650 s.3)\n"; | |||
| 266 | 2 | 4 | printf "BCC copy to : %s\n", $bcc if $bcc; | |||
| 267 | 2 | 2 | print "\n"; | |||
| 268 | ||||||
| 269 | 2 | 5 | printf "%-3s %-45s %s\n", '#', 'Recipient', 'Role'; | |||
| 270 | 2 | 5 | printf "%-3s %-45s %s\n", '-' x 3, '-' x 45, '-' x 24; | |||
| 271 | ||||||
| 272 | 2 | 2 | my $n = 1; | |||
| 273 | 2 2 | 1 57 | for my $c (@{$contacts}) { | |||
| 274 | 2 | 6 | printf "%-3d %-45s %s\n", $n++, $c->{address}, $c->{role}; | |||
| 275 | printf " via: %-42s note: %s\n", | |||||
| 276 | 2 | 5 | $c->{via}, ($c->{note} || ''); | |||
| 277 | 2 | 3 | print "\n"; | |||
| 278 | } | |||||
| 279 | ||||||
| 280 | # Part 1 | |||||
| 281 | 2 | 4 | print "$bar\n"; | |||
| 282 | 2 | 1 | print " PART 1: HUMAN-READABLE REPORT (text/plain)\n"; | |||
| 283 | 2 | 2 | print " (identical text sent to every recipient above)\n"; | |||
| 284 | 2 | 3 | print "$bar\n\n"; | |||
| 285 | 2 | 48 | print $body; | |||
| 286 | ||||||
| 287 | # Part 2: feedback-report fields | |||||
| 288 | 2 | 4 | print "\n$bar\n"; | |||
| 289 | 2 | 2 | print " PART 2: ARF METADATA (message/feedback-report)\n"; | |||
| 290 | 2 | 6 | print " RFC 5965 s.3 -- machine-readable fields for automated processing\n"; | |||
| 291 | 2 | 3 | print "$bar\n\n"; | |||
| 292 | 2 | 5 | my $fbr = _build_feedback_report($inv); | |||
| 293 | 2 | 17 | print " $_\n" for split /\r?\n/, $fbr; | |||
| 294 | ||||||
| 295 | # Part 3: original message preview | |||||
| 296 | 2 | 3 | print "\n$bar\n"; | |||
| 297 | 2 | 3 | print " PART 3: ORIGINAL MESSAGE (message/rfc822)\n"; | |||
| 298 | 2 | 2 | print " RFC 5965 s.2 -- verbatim original; MUST be included\n"; | |||
| 299 | 2 | 3 | print "$bar\n"; | |||
| 300 | 2 | 6 | if (defined $raw_ref && length $$raw_ref) { | |||
| 301 | 2 | 9 | my @lines = split /\r?\n/, $$raw_ref; | |||
| 302 | 2 | 2 | my $preview = 20; | |||
| 303 | 2 | 2 | my $total = scalar @lines; | |||
| 304 | 2 | 6 | printf " [Showing first %d of %d lines]\n\n", | |||
| 305 | ($total < $preview ? $total : $preview), $total; | |||||
| 306 | 2 | 25 | print " $_\n" for @lines[ 0 .. ($total < $preview ? $total - 1 : $preview - 1) ]; | |||
| 307 | 2 | 5 | print " ...\n" if $total > $preview; | |||
| 308 | } else { | |||||
| 309 | 0 | 0 | print " (no original message available)\n"; | |||
| 310 | } | |||||
| 311 | ||||||
| 312 | 2 | 3 | print "\n$bar\n"; | |||
| 313 | ||||||
| 314 | # abuse_contacts() now merges duplicate addresses, so @contacts has one | |||||
| 315 | # entry per unique address with 'role' already set to the combined string. | |||||
| 316 | # Count the total discovery routes (sum of all roles arrays) to show | |||||
| 317 | # in the annotation when merging occurred. | |||||
| 318 | 2 2 | 2 3 | my $n_contacts = scalar @{$contacts}; | |||
| 319 | 2 | 4 | my $n_routes = 0; | |||
| 320 | 2 2 2 | 2 3 4 | $n_routes += scalar @{ $_->{roles} // [$_->{role}] } for @{$contacts}; | |||
| 321 | ||||||
| 322 | 2 | 3 | if ($n_routes > $n_contacts) { | |||
| 323 | # Some addresses cover more than one discovery route -- note the merge | |||||
| 324 | 0 | 0 | printf " Total: %d recipient%s (%d contact route%s merged)\n", | |||
| 325 | $n_contacts, $n_contacts == 1 ? '' : 's', | |||||
| 326 | $n_routes, $n_routes == 1 ? '' : 's'; | |||||
| 327 | } else { | |||||
| 328 | 2 | 7 | printf " Total: %d recipient%s\n", | |||
| 329 | $n_contacts, $n_contacts == 1 ? '' : 's'; | |||||
| 330 | } | |||||
| 331 | 2 | 3 | print "\n"; | |||
| 332 | ||||||
| 333 | # One line per recipient; 'role' is already the merged string | |||||
| 334 | 2 2 | 51 4 | for my $c (@{$contacts}) { | |||
| 335 | 2 | 4 | printf " %s (%s)\n", $c->{address}, $c->{role}; | |||
| 336 | } | |||||
| 337 | ||||||
| 338 | # Web-form contacts -- providers that do not accept email | |||||
| 339 | 2 | 4 | my @form_cs = $inv->form_contacts(); | |||
| 340 | 2 | 3 | if (@form_cs) { | |||
| 341 | 0 | 0 | print "\n$bar\n"; | |||
| 342 | 0 | 0 | print " MANUAL ACTION REQUIRED -- WEB FORM SUBMISSION\n"; | |||
| 343 | 0 | 0 | print " The following parties do not accept email abuse reports.\n"; | |||
| 344 | 0 | 0 | print " Open each URL in a browser and complete the form as instructed.\n"; | |||
| 345 | 0 | 0 | print "$bar\n\n"; | |||
| 346 | 0 | 0 | for my $c (@form_cs) { | |||
| 347 | 0 | 0 | printf " Role : %s\n", $c->{role}; | |||
| 348 | 0 | 0 | printf " Form URL : %s\n", $c->{form}; | |||
| 349 | 0 | 0 | printf " Domain : %s\n", $c->{form_domain} if $c->{form_domain}; | |||
| 350 | 0 | 0 | if ($c->{form_paste}) { | |||
| 351 | 0 | 0 | printf " Paste : %s\n", $c->{form_paste}; | |||
| 352 | } | |||||
| 353 | 0 | 0 | if ($c->{form_upload}) { | |||
| 354 | 0 | 0 | printf " Upload : %s\n", $c->{form_upload}; | |||
| 355 | } | |||||
| 356 | 0 | 0 | print "\n"; | |||
| 357 | } | |||||
| 358 | } | |||||
| 359 | ||||||
| 360 | 2 | 4 | print "$bar\n"; | |||
| 361 | ||||||
| 362 | 2 | 5 | _print_unresolved($inv, \*STDOUT); | |||
| 363 | } | |||||
| 364 | ||||||
| 365 | # ----------------------------------------------------------------------- | |||||
| 366 | # _read_tty() -> string | undef | |||||
| 367 | # | |||||
| 368 | # Read a single line from the controlling terminal. Extracted into its | |||||
| 369 | # own sub so tests can override it without spawning a pseudo-terminal. | |||||
| 370 | # Returns the chomped input string, or undef if /dev/tty is unavailable. | |||||
| 371 | # ----------------------------------------------------------------------- | |||||
| 372 | sub _read_tty { | |||||
| 373 | 0 | 0 | my $tty; | |||
| 374 | 0 | 0 | unless (open $tty, '<', '/dev/tty') { | |||
| 375 | 0 | 0 | warn "Cannot open /dev/tty for interactive prompt; skipping\n"; | |||
| 376 | 0 | 0 | return undef; | |||
| 377 | } | |||||
| 378 | 0 | 0 | my $ans = <$tty>; | |||
| 379 | 0 | 0 | close $tty; | |||
| 380 | 0 | 0 | chomp $ans if defined $ans; | |||
| 381 | 0 | 0 | return $ans; | |||
| 382 | } | |||||
| 383 | ||||||
| 384 | # ----------------------------------------------------------------------- | |||||
| 385 | # _send_report( %args ) -> bool | |||||
| 386 | # | |||||
| 387 | # Send a single ARF-compliant abuse report via SMTP. | |||||
| 388 | # Returns 1 on success, 0 on failure. | |||||
| 389 | # Errors are printed to STDERR; the caller continues to the next recipient. | |||||
| 390 | # | |||||
| 391 | # Envelope sender is the null reverse-path (<>) per RFC 6650 s.3, which | |||||
| 392 | # requires this to prevent mail loops and DSN storms. The From: header | |||||
| 393 | # still carries the reporter's address for human reply purposes. | |||||
| 394 | # ----------------------------------------------------------------------- | |||||
| 395 | sub _send_report { | |||||
| 396 | 0 | 0 | my (%a) = @_; | |||
| 397 | ||||||
| 398 | 0 | 0 | my $date = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime); | |||
| 399 | my $msg_id = sprintf '<%s.%d@%s>', | |||||
| 400 | strftime('%Y%m%d%H%M%S', gmtime), | |||||
| 401 | $$, | |||||
| 402 | 0 0 0 | 0 0 0 | do { (my $h = $a{from}) =~ s/.*\@//; $h }; | |||
| 403 | ||||||
| 404 | my $mail = _build_mime_message( | |||||
| 405 | date => $date, | |||||
| 406 | from => $a{from}, | |||||
| 407 | to => $a{to}, | |||||
| 408 | bcc => $a{bcc}, | |||||
| 409 | subject => $a{subject}, | |||||
| 410 | msg_id => $msg_id, | |||||
| 411 | body => $a{body}, | |||||
| 412 | original => $a{original}, | |||||
| 413 | inv => $a{inv}, | |||||
| 414 | 0 | 0 | ); | |||
| 415 | ||||||
| 416 | # Debug: if SUBMIT_ABUSE_DUMP_MIME is set, write the raw MIME to a file | |||||
| 417 | 0 | 0 | if (my $dump = $ENV{SUBMIT_ABUSE_DUMP_MIME}) { | |||
| 418 | 0 | 0 | if (open my $fh, '>', $dump) { | |||
| 419 | 0 | 0 | print $fh $mail; | |||
| 420 | 0 | 0 | close $fh; | |||
| 421 | 0 | 0 | warn "MIME message written to $dump\n"; | |||
| 422 | } | |||||
| 423 | } | |||||
| 424 | ||||||
| 425 | 0 | 0 | my $smtp = eval { | |||
| 426 | Net::SMTP->new( | |||||
| 427 | $a{smtp_host}, | |||||
| 428 | Port => $a{smtp_port}, | |||||
| 429 | Timeout => $a{timeout}, | |||||
| 430 | 0 | 0 | ); | |||
| 431 | }; | |||||
| 432 | ||||||
| 433 | 0 | 0 | unless ($smtp) { | |||
| 434 | 0 | 0 | warn "SMTP connect to $a{smtp_host}:$a{smtp_port} failed: $@\n"; | |||
| 435 | 0 | 0 | return 0; | |||
| 436 | } | |||||
| 437 | ||||||
| 438 | 0 | 0 | my $ok = eval { | |||
| 439 | # Null reverse-path per RFC 6650 s.3 -- prevents DSN/mail loops | |||||
| 440 | 0 | 0 | $smtp->mail( '' ) or die "MAIL FROM failed\n"; | |||
| 441 | 0 | 0 | $smtp->to( $a{to} ) or die "RCPT TO failed\n"; | |||
| 442 | $smtp->to( $a{bcc} ) or die "RCPT TO (bcc) failed\n" | |||||
| 443 | 0 | 0 | if $a{bcc}; | |||
| 444 | 0 | 0 | $smtp->data() or die "DATA failed\n"; | |||
| 445 | 0 | 0 | $smtp->datasend($mail) or die "datasend failed\n"; | |||
| 446 | 0 | 0 | $smtp->dataend() or die "dataend failed\n"; | |||
| 447 | 0 | 0 | $smtp->quit(); | |||
| 448 | 0 | 0 | 1; | |||
| 449 | }; | |||||
| 450 | ||||||
| 451 | 0 | 0 | unless ($ok) { | |||
| 452 | 0 | 0 | my $err = $@ || 'unknown SMTP error'; | |||
| 453 | 0 | 0 | $err =~ s/\s+$//; | |||
| 454 | 0 | 0 | warn "SMTP error sending to $a{to}: $err\n"; | |||
| 455 | 0 | 0 | return 0; | |||
| 456 | } | |||||
| 457 | ||||||
| 458 | 0 | 0 | return 1; | |||
| 459 | } | |||||
| 460 | ||||||
| 461 | # ----------------------------------------------------------------------- | |||||
| 462 | # _build_feedback_report( $inv ) -> string | |||||
| 463 | # | |||||
| 464 | # Constructs the body of the message/feedback-report MIME part (Part 2) | |||||
| 465 | # as defined in RFC 5965 s.3. The returned string uses CRLF line endings | |||||
| 466 | # and contains only 7-bit ASCII, as required by the RFC. | |||||
| 467 | # | |||||
| 468 | # Fields included: | |||||
| 469 | # Feedback-Type -- always "abuse" | |||||
| 470 | # User-Agent -- module name and version | |||||
| 471 | # Version -- always "1" (RFC 5965 version) | |||||
| 472 | # Source-IP -- originating IP from Received: chain analysis | |||||
| 473 | # Original-Mail-From -- Return-Path: or From: of the spam | |||||
| 474 | # Original-Rcpt-To -- To: of the spam | |||||
| 475 | # Arrival-Date -- Date: header of the spam (as received) | |||||
| 476 | # Reported-Domain -- primary contact domain (first of all_domains()) | |||||
| 477 | # Reported-Uri -- each HTTP/HTTPS URL found in the spam body | |||||
| 478 | # Authentication-Results -- forwarded from the spam's own header | |||||
| 479 | # ----------------------------------------------------------------------- | |||||
| 480 | sub _build_feedback_report { | |||||
| 481 | 2 | 4 | my ($inv) = @_; | |||
| 482 | ||||||
| 483 | 2 | 1 | my @fields; | |||
| 484 | ||||||
| 485 | # Required fields (RFC 5965 s.3.1) | |||||
| 486 | 2 | 3 | push @fields, 'Feedback-Type: abuse'; | |||
| 487 | 2 | 4 | push @fields, 'User-Agent: Email::Abuse::Investigator/' | |||
| 488 | . $Email::Abuse::Investigator::VERSION; | |||||
| 489 | 2 | 2 | push @fields, 'Version: 1'; | |||
| 490 | ||||||
| 491 | # Source-IP -- the identified originating address | |||||
| 492 | 2 | 5 | my $orig = $inv->originating_ip(); | |||
| 493 | 2 | 3 | push @fields, "Source-IP: $orig->{ip}" if $orig && $orig->{ip}; | |||
| 494 | ||||||
| 495 | # Original-Mail-From -- envelope sender of the spam | |||||
| 496 | # Prefer Return-Path (true envelope sender); fall back to From: | |||||
| 497 | 2 | 4 | my $mail_from = $inv->_header_value('return-path') | |||
| 498 | // $inv->_header_value('from') | |||||
| 499 | // ''; | |||||
| 500 | 2 | 15 | $mail_from =~ s/^\s*<?\s*|\s*>?\s*$//g; # strip angle brackets and whitespace | |||
| 501 | 2 | 5 | push @fields, "Original-Mail-From: <$mail_from>" if $mail_from; | |||
| 502 | ||||||
| 503 | # Original-Rcpt-To -- envelope recipient(s) of the spam | |||||
| 504 | # Use the To: header; for each distinct address found | |||||
| 505 | 2 | 2 | my $rcpt_to = $inv->_header_value('to') // ''; | |||
| 506 | # Extract individual addresses (bare addr@domain or <addr@domain>) | |||||
| 507 | 2 | 2 | my %rcpt_seen; | |||
| 508 | 2 | 10 | while ($rcpt_to =~ /<?([^\s<>,;]+\@[\w.-]+)>?/g) { | |||
| 509 | 2 | 3 | my $addr = lc $1; | |||
| 510 | 2 | 9 | push @fields, "Original-Rcpt-To: <$addr>" unless $rcpt_seen{$addr}++; | |||
| 511 | } | |||||
| 512 | ||||||
| 513 | # Arrival-Date -- when we (the reporter) received the spam | |||||
| 514 | 2 | 3 | my $arrival = $inv->_header_value('date') // ''; | |||
| 515 | 2 | 4 | push @fields, "Arrival-Date: $arrival" if $arrival; | |||
| 516 | ||||||
| 517 | # Reported-Domain -- the primary contact domain | |||||
| 518 | 2 | 5 | my ($rdomain) = $inv->all_domains(); | |||
| 519 | 2 | 4 | push @fields, "Reported-Domain: $rdomain" if $rdomain; | |||
| 520 | ||||||
| 521 | # Reported-Uri -- each distinct URL in the spam body | |||||
| 522 | 2 | 2 | my %uri_seen; | |||
| 523 | 2 | 2 | for my $u ($inv->embedded_urls()) { | |||
| 524 | push @fields, "Reported-Uri: $u->{url}" | |||||
| 525 | 2 | 10 | unless $uri_seen{ $u->{url} }++; | |||
| 526 | } | |||||
| 527 | ||||||
| 528 | # Authentication-Results -- forwarded verbatim from the spam headers | |||||
| 529 | 2 | 4 | my $auth_res = $inv->_header_value('authentication-results') // ''; | |||
| 530 | 2 | 3 | push @fields, "Authentication-Results: $auth_res" if $auth_res; | |||
| 531 | ||||||
| 532 | 2 | 6 | return join("\r\n", @fields) . "\r\n"; | |||
| 533 | } | |||||
| 534 | ||||||
| 535 | # ----------------------------------------------------------------------- | |||||
| 536 | # _build_mime_message( %args ) -> string | |||||
| 537 | # | |||||
| 538 | # Constructs a fully RFC 5965 compliant multipart/report MIME message | |||||
| 539 | # suitable for transmission via Net::SMTP->datasend(). | |||||
| 540 | # | |||||
| 541 | # Three-part structure per RFC 5965 s.2: | |||||
| 542 | # Part 1 text/plain; charset=UTF-8 -- human-readable abuse report | |||||
| 543 | # Part 2 message/feedback-report -- ARF machine-readable metadata | |||||
| 544 | # Part 3 message/rfc822 -- original spam message verbatim | |||||
| 545 | # | |||||
| 546 | # All line endings in the returned string are CRLF (\r\n). | |||||
| 547 | # Part 2 uses 7bit encoding (required by RFC 5965 s.3). | |||||
| 548 | # ----------------------------------------------------------------------- | |||||
| 549 | sub _build_mime_message { | |||||
| 550 | 0 | 0 | my (%a) = @_; | |||
| 551 | ||||||
| 552 | # Boundary unique per message; must not appear in any part body | |||||
| 553 | 0 | 0 | my $boundary = sprintf 'arf_report_%s_%d', | |||
| 554 | strftime('%Y%m%d%H%M%S', gmtime), $$; | |||||
| 555 | ||||||
| 556 | # Normalise line endings to CRLF throughout, then encode to raw UTF-8 | |||||
| 557 | # bytes. Net::SMTP->datasend() calls syswrite() on a raw socket and | |||||
| 558 | # cannot handle Perl strings with the Unicode flag set (wide characters). | |||||
| 559 | # The body may contain non-ASCII characters (e.g. emoji in decoded subject | |||||
| 560 | # lines) so we must encode explicitly rather than rely on the socket layer. | |||||
| 561 | 0 | 0 | (my $body_crlf = $a{body}) =~ s/\r?\n/\r\n/g; | |||
| 562 | 0 | 0 | my $original_crlf = ''; | |||
| 563 | 0 0 | 0 0 | if (defined $a{original} && length ${ $a{original} }) { | |||
| 564 | 0 0 | 0 0 | ($original_crlf = ${ $a{original} }) =~ s/\r?\n/\r\n/g; | |||
| 565 | } | |||||
| 566 | 0 | 0 | my $feedback_report = _build_feedback_report($a{inv}); | |||
| 567 | ||||||
| 568 | # ---- Outer envelope headers ---- | |||||
| 569 | 0 | 0 | my @msg; | |||
| 570 | 0 | 0 | push @msg, "Date: $a{date}"; | |||
| 571 | 0 | 0 | push @msg, "From: $a{from}"; | |||
| 572 | 0 | 0 | push @msg, "To: $a{to}"; | |||
| 573 | 0 | 0 | push @msg, "Subject: $a{subject}"; | |||
| 574 | 0 | 0 | push @msg, "Message-ID: $a{msg_id}"; | |||
| 575 | 0 | 0 | push @msg, "MIME-Version: 1.0"; | |||
| 576 | # multipart/report with report-type=feedback-report per RFC 5965 s.2 | |||||
| 577 | 0 | 0 | push @msg, "Content-Type: multipart/report;"; | |||
| 578 | 0 | 0 | push @msg, " report-type=feedback-report;"; | |||
| 579 | 0 | 0 | push @msg, " boundary=\"$boundary\""; | |||
| 580 | 0 | 0 | push @msg, "X-Mailer: Email::Abuse::Investigator submit_abuse_report"; | |||
| 581 | 0 | 0 | push @msg, "X-Report-Monitor: $a{bcc}" if $a{bcc}; | |||
| 582 | 0 | 0 | push @msg, ""; | |||
| 583 | ||||||
| 584 | # Preamble for non-MIME clients | |||||
| 585 | 0 | 0 | push @msg, "This is an ARF (Abuse Reporting Format) feedback report."; | |||
| 586 | 0 | 0 | push @msg, "See https://datatracker.ietf.org/doc/html/rfc5965"; | |||
| 587 | 0 | 0 | push @msg, ""; | |||
| 588 | ||||||
| 589 | # ---- Part 1: human-readable summary (RFC 5965 s.2 "first part") ---- | |||||
| 590 | 0 | 0 | push @msg, "--$boundary"; | |||
| 591 | 0 | 0 | push @msg, "Content-Type: text/plain; charset=UTF-8"; | |||
| 592 | 0 | 0 | push @msg, "Content-Transfer-Encoding: 8bit"; | |||
| 593 | 0 | 0 | push @msg, "Content-Disposition: inline"; | |||
| 594 | 0 | 0 | push @msg, ""; | |||
| 595 | 0 | 0 | push @msg, $body_crlf; | |||
| 596 | ||||||
| 597 | # ---- Part 2: ARF machine-readable metadata (RFC 5965 s.3) ---- | |||||
| 598 | 0 | 0 | push @msg, "--$boundary"; | |||
| 599 | 0 | 0 | push @msg, "Content-Type: message/feedback-report"; | |||
| 600 | 0 | 0 | push @msg, "Content-Transfer-Encoding: 7bit"; # required by RFC 5965 | |||
| 601 | 0 | 0 | push @msg, ""; | |||
| 602 | 0 | 0 | push @msg, $feedback_report; | |||
| 603 | ||||||
| 604 | # ---- Part 3: original spam message (RFC 5965 s.2 "third part") ---- | |||||
| 605 | # Use base64 encoding so Outlook and other clients reliably show | |||||
| 606 | # it as a downloadable .eml attachment rather than rendering it | |||||
| 607 | # inline or discarding it. | |||||
| 608 | 0 | 0 | my $original_b64 = MIME::Base64::encode_base64($original_crlf, "\r\n"); | |||
| 609 | 0 | 0 | push @msg, "--$boundary"; | |||
| 610 | 0 | 0 | push @msg, "Content-Type: application/octet-stream; name=\"original_message.eml.txt\""; | |||
| 611 | 0 | 0 | push @msg, "Content-Transfer-Encoding: base64"; | |||
| 612 | 0 | 0 | push @msg, "Content-Disposition: attachment;"; | |||
| 613 | 0 | 0 | push @msg, " filename=\"original_message.eml.txt\""; | |||
| 614 | 0 | 0 | push @msg, "Content-Description: Original spam/phishing message"; | |||
| 615 | 0 | 0 | push @msg, ""; | |||
| 616 | 0 | 0 | push @msg, $original_b64; | |||
| 617 | ||||||
| 618 | # Closing boundary | |||||
| 619 | 0 | 0 | push @msg, "--${boundary}--"; | |||
| 620 | 0 | 0 | push @msg, ""; | |||
| 621 | ||||||
| 622 | 0 | 0 | require Encode; | |||
| 623 | 0 | 0 | my $result = join("\r\n", @msg); | |||
| 624 | 0 | 0 | return Encode::encode('UTF-8', $result); | |||
| 625 | } | |||||
| 626 | ||||||
| 627 | # ----------------------------------------------------------------------- | |||||
| 628 | # _build_subject( $orig_hashref, $risk_hashref ) -> string | |||||
| 629 | # | |||||
| 630 | # Build a concise, informative subject line for the abuse report. | |||||
| 631 | # ----------------------------------------------------------------------- | |||||
| 632 | sub _build_subject { | |||||
| 633 | 2 | 3 | my ($orig, $risk) = @_; | |||
| 634 | ||||||
| 635 | 2 | 4 | my $ip_part = $orig ? $orig->{ip} : 'unknown origin'; | |||
| 636 | 2 | 4 | my $level = $risk->{level}; | |||
| 637 | 2 | 50 | my $date_part = strftime('%Y-%m-%d', gmtime); | |||
| 638 | ||||||
| 639 | 2 | 5 | return "Abuse report [$level]: spam/phishing from $ip_part ($date_part)"; | |||
| 640 | } | |||||
| 641 | ||||||