File Coverage

File:bin/submit_abuse_report
Coverage:48.8%

linestmtbrancondsubtimecode
1#!/usr/bin/env perl
2# -----------------------------------------------------------------------
3# submit_abuse_report -- analyse a spam/phishing email and send abuse
4#                           reports to all relevant parties.
5#
6# Usage:
7#   submit_abuse_report [options] spam.eml
8#   submit_abuse_report [options] < spam.eml
9#
10# Options:
11#   --dry-run              Show what would be sent and to whom; do not
12#                          actually connect to any mail server.
13#   -i, --interactive      Prompt for confirmation before each send.
14#   --bcc                  Send a copy to the --from address for monitoring.
15#   --bcc-to ADDRESS       Send a copy to an explicit address.
16#   --from ADDRESS         Envelope / From: address for the outgoing
17#                          reports (required unless --dry-run).
18#   --smtp HOST[:PORT]     SMTP relay to use (default: localhost:25).
19#   --trusted CIDR         Trusted relay CIDR to skip in Received: chain.
20#                          May be repeated for multiple relays.
21#   --timeout SECS         Network timeout in seconds (default: 15).
22#   --verbose              Print analysis progress to STDERR.
23#   --help                 Show this help text.
24#
25# Examples:
26#   # Dry run -- see what would be sent without sending anything
27#   submit_abuse_report --dry-run spam.eml
28#
29#   # Send reports via the local MTA
30#   submit_abuse_report --from postmaster@myisp.example spam.eml
31#
32#   # Send via a specific SMTP relay, skipping our own outbound IP
33#   submit_abuse_report \
34#       --from abuse-reporter@example.com \
35#       --smtp mail.example.com:587 \
36#       --trusted 203.0.113.0/24 \
37#       spam.eml
38# -----------------------------------------------------------------------
39
6
6
12283
8
use 5.010;
40
6
6
6
12
5
53
use strict;
41
6
6
6
11
4
96
use warnings;
42
6
6
6
995
627
12
use utf8;
43
6
6
6
1113
3085
14
use open qw(:std :encoding(UTF-8));
44
45
6
6
6
49698
27818
10
use Getopt::Long qw(GetOptions);
46
6
6
6
1719
115836
210
use Pod::Usage   qw(pod2usage);
47
6
6
6
1358
290753
217
use Net::SMTP;
48
6
6
6
1182
1883
197
use MIME::Base64 qw(encode_base64);
49
6
6
6
15
11
17
use POSIX        qw(strftime);
50
6
6
6
182
5
156
use File::Basename qw(basename);
51
6
6
6
1048
1691
17
use lib '.';     # find Email/Abuse/Investigator.pm in development tree
52
6
6
6
2685
12
321459
use Email::Abuse::Investigator;
53
54# -----------------------------------------------------------------------
55# Command-line options
56# -----------------------------------------------------------------------
57
6
179371
my $dry_run     = 0;
58
6
8
my $interactive = 0;
59
6
5
my $bcc      = 0;      # --bcc flag: copy to --from address
60
6
8
my $bcc_to   = '';     # --bcc-to ADDRESS: copy to explicit address
61
6
7
my $from     = '';
62
6
6
my $smtp_arg = 'localhost:25';
63
6
5
my $timeout  = 15;
64
6
6
my $verbose  = 0;
65
6
6
my $help     = 0;
66
6
6
my @trusted;
67
68
6
22
GetOptions(
69    'dry-run'    => \$dry_run,
70    'interactive|i' => \$interactive,
71    'bcc'        => \$bcc,
72    'bcc-to=s'   => \$bcc_to,
73    'from=s'     => \$from,
74    'smtp=s'     => \$smtp_arg,
75    'trusted=s'  => \@trusted,
76    'timeout=i'  => \$timeout,
77    'verbose'    => \$verbose,
78    'help'       => \$help,
79) or pod2usage(2);
80
81
6
3236
pod2usage( -exitval => 0, -verbose => 2 ) if $help;
82
83
4
7
unless ($dry_run || $from) {
84
0
0
    die basename($0) . ": --from ADDRESS is required unless --dry-run is used.\n"
85      . "Run with --help for usage.\n";
86}
87
88
4
7
if ($from && $from !~ /\@/) {
89
0
0
    die basename($0) . ": --from value '$from' does not look like an email address.\n";
90}
91
92
4
7
my ($smtp_host, $smtp_port) = split /:/, $smtp_arg, 2;
93
94# Resolve BCC address.
95# --bcc alone copies to --from; --bcc-to ADDRESS uses an explicit address.
96
4
4
my $bcc_addr = undef;
97
4
9
if ($bcc_to) {
98
0
0
    die basename($0) . ": --bcc-to value '$bcc_to' does not look like an email address.\n"
99        unless $bcc_to =~ /\@/;
100
0
0
    $bcc_addr = $bcc_to;
101} elsif ($bcc) {
102
0
0
    die basename($0) . ": --bcc requires --from to be set.\n"
103        unless $from;
104
0
0
    $bcc_addr = $from;
105}
106
4
6
$smtp_port //= 25;
107
108# -----------------------------------------------------------------------
109# Read the raw email
110# -----------------------------------------------------------------------
111
4
4
my $raw;
112
4
5
if (@ARGV) {
113
4
66
        open my $fh, '<:raw', $ARGV[0] or die basename($0), ": cannot open '$ARGV[0]': $!";
114
4
8
        local $/;
115
4
59
        $raw = <$fh>;
116
4
21
        close $fh;
117} else {
118
0
0
        binmode STDIN, ':raw';
119
0
0
        local $/;
120
0
0
        $raw = <STDIN>;
121}
122
123
4
13
die basename($0) . ": no email data supplied.\n"
124    unless defined $raw && length $raw;
125
126# -----------------------------------------------------------------------
127# Analyse
128# -----------------------------------------------------------------------
129
4
6
print STDERR "Analysing message...\n" if $verbose;
130
131
4
18
my $inv = Email::Abuse::Investigator->new(
132    timeout        => $timeout,
133    trusted_relays => \@trusted,
134    verbose        => $verbose,
135);
136
4
11
$inv->parse_email(\$raw);
137
138
4
9
my @contacts = $inv->abuse_contacts();
139
140
4
9
unless (@contacts) {
141
2
8
    print STDERR "No abuse contacts could be determined.  Nothing to send.\n";
142
143    # List any domains and URL hosts that were found but could not be
144    # resolved to an abuse contact, so the user knows where to look.
145
2
7
    _print_unresolved($inv, \*STDERR);
146
2
37
    exit 0;
147}
148
149# -----------------------------------------------------------------------
150# _print_unresolved( $inv, $fh )
151#
152# Print a list of domains and URL hosts found in the message that could
153# not be resolved to an abuse contact, so the user knows where to look.
154# -----------------------------------------------------------------------
155sub _print_unresolved {
156
4
5
    my ($inv, $fh) = @_;
157
158    # Delegate to the module's unresolved_contacts() method which handles
159    # all the filtering logic (spoofable headers, already-covered domains).
160
4
11
    my @unresolved = $inv->unresolved_contacts();
161
4
9
    return unless @unresolved;
162
163
4
9
    print $fh "\nDomains found in this message with no abuse contact determined\n";
164
4
5
    print $fh "(consider investigating or reporting manually):\n\n";
165
4
7
    for my $u (@unresolved) {
166
4
9
        if ($u->{type} eq 'url_host') {
167
3
6
            print $fh "  URL host : $u->{domain}\n";
168        } else {
169
1
2
            print $fh "  Domain   : $u->{domain}\n";
170        }
171    }
172
4
9
    print $fh "\n";
173}
174
175
2
6
my $report_text = $inv->abuse_report_text();
176
177# Append a note to the human-readable part so recipients know the
178# original message is attached and why.
179
2
3
$report_text .= join("\n",
180    "",
181    "-" x 72,
182    "The original spam/phishing message is attached below as a",
183    "message/rfc822 MIME part.  Please use the full Received: headers",
184    "to locate the relevant SMTP session in your mail logs.",
185    "-" x 72,
186    "");
187
188
2
3
my $orig        = $inv->originating_ip();
189
2
3
my $risk        = $inv->risk_assessment();
190
191# Build a consistent subject line for all outgoing reports
192
2
5
my $subject = _build_subject($orig, $risk);
193
194# -----------------------------------------------------------------------
195# Dry run: describe what would be sent
196# -----------------------------------------------------------------------
197
2
2
if ($dry_run) {
198
2
7
    _dry_run_report(\@contacts, $subject, $report_text, \$raw, $inv, $bcc_addr);
199
2
42
    exit 0;
200}
201
202# -----------------------------------------------------------------------
203# Live run: send one email per unique abuse contact
204# -----------------------------------------------------------------------
205
0
0
my $sent  = 0;
206
0
0
my $failed = 0;
207
208
0
0
for my $contact (@contacts) {
209
0
0
    my $to   = $contact->{address};
210
0
0
    my $role = $contact->{role};
211
212
0
0
    if ($interactive) {
213
0
0
        printf "\nSend abuse report to: %s\n", $to;
214
0
0
        printf "Reason             : %s\n", $role;
215
0
0
        print  "Send? [y/N] ";
216
0
0
        my $ans = _read_tty();
217
0
0
        unless (defined $ans && lc($ans) eq 'y') {
218
0
0
            printf "Skipped: %-45s  %s\n", $to, $role;
219
0
0
            next;
220        }
221    }
222
223
0
0
    print STDERR "Sending to $to ($role)...\n" if $verbose;
224
225
0
0
    my $ok = _send_report(
226        smtp_host   => $smtp_host,
227        smtp_port   => $smtp_port,
228        from        => $from,
229        to          => $to,
230        bcc         => $bcc_addr,
231        subject     => $subject,
232        body        => $report_text,
233        original    => \$raw,
234        inv         => $inv,
235        timeout     => $timeout,
236    );
237
238
0
0
    if ($ok) {
239
0
0
        printf "Sent   : %-45s  %s\n", $to, $role;
240
0
0
        $sent++;
241    } else {
242
0
0
        printf "FAILED : %-45s  %s\n", $to, $role;
243
0
0
        $failed++;
244    }
245}
246
247
0
0
printf "\nDone: %d sent, %d failed.\n", $sent, $failed;
248
0
0
_print_unresolved($inv, \*STDOUT);
249
0
0
exit( $failed ? 1 : 0 );
250
251# -----------------------------------------------------------------------
252# _dry_run_report( \@contacts, $subject, $report_text, \$raw, $inv )
253#
254# Print a full description of what would be sent without sending anything.
255# -----------------------------------------------------------------------
256sub _dry_run_report {
257
2
4
    my ($contacts, $subject, $body, $raw_ref, $inv, $bcc) = @_;
258
259
2
2
    my $bar = '=' x 72;
260
2
10
    print "$bar\n";
261
2
2
    print "  DRY RUN -- no email will be sent\n";
262
2
3
    print "$bar\n\n";
263
264
2
6
    printf "Subject  : %s\n\n", $subject;
265
2
2
    print  "Envelope sender: <> (null reverse-path per RFC 6650 s.3)\n";
266
2
4
    printf "BCC copy to    : %s\n", $bcc if $bcc;
267
2
2
    print  "\n";
268
269
2
5
    printf "%-3s  %-45s  %s\n", '#', 'Recipient', 'Role';
270
2
5
    printf "%-3s  %-45s  %s\n", '-' x 3, '-' x 45, '-' x 24;
271
272
2
2
    my $n = 1;
273
2
2
1
57
    for my $c (@{$contacts}) {
274
2
6
        printf "%-3d  %-45s  %s\n", $n++, $c->{address}, $c->{role};
275        printf "     via: %-42s  note: %s\n",
276
2
5
            $c->{via}, ($c->{note} || '');
277
2
3
        print "\n";
278    }
279
280    # Part 1
281
2
4
    print "$bar\n";
282
2
1
    print "  PART 1: HUMAN-READABLE REPORT (text/plain)\n";
283
2
2
    print "  (identical text sent to every recipient above)\n";
284
2
3
    print "$bar\n\n";
285
2
48
    print $body;
286
287    # Part 2: feedback-report fields
288
2
4
    print "\n$bar\n";
289
2
2
    print "  PART 2: ARF METADATA (message/feedback-report)\n";
290
2
6
    print "  RFC 5965 s.3 -- machine-readable fields for automated processing\n";
291
2
3
    print "$bar\n\n";
292
2
5
    my $fbr = _build_feedback_report($inv);
293
2
17
    print "  $_\n" for split /\r?\n/, $fbr;
294
295    # Part 3: original message preview
296
2
3
    print "\n$bar\n";
297
2
3
    print "  PART 3: ORIGINAL MESSAGE (message/rfc822)\n";
298
2
2
    print "  RFC 5965 s.2 -- verbatim original; MUST be included\n";
299
2
3
    print "$bar\n";
300
2
6
    if (defined $raw_ref && length $$raw_ref) {
301
2
9
        my @lines = split /\r?\n/, $$raw_ref;
302
2
2
        my $preview = 20;
303
2
2
        my $total   = scalar @lines;
304
2
6
        printf "  [Showing first %d of %d lines]\n\n",
305            ($total < $preview ? $total : $preview), $total;
306
2
25
        print "  $_\n" for @lines[ 0 .. ($total < $preview ? $total - 1 : $preview - 1) ];
307
2
5
        print "  ...\n" if $total > $preview;
308    } else {
309
0
0
        print "  (no original message available)\n";
310    }
311
312
2
3
    print "\n$bar\n";
313
314    # abuse_contacts() now merges duplicate addresses, so @contacts has one
315    # entry per unique address with 'role' already set to the combined string.
316    # Count the total discovery routes (sum of all roles arrays) to show
317    # in the annotation when merging occurred.
318
2
2
2
3
    my $n_contacts = scalar @{$contacts};
319
2
4
    my $n_routes   = 0;
320
2
2
2
2
3
4
    $n_routes += scalar @{ $_->{roles} // [$_->{role}] } for @{$contacts};
321
322
2
3
    if ($n_routes > $n_contacts) {
323        # Some addresses cover more than one discovery route -- note the merge
324
0
0
        printf "  Total: %d recipient%s (%d contact route%s merged)\n",
325            $n_contacts, $n_contacts == 1 ? '' : 's',
326            $n_routes,   $n_routes   == 1 ? '' : 's';
327    } else {
328
2
7
        printf "  Total: %d recipient%s\n",
329            $n_contacts, $n_contacts == 1 ? '' : 's';
330    }
331
2
3
    print "\n";
332
333    # One line per recipient; 'role' is already the merged string
334
2
2
51
4
    for my $c (@{$contacts}) {
335
2
4
        printf "  %s (%s)\n", $c->{address}, $c->{role};
336    }
337
338    # Web-form contacts -- providers that do not accept email
339
2
4
    my @form_cs = $inv->form_contacts();
340
2
3
    if (@form_cs) {
341
0
0
        print "\n$bar\n";
342
0
0
        print "  MANUAL ACTION REQUIRED -- WEB FORM SUBMISSION\n";
343
0
0
        print "  The following parties do not accept email abuse reports.\n";
344
0
0
        print "  Open each URL in a browser and complete the form as instructed.\n";
345
0
0
        print "$bar\n\n";
346
0
0
        for my $c (@form_cs) {
347
0
0
            printf "  Role     : %s\n", $c->{role};
348
0
0
            printf "  Form URL : %s\n", $c->{form};
349
0
0
            printf "  Domain   : %s\n", $c->{form_domain} if $c->{form_domain};
350
0
0
            if ($c->{form_paste}) {
351
0
0
                printf "  Paste    : %s\n", $c->{form_paste};
352            }
353
0
0
            if ($c->{form_upload}) {
354
0
0
                printf "  Upload   : %s\n", $c->{form_upload};
355            }
356
0
0
            print "\n";
357        }
358    }
359
360
2
4
    print "$bar\n";
361
362
2
5
    _print_unresolved($inv, \*STDOUT);
363}
364
365# -----------------------------------------------------------------------
366# _read_tty() -> string | undef
367#
368# Read a single line from the controlling terminal.  Extracted into its
369# own sub so tests can override it without spawning a pseudo-terminal.
370# Returns the chomped input string, or undef if /dev/tty is unavailable.
371# -----------------------------------------------------------------------
372sub _read_tty {
373
0
0
    my $tty;
374
0
0
    unless (open $tty, '<', '/dev/tty') {
375
0
0
        warn "Cannot open /dev/tty for interactive prompt; skipping\n";
376
0
0
        return undef;
377    }
378
0
0
    my $ans = <$tty>;
379
0
0
    close $tty;
380
0
0
    chomp $ans if defined $ans;
381
0
0
    return $ans;
382}
383
384# -----------------------------------------------------------------------
385# _send_report( %args ) -> bool
386#
387# Send a single ARF-compliant abuse report via SMTP.
388# Returns 1 on success, 0 on failure.
389# Errors are printed to STDERR; the caller continues to the next recipient.
390#
391# Envelope sender is the null reverse-path (<>) per RFC 6650 s.3, which
392# requires this to prevent mail loops and DSN storms.  The From: header
393# still carries the reporter's address for human reply purposes.
394# -----------------------------------------------------------------------
395sub _send_report {
396
0
0
    my (%a) = @_;
397
398
0
0
    my $date = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime);
399    my $msg_id = sprintf '<%s.%d@%s>',
400        strftime('%Y%m%d%H%M%S', gmtime),
401        $$,
402
0
0
0
0
0
0
        do { (my $h = $a{from}) =~ s/.*\@//; $h };
403
404    my $mail = _build_mime_message(
405        date     => $date,
406        from     => $a{from},
407        to       => $a{to},
408        bcc      => $a{bcc},
409        subject  => $a{subject},
410        msg_id   => $msg_id,
411        body     => $a{body},
412        original => $a{original},
413        inv      => $a{inv},
414
0
0
    );
415
416    # Debug: if SUBMIT_ABUSE_DUMP_MIME is set, write the raw MIME to a file
417
0
0
    if (my $dump = $ENV{SUBMIT_ABUSE_DUMP_MIME}) {
418
0
0
        if (open my $fh, '>', $dump) {
419
0
0
            print $fh $mail;
420
0
0
            close $fh;
421
0
0
            warn "MIME message written to $dump\n";
422        }
423    }
424
425
0
0
    my $smtp = eval {
426        Net::SMTP->new(
427            $a{smtp_host},
428            Port    => $a{smtp_port},
429            Timeout => $a{timeout},
430
0
0
        );
431    };
432
433
0
0
    unless ($smtp) {
434
0
0
        warn "SMTP connect to $a{smtp_host}:$a{smtp_port} failed: $@\n";
435
0
0
        return 0;
436    }
437
438
0
0
    my $ok = eval {
439        # Null reverse-path per RFC 6650 s.3 -- prevents DSN/mail loops
440
0
0
        $smtp->mail( '' )         or die "MAIL FROM failed\n";
441
0
0
        $smtp->to(   $a{to} )     or die "RCPT TO failed\n";
442        $smtp->to(   $a{bcc} )    or die "RCPT TO (bcc) failed\n"
443
0
0
            if $a{bcc};
444
0
0
        $smtp->data()             or die "DATA failed\n";
445
0
0
        $smtp->datasend($mail)    or die "datasend failed\n";
446
0
0
        $smtp->dataend()          or die "dataend failed\n";
447
0
0
        $smtp->quit();
448
0
0
        1;
449    };
450
451
0
0
    unless ($ok) {
452
0
0
        my $err = $@ || 'unknown SMTP error';
453
0
0
        $err =~ s/\s+$//;
454
0
0
        warn "SMTP error sending to $a{to}: $err\n";
455
0
0
        return 0;
456    }
457
458
0
0
    return 1;
459}
460
461# -----------------------------------------------------------------------
462# _build_feedback_report( $inv ) -> string
463#
464# Constructs the body of the message/feedback-report MIME part (Part 2)
465# as defined in RFC 5965 s.3.  The returned string uses CRLF line endings
466# and contains only 7-bit ASCII, as required by the RFC.
467#
468# Fields included:
469#   Feedback-Type        -- always "abuse"
470#   User-Agent           -- module name and version
471#   Version              -- always "1" (RFC 5965 version)
472#   Source-IP            -- originating IP from Received: chain analysis
473#   Original-Mail-From   -- Return-Path: or From: of the spam
474#   Original-Rcpt-To     -- To: of the spam
475#   Arrival-Date         -- Date: header of the spam (as received)
476#   Reported-Domain      -- primary contact domain (first of all_domains())
477#   Reported-Uri         -- each HTTP/HTTPS URL found in the spam body
478#   Authentication-Results -- forwarded from the spam's own header
479# -----------------------------------------------------------------------
480sub _build_feedback_report {
481
2
4
    my ($inv) = @_;
482
483
2
1
    my @fields;
484
485    # Required fields (RFC 5965 s.3.1)
486
2
3
    push @fields, 'Feedback-Type: abuse';
487
2
4
    push @fields, 'User-Agent: Email::Abuse::Investigator/'
488                . $Email::Abuse::Investigator::VERSION;
489
2
2
    push @fields, 'Version: 1';
490
491    # Source-IP -- the identified originating address
492
2
5
    my $orig = $inv->originating_ip();
493
2
3
    push @fields, "Source-IP: $orig->{ip}" if $orig && $orig->{ip};
494
495    # Original-Mail-From -- envelope sender of the spam
496    # Prefer Return-Path (true envelope sender); fall back to From:
497
2
4
    my $mail_from = $inv->_header_value('return-path')
498                 // $inv->_header_value('from')
499                 // '';
500
2
15
    $mail_from =~ s/^\s*<?\s*|\s*>?\s*$//g;   # strip angle brackets and whitespace
501
2
5
    push @fields, "Original-Mail-From: <$mail_from>" if $mail_from;
502
503    # Original-Rcpt-To -- envelope recipient(s) of the spam
504    # Use the To: header; for each distinct address found
505
2
2
    my $rcpt_to = $inv->_header_value('to') // '';
506    # Extract individual addresses (bare addr@domain or <addr@domain>)
507
2
2
    my %rcpt_seen;
508
2
10
    while ($rcpt_to =~ /<?([^\s<>,;]+\@[\w.-]+)>?/g) {
509
2
3
        my $addr = lc $1;
510
2
9
        push @fields, "Original-Rcpt-To: <$addr>" unless $rcpt_seen{$addr}++;
511    }
512
513    # Arrival-Date -- when we (the reporter) received the spam
514
2
3
    my $arrival = $inv->_header_value('date') // '';
515
2
4
    push @fields, "Arrival-Date: $arrival" if $arrival;
516
517    # Reported-Domain -- the primary contact domain
518
2
5
    my ($rdomain) = $inv->all_domains();
519
2
4
    push @fields, "Reported-Domain: $rdomain" if $rdomain;
520
521    # Reported-Uri -- each distinct URL in the spam body
522
2
2
    my %uri_seen;
523
2
2
    for my $u ($inv->embedded_urls()) {
524        push @fields, "Reported-Uri: $u->{url}"
525
2
10
            unless $uri_seen{ $u->{url} }++;
526    }
527
528    # Authentication-Results -- forwarded verbatim from the spam headers
529
2
4
    my $auth_res = $inv->_header_value('authentication-results') // '';
530
2
3
    push @fields, "Authentication-Results: $auth_res" if $auth_res;
531
532
2
6
    return join("\r\n", @fields) . "\r\n";
533}
534
535# -----------------------------------------------------------------------
536# _build_mime_message( %args ) -> string
537#
538# Constructs a fully RFC 5965 compliant multipart/report MIME message
539# suitable for transmission via Net::SMTP->datasend().
540#
541# Three-part structure per RFC 5965 s.2:
542#   Part 1  text/plain; charset=UTF-8  -- human-readable abuse report
543#   Part 2  message/feedback-report    -- ARF machine-readable metadata
544#   Part 3  message/rfc822             -- original spam message verbatim
545#
546# All line endings in the returned string are CRLF (\r\n).
547# Part 2 uses 7bit encoding (required by RFC 5965 s.3).
548# -----------------------------------------------------------------------
549sub _build_mime_message {
550
0
0
    my (%a) = @_;
551
552    # Boundary unique per message; must not appear in any part body
553
0
0
    my $boundary = sprintf 'arf_report_%s_%d',
554        strftime('%Y%m%d%H%M%S', gmtime), $$;
555
556    # Normalise line endings to CRLF throughout, then encode to raw UTF-8
557    # bytes.  Net::SMTP->datasend() calls syswrite() on a raw socket and
558    # cannot handle Perl strings with the Unicode flag set (wide characters).
559    # The body may contain non-ASCII characters (e.g. emoji in decoded subject
560    # lines) so we must encode explicitly rather than rely on the socket layer.
561
0
0
    (my $body_crlf     = $a{body}) =~ s/\r?\n/\r\n/g;
562
0
0
    my $original_crlf  = '';
563
0
0
0
0
    if (defined $a{original} && length ${ $a{original} }) {
564
0
0
0
0
        ($original_crlf = ${ $a{original} }) =~ s/\r?\n/\r\n/g;
565    }
566
0
0
    my $feedback_report = _build_feedback_report($a{inv});
567
568    # ---- Outer envelope headers ----
569
0
0
    my @msg;
570
0
0
    push @msg, "Date: $a{date}";
571
0
0
    push @msg, "From: $a{from}";
572
0
0
    push @msg, "To: $a{to}";
573
0
0
    push @msg, "Subject: $a{subject}";
574
0
0
    push @msg, "Message-ID: $a{msg_id}";
575
0
0
    push @msg, "MIME-Version: 1.0";
576    # multipart/report with report-type=feedback-report per RFC 5965 s.2
577
0
0
    push @msg, "Content-Type: multipart/report;";
578
0
0
    push @msg, "    report-type=feedback-report;";
579
0
0
    push @msg, "    boundary=\"$boundary\"";
580
0
0
    push @msg, "X-Mailer: Email::Abuse::Investigator submit_abuse_report";
581
0
0
    push @msg, "X-Report-Monitor: $a{bcc}" if $a{bcc};
582
0
0
    push @msg, "";
583
584    # Preamble for non-MIME clients
585
0
0
    push @msg, "This is an ARF (Abuse Reporting Format) feedback report.";
586
0
0
    push @msg, "See https://datatracker.ietf.org/doc/html/rfc5965";
587
0
0
    push @msg, "";
588
589    # ---- Part 1: human-readable summary (RFC 5965 s.2 "first part") ----
590
0
0
    push @msg, "--$boundary";
591
0
0
    push @msg, "Content-Type: text/plain; charset=UTF-8";
592
0
0
    push @msg, "Content-Transfer-Encoding: 8bit";
593
0
0
    push @msg, "Content-Disposition: inline";
594
0
0
    push @msg, "";
595
0
0
    push @msg, $body_crlf;
596
597    # ---- Part 2: ARF machine-readable metadata (RFC 5965 s.3) ----
598
0
0
    push @msg, "--$boundary";
599
0
0
    push @msg, "Content-Type: message/feedback-report";
600
0
0
    push @msg, "Content-Transfer-Encoding: 7bit";   # required by RFC 5965
601
0
0
    push @msg, "";
602
0
0
    push @msg, $feedback_report;
603
604    # ---- Part 3: original spam message (RFC 5965 s.2 "third part") ----
605    # Use base64 encoding so Outlook and other clients reliably show
606    # it as a downloadable .eml attachment rather than rendering it
607    # inline or discarding it.
608
0
0
    my $original_b64 = MIME::Base64::encode_base64($original_crlf, "\r\n");
609
0
0
    push @msg, "--$boundary";
610
0
0
    push @msg, "Content-Type: application/octet-stream; name=\"original_message.eml.txt\"";
611
0
0
    push @msg, "Content-Transfer-Encoding: base64";
612
0
0
    push @msg, "Content-Disposition: attachment;";
613
0
0
    push @msg, "    filename=\"original_message.eml.txt\"";
614
0
0
    push @msg, "Content-Description: Original spam/phishing message";
615
0
0
    push @msg, "";
616
0
0
    push @msg, $original_b64;
617
618    # Closing boundary
619
0
0
    push @msg, "--${boundary}--";
620
0
0
    push @msg, "";
621
622
0
0
    require Encode;
623
0
0
    my $result = join("\r\n", @msg);
624
0
0
    return Encode::encode('UTF-8', $result);
625}
626
627# -----------------------------------------------------------------------
628# _build_subject( $orig_hashref, $risk_hashref ) -> string
629#
630# Build a concise, informative subject line for the abuse report.
631# -----------------------------------------------------------------------
632sub _build_subject {
633
2
3
    my ($orig, $risk) = @_;
634
635
2
4
    my $ip_part   = $orig ? $orig->{ip} : 'unknown origin';
636
2
4
    my $level     = $risk->{level};
637
2
50
    my $date_part = strftime('%Y-%m-%d', gmtime);
638
639
2
5
    return "Abuse report [$level]: spam/phishing from $ip_part ($date_part)";
640}
641