GitBucket
4.21.2
Toggle navigation
Snippets
Sign in
Files
Branches
1
Releases
Issues
Pull requests
Labels
Priorities
Milestones
Wiki
Forks
nigel.stanger
/
spamdb
Browse code
Initial import of spam DB stuff.
master
1 parent
74e3125
commit
d57a8d2ae5e9dd64cca17bf26443a63b737aca34
nstanger
authored
on 6 Dec 2002
Patch
Showing
1 changed file
Scripts/parse_email
Ignore Space
Show notes
View
Scripts/parse_email
0 → 100755
#!/usr/bin/perl
#
# parse_email - read one e-mail message from a file, split it into its
# header section and body, and print a summary of what was found.
#
# Usage: parse_email MESSAGE_FILE
#
# Output (on STDOUT): the number of headers, each header as
# "[name] = [value]", then the subject, sender, time sent, priority,
# character set, whether the message is HTML, and the body line count.
#
# Dies with a message on STDERR if no file is given or it cannot be opened.

use strict;
use warnings;

# NOTE(review): neither module is referenced anywhere in this script yet;
# the imports are kept in case later work builds on them.
use Mail::Address;
use Mail::Field;

@ARGV or die "Usage: $0 MESSAGE_FILE\n";
my $message_path = $ARGV[0];

# Three-argument open with a lexical handle, so that characters in the
# file name can never be interpreted as an open mode.
open my $infile, '<', $message_path
    or die "Cannot open '$message_path': $!\n";

# TRUE while we are still reading the header section.
my $process_headers = 1;

# The header currently being accumulated (it may span several folded
# lines). The name is undef whenever no header is pending.
my $current_header_name;
my $current_header_value = '';

# Every header seen, in order of appearance (parallel arrays).
my @header_names;
my @header_values;

# Body information.
my $body_line_count = 0;

# Message properties, with defaults for anything the headers don't supply.
my $message_subject       = '';
my $message_sender        = '';
my $message_time_sent     = '';
my $message_priority      = 'Normal';
my $message_charset       = 'iso-8859-1';
my $message_raw_headers   = '';
my $message_raw_body      = '';
my $message_has_html      = 0;
my $message_is_multipart  = 0;
my $message_part_boundary = '';

# Maps numeric priority values (1-5) to their names.
my %priorities_lookup = (
    1 => 'Highest',
    2 => 'High',
    3 => 'Normal',
    4 => 'Low',
    5 => 'Lowest',
);

# File away the pending header (if any) and pick out the "special" headers
# (subject, sender, date, priority, content type). This runs only once a
# header is complete, so values folded over several lines are seen whole.
# Takes no arguments and returns nothing; it updates the variables above.
sub store_current_header {
    return unless defined $current_header_name;

    push @header_names,  $current_header_name;
    push @header_values, $current_header_value;

    # Folded lines are stored as "\n<whitespace>text"; join them back into
    # a single line before interpreting the value.
    (my $unfolded = $current_header_value) =~ s/\n\s+/ /g;

    # Header field names are case-insensitive.
    my $key = lc $current_header_name;

    if ($key eq 'subject') {
        $message_subject = $unfolded;
    }
    elsif ($key eq 'from') {
        $message_sender = $unfolded;
    }
    elsif ($key eq 'date') {
        $message_time_sent = $unfolded;
    }
    elsif ($key =~ /priority\z/) {
        # Covers Priority, X-Priority, X-MSMail-Priority, and so on. A
        # leading digit 1-5 is translated to its name; anything else is
        # taken verbatim.
        if ($unfolded =~ /\A\s*([1-5])(?!\d)/) {
            $message_priority = $priorities_lookup{$1};
        }
        else {
            $message_priority = $unfolded;
        }
    }
    elsif ($key eq 'content-type') {
        # The media type is everything up to the first ';' or whitespace.
        if ($unfolded =~ m{\A\s*([^\s;]+)}) {
            my $media_type = lc $1;
            $message_has_html     = ($media_type eq 'text/html') ? 1 : 0;
            $message_is_multipart = ($media_type =~ m{\Amultipart/}) ? 1 : 0;
        }

        # Parameters may or may not be quoted.
        if ($unfolded =~ /;\s*charset\s*=\s*"?([^";\s]+)"?/i) {
            $message_charset = $1;
        }
        if ($unfolded =~ /;\s*boundary\s*=\s*(?:"([^"]*)"|([^;\s]+))/i) {
            $message_part_boundary = defined $1 ? $1 : $2;
        }
    }

    $current_header_name  = undef;
    $current_header_value = '';
    return;
}

while (my $line = <$infile>) {
    # Once the headers are done, everything else is body.
    if (!$process_headers) {
        $message_raw_body .= $line;
        $body_line_count++;
        next;
    }

    # Work on a copy without the line terminator (LF or CRLF); the raw
    # line is what gets appended to $message_raw_headers.
    (my $text = $line) =~ s/\r?\n\z//;

    if ($text eq '') {
        # The first blank line separates the headers from the body. Don't
        # forget to store the last header that we found! The blank line
        # itself is not appended to $message_raw_headers.
        store_current_header();
        $process_headers = 0;
        next;
    }

    if (my ($name, $value) = $text =~ /\A([-\w]+):[ ]?(.*)\z/) {
        # Beginning of a new header: file away the previous one first.
        store_current_header();
        $current_header_name  = $name;
        $current_header_value = $value;
    }
    elsif (defined $current_header_name && $text =~ /\A(\s+.*)\z/) {
        # This line is still part of the current header (i.e., the header
        # has been folded), so just add it to the current value.
        $current_header_value .= "\n$1";
    }

    $message_raw_headers .= $line;
}

# If the file ended while we were still in the headers, the last header is
# still pending.
store_current_header();

close $infile;

# The complete message; not used further by anything in this script.
my $message_raw_source = "$message_raw_headers\n$message_raw_body";

my $header_count = scalar @header_names;
print "Found $header_count headers:\n";
for my $i (0 .. $#header_names) {
    print "[$header_names[$i]] = [$header_values[$i]]\n";
}

my $html_note = $message_has_html
    ? "Message contains HTML\n"
    : "Message doesn't contain HTML\n";

print "\nSubject: $message_subject\n";
print "Sender: $message_sender\n";
print "Time sent: $message_time_sent\n";
print "Priority: $message_priority\n";
print "Character set: $message_charset\n";
print $html_note;
print "\nBody has $body_line_count lines.\n";
Show line notes below