diff --git a/Scripts/parse_email b/Scripts/parse_email
new file mode 100755
index 0000000..9ce3912
--- /dev/null
+++ b/Scripts/parse_email
@@ -0,0 +1,204 @@
+#!/usr/bin/perl
+#
+# parse_email - split a mail message into its headers and body and print a
+# summary of what was found.
+#
+# Usage: parse_email MESSAGE_FILE
+
+use strict;
+use warnings;
+
+use Mail::Address;
+use Mail::Field;
+
+@ARGV or die "Usage: $0 MESSAGE_FILE\n";
+my $message_path = $ARGV[0];
+
+# Three-argument open with a lexical handle, so the file name can never be
+# mistaken for an open mode or a pipe.
+open my $infile, '<', $message_path
+    or die "Cannot open '$message_path': $!\n";
+
+# TRUE if we're currently processing the headers.
+my $process_headers = 1;
+
+# The header currently being assembled (it may span several folded lines),
+# followed by every completed header in the order it appeared.
+my $current_header_name  = '';
+my $current_header_value = '';
+my @header_names         = ();
+my @header_values        = ();
+
+# Variables for holding body information.
+my $body_line_count = 0;
+
+# Set up some defaults.
+my $message_subject     = '';
+my $message_sender      = '';
+my $message_time_sent   = '';
+my $message_priority    = 'Normal';
+my $message_charset     = 'iso-8859-1';
+my $message_raw_headers = '';
+my $message_raw_body    = '';
+my $message_has_html    = 0;
+
+my $message_is_multipart  = 0;
+my $message_part_boundary = '';
+
+# Maps the numeric form of a priority header to its display name.
+my %priorities_lookup = (
+    1 => 'Highest',
+    2 => 'High',
+    3 => 'Normal',
+    4 => 'Low',
+    5 => 'Lowest',
+);
+
+# Pull the charset and boundary parameters out of (part of) a Content-Type
+# header value.  Returns (charset, boundary); each is undef when absent.
+sub content_type_params
+{
+    my ($text) = @_;
+
+    # The value stops at a quote, semicolon or space so that a closing quote
+    # or a following parameter is never swallowed into the charset name.
+    my ($charset) = $text =~ /\bcharset\s*=\s*"?([^";\s]+)/i;
+
+    my $boundary;
+    if ($text =~ /\bboundary\s*=\s*(?:"([^"]*)"|([^;\s]+))/i)
+    {
+        $boundary = defined $1 ? $1 : $2;
+    }
+
+    return ($charset, $boundary);
+}
+
+while (my $line = <$infile>)
+{
+    if (!$process_headers)
+    {
+        $message_raw_body .= $line;
+        $body_line_count++;
+        next;
+    }
+
+    # Match against a copy without the line terminator so that CRLF and LF
+    # messages are treated alike; the raw copies keep the original bytes.
+    (my $text = $line) =~ s/\r?\n\z//;
+
+    if ($text eq '')
+    {
+        # The first blank line separates the headers from the body.  Drop
+        # out now so that we don't append the blank line to
+        # $message_raw_headers.
+        $process_headers = 0;
+        next;
+    }
+
+    if ($text =~ /^([-\w]+):[ ]?(.*)$/)
+    {
+        # We've found the beginning of a new header.  File away the previous
+        # one for later reference.
+        if ($current_header_name ne '')
+        {
+            push @header_names,  $current_header_name;
+            push @header_values, $current_header_value;
+        }
+
+        ($current_header_name, $current_header_value) = ($1, $2);
+
+        # Also store the "special" headers (like date, sender, etc.) in the
+        # appropriate variables.  Header names are compared without regard
+        # to case.
+        my $name = lc $current_header_name;
+
+        if ($name eq 'subject')
+        {
+            $message_subject = $current_header_value;
+        }
+        elsif ($name eq 'from')
+        {
+            $message_sender = $current_header_value;
+        }
+        elsif ($name eq 'date')
+        {
+            $message_time_sent = $current_header_value;
+        }
+        elsif ($name =~ /priority\z/)
+        {
+            # Any header whose name ends in "Priority" (e.g. X-Priority).  A
+            # leading digit 1-5 is translated; anything else is kept as is.
+            if ($current_header_value =~ /^([1-5])/)
+            {
+                $message_priority = $priorities_lookup{$1};
+            }
+            else
+            {
+                $message_priority = $current_header_value;
+            }
+        }
+        elsif ($name eq 'content-type')
+        {
+            # The media type is everything up to the first ';' or space.
+            if ($current_header_value =~ m{^\s*([^;\s]+)})
+            {
+                my $media_type = lc $1;
+                $message_has_html     = ($media_type eq 'text/html')     ? 1 : 0;
+                $message_is_multipart = ($media_type =~ m{^multipart/}) ? 1 : 0;
+            }
+
+            my ($charset, $boundary)
+                = content_type_params($current_header_value);
+            $message_charset       = $charset  if defined $charset;
+            $message_part_boundary = $boundary if defined $boundary;
+        }
+    }
+    elsif ($text =~ /^(\s+.*)$/)
+    {
+        # This line's still part of the current header (i.e., the header's
+        # been folded).  Just add this line to the current header.
+        my $continuation = $1;
+        $current_header_value .= "\n$continuation";
+
+        # Content-Type parameters may sit on a folded line of their own.
+        if (lc($current_header_name) eq 'content-type')
+        {
+            my ($charset, $boundary) = content_type_params($continuation);
+            $message_charset       = $charset  if defined $charset;
+            $message_part_boundary = $boundary if defined $boundary;
+        }
+    }
+
+    $message_raw_headers .= $line;
+}
+
+close $infile;
+
+# Don't forget to store the last header that we found!  Doing it here,
+# rather than at the blank line, also covers a message with no body.
+if ($current_header_name ne '')
+{
+    push @header_names,  $current_header_name;
+    push @header_values, $current_header_value;
+}
+
+my $header_count       = scalar @header_names;
+my $message_raw_source = "$message_raw_headers\n$message_raw_body";
+
+print "Found $header_count headers:\n";
+
+for my $i (0 .. $#header_names)
+{
+    print "[$header_names[$i]] = [$header_values[$i]]\n";
+}
+
+print "\nSubject: $message_subject\n";
+print "Sender: $message_sender\n";
+print "Time sent: $message_time_sent\n";
+print "Priority: $message_priority\n";
+print "Character set: $message_charset\n";
+print $message_has_html
+    ? "Message contains HTML\n"
+    : "Message doesn't contain HTML\n";
+
+print "\nBody has $body_line_count lines.\n";