diff --git a/Scripts/Extract messages.applescript b/Scripts/Extract messages.applescript new file mode 100644 index 0000000..4826ddf --- /dev/null +++ b/Scripts/Extract messages.applescript @@ -0,0 +1,197 @@ +property WorkingDirectory : "Waiareka HD:Users:nstanger:tmp:" +property WorkingPOSIX : POSIX path of "Waiareka HD:Users:nstanger:tmp:" + +global c, cQ, Qc, QcQ + +on run + set c to ", " + set cQ to ", '" + set Qc to "', " + set QcQ to "', '" + + + try + set msgFile to open for access file (WorkingDirectory & "message.sql") with write permission + on error e + my doError(e) + return + end try + + tell application "Microsoft Entourage" to � + set theMessages to get current messages + + repeat with thisMsg in theMessages + + -- Most of the headers we can just grab from the standard message properties. + tell application "Microsoft Entourage" + set theSubject to change "'" into "''" in ((subject of thisMsg) as string) + set theSender to change "'" into "''" in ((address of sender of thisMsg) as string) + set thePriority to (priority of thisMsg) as string + set theHeaders to change "'" into "''" in ((headers of thisMsg) as string) + set theContent to change "'" into "''" in ((content of thisMsg) as text) + set theSource to change "'" into "''" in ((source of thisMsg) as string) + set theCharacterSet to character set of thisMsg + set theHTML to has html of thisMsg + end tell -- application "Microsoft Entourage" + + -- Grab the headers as a list so we can extract individual headers. + set headerList to my parseHeaders(paragraphs of theHeaders) + + -- We can't just use the time sent property because if the date is weird, Entourage + -- invariably mangles into something else. We therefore manually extract the sent date + -- from the headers; at least this way we have a fighting chance of getting something + -- resembling the actual message date (as long as it's not _too_ misguided). + set theTimeSent to my extractSentDate(headerList) + + set insertMessage to "insert into message (subject, sender, time_sent, " & � + "priority, headers, content, source, character_set, has_html) " & � + "values ('" & theSubject & QcQ & theSender & QcQ & theTimeSent & QcQ & � + thePriority & QcQ & theHeaders & QcQ & theContent & QcQ & theSource & � + QcQ & theCharacterSet & Qc & theHTML & ");" + + try + set eof msgFile to 0 + write insertMessage to msgFile + on error e + my doError(e) + close access msgFile + return + end try + + do shell script "cat " & WorkingPOSIX & "message.sql | tr '\\r' '\\n' > " & WorkingPOSIX & � + "message_lf.sql; rm " & WorkingPOSIX & "message.sql" + + set theResult to (do shell script "psql -d spam -f " & WorkingPOSIX & "message_lf.sql") + + if (theResult � "") then + set theMessageID to my retrieveMessageID(theResult) + my processRecipients(thisMsg, theMessageID) + my processHeaders(headerList, theMessageID) + else + beep + end if + + end repeat -- with thisMsg in theMessages + + close access msgFile + +end run + + +-- Get the recipients of a message. Return as a list in the form +-- {{"blah@foo.com", "to"}, "bar@quux.net", "cc"} +on getRecipients(thisMsg) + local recipientList + + set recipientList to {} + + tell application "Microsoft Entourage" + repeat with thisRecipient in recipients of thisMsg + set recipientList to recipientList & � + {{address of address of thisRecipient, first word of (recipient type of thisRecipient as string)}} + end repeat + end tell + + return recipientList +end getRecipients + + +-- Build and return a list of message headers in the format +-- {{"header name", "header value"}, {"header name", "header value"}, {...}}. +on parseHeaders(headerList) + local parsedHeaders, headerName, headerValue + local thisHeader, oldDelim + + set parsedHeaders to {} + set headerName to "" + set headerValue to "" + + set oldDelim to AppleScript's text item delimiters + set AppleScript's text item delimiters to {":"} + + -- Skip the last item of headerList because it's the blank line terminating the headers. + repeat with thisHeader in items 1 thru -2 of headerList + + -- If a line starts with white space, the header has been folded, so add this line to + -- the current header instead of creating a new header. + if (character 1 of thisHeader is in {" ", " "}) then + set headerValue to headerValue & return & " " & contents of thisHeader + + else -- It's a new header. + + -- If we're already working on a header, store it before starting the new one. + if (headerName � "") then � + set parsedHeaders to parsedHeaders & {{headerName, headerValue}} + + -- The header name is the text before the first colon (text item 1). + set headerName to text item 1 of (contents of thisHeader) + + -- Everything else is the header value, but we need to strip off the first space; + -- hence text 2 thru -1. + set headerValue to text 2 thru -1 of ((text items 2 thru -1 of (contents of thisHeader)) as string) + + end if -- character 1 of thisHeader is in {" ", " "} + + end repeat -- with thisHeader in items 1 thru -2 of headerList + + set AppleScript's text item delimiters to oldDelim + + -- Add the last header to the list before we finish. + set parsedHeaders to parsedHeaders & {{headerName, headerValue}} + + return parsedHeaders + +end parseHeaders + + +-- Grabs the date header from the header list. Returns "" if it isn't found. +on extractSentDate(headerList) + repeat with thisHeader in headerList + if (item 1 of thisHeader = "Date") then return item 2 of thisHeader + end repeat -- with thisHeader in headerList + return "" +end extractSentDate + + +-- Retrieve the message ID for a message given the result string from inserting it +-- (which contains the PostgreSQL OID of the row). +on retrieveMessageID(theOIDString) + + set theOID to second word of theOIDString + + -- Use the OID to retrieve the message ID of the message row we just inserted. + return (do shell script "psql -d spam -c \"select message_id from message where oid = " & � + theOID & ";\" -P tuples_only -P format=unaligned") + +end retrieveMessageID + + +-- Process and store the recipients of a message. +on processRecipients(thisMsg, theMessageID) + tell application "Microsoft Entourage" + repeat with thisRecipient in recipients of thisMsg + set recipientAddress to address of address of thisRecipient + set recipientName to change "'" into "''" in (display name of address of thisRecipient as string) + set recipientType to first word of (recipient type of thisRecipient as string) + do shell script "psql -d spam -c \"insert into recipient (address, recipient_name, recipient_type, message_id) " & � + "values ('" & recipientAddress & QcQ & recipientName & QcQ & recipientType & Qc & theMessageID & ");\"" + end repeat + end tell +end processRecipients + + +-- Process and store the headers of a message. +on processHeaders(headerList, theMessageID) + repeat with thisHeader in headerList + set theResult to do shell script "psql -d spam -c \"insert into header (header_name, header_value, message_id) " & � + "values ('" & item 1 of thisHeader & QcQ & item 2 of thisHeader & Qc & theMessageID & ");\"" + tell me to display dialog theResult + end repeat +end processHeaders + + +-- Error handler. +on doError(e) + beep + display dialog e +end doError