#!/usr/bin/perl -w

# Usage: procreturns.pl

# David Harris, Version of 11 Aug 2005 pm 3:00

# Goes thru the returned emails in all the filtered files, extracting email names
# and specifying the action to be taken [possibly in SQL for MySQL database].
# Email messages that had addresses successfully extracted go into one tree,
#        and other messages go into a tree of problem messages for future processing.


# Where are the returned, filtered, emails?:
# This hard coded location is the only line that needs to be changed for new loc'n.
$BASEDIR = "C:\\Documents and Settings\\User 2\\Application Data\\Thunderbird\\Profiles\\nijx0stm.default\\Mail\\pop.darwinday.org";
$inputline = '';
$prevtype = '';
$msgarray[0] = '';  # Initialize to be global
#................................................................
sub get1msg {
  $lines_in_msg = 0; # Line pointer for array that holds the  lines of 1 message.
  if ((-z MAILFILE) # If an empty file or no more lines.
  or (eof MAILFILE)) { return $lines_in_msg }
  # Did the last time in this routine end with a saved "From" line?
  if ($inputline =~ m/^From - /) {
    $msgarray[$lines_in_msg] = $inputline;
    $lines_in_msg++;
  }
  # Now read lines into array until hit the first line of next message or EOF:
  # These lines may be continuation of reading from same file:
  while ($inputline = <MAILFILE>) {
    if ($inputline =~ m/^Date: / ) {
      ($datedate = $inputline) =~ s/^Date: // ;
      chomp $datedate; # Now we have the Date of receipt of the return message
    }
    if ( (eof MAILFILE)
      or (($inputline =~ m/^From - /) and ($lines_in_msg > 0)) ) {
      return $lines_in_msg;
    } else {
      #print $inputline;
      $msgarray[$lines_in_msg] = $inputline;
      $lines_in_msg++;
    }
  } # Closing while loop
  close MAILFILE;
} # End of sub get1msg.
#.......................................................................
# Find the line with a particular string pattern:
# The search may be for a regular expression.
# Returns the line number of the first matching array line, or FALSE.
sub findline {
  my $searchstring = shift(@_);  # Get the regular expression to search for.
  $k = $searchstart;
  chomp($searchstring);
  while ($k < $lines_in_msg) {
    if ($msgarray[$k] =~ m/$searchstring/ ) { return $k; }
    else {$k++;}
  }
  return 0; # Didn't match, so FALSE.
} # End sub findline.
#...........................................................................
sub write1msg {
  my $extorprob = shift @_;  # Where should the whole message go?
  $EXTFILE = "$BASEDIR\\ExtractedReturns.sbd\\$returntype";
  if ($extorprob eq 'extracted'){
    for ($m = 0; $m < $lines_in_msg; $m++) {
      print EXTFILE "$msgarray[$m]";
    }
  } else {  # problem msgs
    for ($m = 0; $m <= $lines_in_msg; $m++) {
      print PROBFILE "$msgarray[$m]";
    }
  }
} # End sub writ1emsg
#.............................................................................
sub writeaction  {
  ($whichaction) = @_ ;
  $writeto = 'extracted'; #Flag that causes message to be written to 'Extracted' or 'Problem'
  $email = lc $email;  #Lower case the email, so it matches the database email later
  if ($whichaction eq 'DROP') {
    print ACTIONDELSQL
"\/\* $returntype #$nmsgs \[$datedate\] \*\/
DELETE personfacts FROM `personfacts`,`person`
WHERE person.email=\'$email\'
AND person.personsernum = personfacts.personsernum;\n";
    print ACTIONDELSQL
"DELETE FROM `person`
WHERE email=\'$email\';\n";
  }
  if ($whichaction eq 'RETRY') {
    print ACTIONRETTXT "$returntype \[$datedate\]\|RETRY\|$email\n";
  }
    if ($whichaction eq 'FINDDECIDE') {
    print ACTIONRETTXT "$returntype \[$datedate\]\|FINDDECIDE\|$email\n";
  }
      if ($whichaction eq 'NOOP') {
    print ACTIONRETTXT "$returntype \[$datedate\]\|NOOP\|$email\n";
  }
  print ACTIONSTXT "$nmsgs: $returntype \[$datedate\] ($lines_in_msg)\|$whichaction\|$email\n";
  return;
} # End sub writeaction
#==============================================================================

# MAIN ROUTINE starts here.

# All extracted action-emails from this run will have identical GMT time stamp field:
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time);
$tstmp = ($year+'1900').'/'.($mon+'1').'/'.($mday+'1').' '.($hour).':'.$min.':'.$sec ;

opendir RETDIRNAME, "$BASEDIR\\Returns.sbd" or die "procreturns.pl: Can't read that directory!";
    @alltypes = grep !/\./, readdir RETDIRNAME; # Removes all '.'-containing filenames
    print "\n$tstmp\n@alltypes\n";

# For emails that will have email addresses extracted:
mkdir "$BASEDIR\\ExtractedReturns.sbd", 700;
open EXTDIRFILE, ">$BASEDIR\\ExtractedReturns" or die "procreturns.pl: Can't read that directory!";
# For emails that will NOT have email addresses extracted:
mkdir "$BASEDIR\\ProblemReturns.sbd", 700;
open PROBDIRFILE, ">$BASEDIR\\ProblemReturns" or die "procreturns.pl: Can't read that directory!";
# The actions for the database of names (emailbase).  This file is written in
#  the "My Documents" folder/directory:
open ACTIONSTXT, ">ACTIONSTXT" or die "procreturns.pl: Can't open ACTIONSTXT\.\n";
open ACTIONDELSQL, ">ACTIONDELSQL" or die "procreturns.pl: Can't open ACTIONDELSQL\.\n";
open ACTIONRETTXT, ">ACTIONRETTXT" or die "procreturns.pl: Can't open ACTIONRETTXT\.\n";

$prevtype = '';
# One variety of return messages at a time:
foreach $returntype (@alltypes) {
  $nmsgs = 0;
  if ($prevtype ne $returntype) {
    # First message in this file. Prepare to read (and write both the extracted
    #                 and problematic ones).
    $MAILFILE = "$BASEDIR\\Returns.sbd\\$returntype";
    open MAILFILE, "<$MAILFILE" or die "procreturns.pl: Can't open $MAILFILE\n";
    mkdir "$BASEDIR\\ExtractedReturns.sbd\\$returntype.sbd", 700;
    $EXTFILE = "$BASEDIR\\ExtractedReturns.sbd\\$returntype";
    open EXTFILE, ">>$EXTFILE" or die "procreturns.pl: Can't open $EXTFILE\n";
    mkdir "$BASEDIR\\ProblemReturns.sbd\\$returntype.sbd", 700;
    $PROBFILE = "$BASEDIR\\ProblemReturns.sbd\\$returntype";
    open PROBFILE, ">>$PROBFILE" or die "procreturns.pl: Can't open $PROBFILE\n";
    $prevtype = $returntype; # So just one set of opens per file of messages.
  }
  # Read in @msgarray with lines_in_msg of 1 msg:
  while (&get1msg) { # Until 0 lines returned
    $nmsgs++;
    $email = '';  #So if no extraction succeeded, later code will know from 0 length
    $writeto = 'problem';  # Unless an extraction succeeded
    SWITCH: { # The custom extracting for each kind of return message:
#--------------------------
      if ($returntype eq "AOLUnknown") {
        # EXTRACT FROM 1 MESSAGE!
        $searchstart = 40;
        VERSION: {
          # ------------
          if ($line = &findline('<<< 550 MAILBOX NOT FOUND')) {
            # Get the email address:
            ($email = $msgarray[$line+1]) =~ s/550 <([a-zA-Z0-9_\-\.]+@.+)>\.\.\. User unknown$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction ('DROP',$email); #The subroutine writes SQL for DELETEing
            }  # Extraction of info from 1 msg
            last VERSION;
          }
          # ------------
          if ($line = &findline('^<<< 550 .+ IS NOT ACCEPTING ANY MAIL$')) {
            # Get the email address:
            ($email = $msgarray[$line+1]) =~ s/550 <([a-zA-Z0-9_\-\.]+@.+)>\.\.\. User unknown$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('RETRY');
            }
            last VERSION;
          }
          # ------------
        }  # End of VERSIONs
      last SWITCH; } # AOLUnknown
#--------------------------
      if ($returntype eq "Blockedbybulk") {
        $searchstart = 20;
        if ($line = &findline(
          '^Diagnostic-Code: smtp; 550 5\.7\.1.*$')) {
          # Get the email address:
          ($email = $msgarray[$line-3]) =~ s/^Final-Recipient: rfc822; ([a-zA-Z0-9_\-\.]+@.+).*$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction ('DROP',$email); #The subroutine writes SQL for DELETEing
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "DeliveryStatFailure") { # print "$nmsgs: $returntype $lines_in_msg\n";
        $searchstart = 20;
        if ($line = &findline(
          '^This is an automatically generated Delivery Status Notification\.')) {
          # Get the email address:
          ($email = $msgarray[$line+4]) =~ s/^ +([a-zA-Z0-9_\-\.]+@.+)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction ('DROP',$email);
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "DelivStatNotifDelay") {
        $searchstart = 2;
        if ($line = &findline(
        '^Delivery to the following recipients has been delayed\.')) {
          # Get the email address:
          ($email = $msgarray[$line+2]) =~ s/^ +([a-zA-Z0-9_\-\.]+@.+)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction ('RETRY',$email);
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "ErrorDuringDelivery") {
        $searchstart = 2;
        VERSION: {
          if ($line = &findline(
          'Requested action not taken: mailbox unavailable\. \[SMTP Error Code 550\]$')) {
            # Get the email address:
            ($email = $msgarray[$line-3]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction ('DROP',$email);
            }
          }
          #--------------
          if ($line = &findline('^Mail Server is down or unreachable. error: 11004$')) {
            # Get the email address:
            ($email = $msgarray[$line-3]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction ('RETRY',$email);
            }
          }
          #--------------
        } # End of VERSIONs
      last SWITCH; }
#--------------------------
      if ($returntype eq "EximDelivFailed") {
        $searchstart = 2;
        if ($line = &findline(
          'This message was created automatically by mail delivery software \(Exim\)\.')) {
          # Get the email address:
          ($email = $msgarray[$line+5]) =~ s/ +([a-zA-Z0-9_\-\.]+@.+)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "FailureDeliveryYahoo") {
        $searchstart = 1;
        $line1 = &findline('Message from  yahoo\.com\.');
        $line2 = &findline('Unable to deliver message to the following address\(es\)\.');
        if ($line1+1 == $line2) {
          $searchstart = $line2;
          # Tremendous variety in the response messages from Yahoo. Intentional?
          # Get the original email address from the copy of original msg.
          if ($line = &findline('^To: ')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^To: +([a-zA-Z0-9_\-\.]+@[^@]+)$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('DROP');
            }
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "MailAdminMailSysError") {
        $searchstart = 2;
        if ($line = &findline(
        '^This Message was undeliverable due to the following reason:')) {
          $searchstart = $line;  # Go forward from 1st match line
          if ($line = &findline(
            '^SMTP <.+@.+>$' )) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^SMTP <([a-zA-Z0-9_\-\.]+@.+)>$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('RETRY');
            }
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "MailRouteUndeliveredReturned") {
        $searchstart = 8;
        if ($line = &findline(
        '^\t\t\tThe MailRoute program$')) {
          # Get the email address:
          ($email = $msgarray[$line+2]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>: host .+ said: 550.*$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('RETRY');
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "MsgStatusundeliverableMailerDaemon") {
        $searchstart = 2;
        if ($line = &findline(
          '^The message that you sent was undeliverable to the following:')) {
          # Get the email address:
          ($email = $msgarray[$line+1]) =~ s/^\t+([a-zA-Z0-9_\-\.]+@.+)[\t ]+\(user not found\)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "NDNMailerDaemon") {
        $searchstart = 2;
        if ($line = &findline(
          '^Sorry\. Your message could not be delivered to:.*')) {
          # Get the email address:
          ($email = $msgarray[$line+2]) =~ s/^([a-zA-Z0-9_ \-\.]+,[^(]*)\(The name was not found at the remote site\..*$/$1/ ;
          chomp($email);
          ($email = $email) =~ s/ /_/g ;
          if (length($email) > 0) {
            writeaction('FINDDECIDE');
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "NoSenderDelivStatus") {
        $searchstart = 2;
        if ($line = &findline(
          '^    --- The following addresses had delivery problems ---$')) {
          # Get the email address:
          ($email = $msgarray[$line+2]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>\   \(No such recipient\)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "PostfixUndeliveredReturned") {
        $searchstart = 2;
        if ($line = &findline(
          '^			The Postfix program$')) {
          # Get the email address:
          VERSION: {
            if ($msgarray[$line] =~ m/expanded from/ ) {
              ($email = $msgarray[$line+2]) =~ s/^.+ \(expanded from <([a-zA-Z0-9_\-\.]+@.+)>.*$/$1/ ;
              last VERSION;
            }
         #   if (m/expanded from$/) {
         #     ($email = $msgarray[$line+3]) =~ s/^.+ \.+<([a-zA-Z0-9_\-\.]+@.+)>.*$/$1/ ;
         #   last VERSION; }
            # All others
              ($email = $msgarray[$line+2]) =~ s/^.*<([a-zA-Z0-9_\-\.]+@.+)>.*$/$1/ ;
          } # End of VERSIONs.
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
        last SWITCH; }
#--------------------------
# Note that DD was added to the original name of this type
      if ($returntype eq "PostmasterDELIVERYFAILUREDD") {
        $searchstart = 0;
        if ($line = &findline('^Subject: .*DELIVERY FAILURE: User ')) {
          # Get the email address:
          ($email = $msgarray[$line+0]) =~
          s/^Subject: .*DELIVERY FAILURE: User .+\(([a-zA-Z0-9_\-\.]+@[a-zA-Z0-9\.\-]+)\).*$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
      last SWITCH; }
#--------------------------
# Note that Other was added to the original name of this type
      if ($returntype eq "PostmasterDELIVERYFAILUREOther") {
        $searchstart = 0;
        if ($line = &findline('^Subject: .*DELIVERY FAILURE: User ')) {
          # Get the email address:
          ($email = $msgarray[$line+0]) =~
          s/^Subject: .*DELIVERY FAILURE: User .+\(([a-zA-Z0-9_\-\.]+@[a-zA-Z0-9\.\-]+)\).*$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('DROP');
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "Qmailfailurenotice") {
        $searchstart = 2;
        if ($line = &findline('^Hi. This is the qmail-send program at')) {
          $searchstart = $line+1;
          # Versions
          VERSION: {
            if ($line = &findline('^Sorry, no mailbox here by that name\.')) {
              ($email = $msgarray[$line-1]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------  Customize!! :
            if ($line = &findline('^Sorry, I couldn\'t find any host named')) {
              ($email = $msgarray[$line-1]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------
            if ($line = &findline('^Disk Quota Exceeded\.')) {
              ($email = $msgarray[$line-1]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------
            if ($line = &findline('Message contains invalid header$')) {
              ($email = $msgarray[$line-1]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {

                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------
            if ($line = &findline('not going to try again; this message has been in the queue too long\.$')) {
              ($email = $msgarray[$line-2]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('RETRY');
              }
            last VERSION;
            }
            # ----------
            if ($line = &findline('\.\.\. User unknown$')) {
              ($email = $msgarray[$line-2]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------
            if ($line = &findline('\.\.\. User unknown$')) {
              ($email = $msgarray[$line-2]) =~ s/^<([a-zA-Z0-9_\-\.]+@.+)>:$/$1/ ;
              chomp($email);
              if (length($email) > 0) {
                writeaction('DROP');
              }
            last VERSION;
            }
            # ----------

          } # End of all Qmail... VERSIONs.
        }  # End of things done after matching Qmail greeting line
      last SWITCH; }
#--------------------------
      if ($returntype eq "ReturnedSeeTranscript") {
        $searchstart = 2;
        if ($line = &findline(
          '   ----- The following addresses had permanent fatal errors -----')) {
          # Because there are "expanded" names, get the email address from the
          #  original message:
          $searchstart = $line+1;
          if ($line = &findline(
            '^To: +[a-zA-Z0-9_\-\.]+@\S+$')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^To: +([a-zA-Z0-9_\-\.]+@.+)$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('DROP');
            }
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "SpamFirewallUndeliveredReturned") {
        $searchstart = 1;
        $line1 = &findline('^This is the Spam Firewall at');
        $line2 = &findline('^I\'m sorry to inform you that the message below could not be delivered\.');
        if ($line1+2 == $line2) {
          $searchstart = $line2;
          # Get the original email address from the copy of original msg.
          if ($line = &findline('^To: ')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^To: +([a-zA-Z0-9_\-\.]+@[^@]+)$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('DROP');
            }
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "Symantec_AntiVirus_for_SMTP") {
        $searchstart = 1;
        $line = &findline('^--- The message cannot be delivered to the following address. ---$');
        VERSION: {
          if ($line = &findline('Mailbox unknown or not accepting mail\.$')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~
                s/^([a-zA-Z0-9_\-\.]+@[^@ ]+)    Mailbox unknown or not accepting mail\.$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('DROP');
            }
            last VERSION;
          }
          #------------
          if ($line = &findline('Could not deliver in a reasonable time\.$')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~
                s/^([a-zA-Z0-9_\-\.]+@[^@ ]+)    Could not deliver in a reasonable time\.$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('RETRY');
            }
            last VERSION;
          }
          #------------
        }  # End of Symantec... VERSIONs
      last SWITCH; }
#--------------------------
      if ($returntype eq "UndelivSysAdmin") {
        $searchstart = 1;
        $line1 = &findline('^Your message$');
        $line2 = &findline('^did not reach the following recipient\(s\):$');
        if ($line1+6 == $line2) {
          $searchstart = $line1;
          # Get the original email address from the copy of original msg.
          if ($line = &findline('^To: ')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^To: +([a-zA-Z0-9_\-\.]+@[^@]+)$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('DROP');
            }
          }
        }
      last SWITCH; }
#--------------------------
      if ($returntype eq "WarningExim") {
        $searchstart = 2;
        if ($line = &findline('No action is required on your part')) {
          ($email = $msgarray[$line-3]) =~ s/^ +([a-zA-Z0-9_\-\.]+@+.+)$/$1/ ;
          chomp($email);
          if (length($email) > 0) {
            writeaction('NOOP');
          }
        }
        last SWITCH; }
#--------------------------
      if ($returntype eq "WarningFromMAILER-DAEMON") {
        $searchstart = 2;
        if ($line = &findline(
        '^    \*\*  YOU DO NOT NEED TO RESEND YOUR MESSAGE  \*\*$')) {
          $searchstart = $line;
          if ($line = &findline('^To: ')) {
            # Get the email address:
            ($email = $msgarray[$line+0]) =~ s/^To: +<*([a-zA-Z0-9_\-\.]+@.+)>*.*$/$1/ ;
            chomp($email);
            if (length($email) > 0) {
              writeaction('NOOP');
            }
          }
        }
      last SWITCH; }
#--------------------------
    } # SWITCH
    # Put the message in appropriate folder:
    if ($writeto eq 'extracted') {
      &write1msg('extracted');
    } else {
      &write1msg('problem');
    }
  } #while processing one email
  # Code to place original Returns folder back in place ???? Not currently.
} # foreach type of return (similar in each file) loop.

close ACTIONSTXT;
close ACTIONDELSQL;
close ACTIONRETTXT;
exit;  # End of program
