#!/usr/bin/env python
#
# Copyright (c) 2003 by Stonetics, Inc. All Rights Reserved.
#
# A log summarizer for Communigate Pro
#
# 2003-08-08 Now analyzes as it parses, which takes 1/10th the memory and is 50% faster
# 2003-08-09 Small correction in how transaction ends are determined. Added commas to
#            summary numbers. Removed localhost SMTPI transactions from stats
# 2003-08-14 Fixed bug caused by the 1st part of a spam SMTPI occurring before log starts
#            Added total bytes transferred statistic
#            Spammer report now sorted and grouped by domain
# 2003-08-15 Fixed bug causing Outgoing Recipient count to be skipped
# 2003-08-25 Handle multi-part log files
#            Report only the erroneous outgoing sub-transactions (such as with a list manager)
# 2003-08-26 Lower-case the spammed addresses to avoid duplicates
# 2003-08-31 Rearranged report sections
#            Added IP address of sender or receiver to transaction tags
#            Added responsible blacklist(s) and spamtraps to spammer list
#            Added spamtraps to Blacklist Hits list
#            Spaced spammer list to make it readable
#            Added recipients' email addresses to spammer list
# 2003-09-01 Cleaned up Spamtrap reporting
#            Added Spamtrap Hits section
# 2003-09-15 Fix bug with wrong FROM tag when multiple FROM tags were sent
# 2003-10-04 Added ability to base64-encode the report to avoid RFC822 filtering
#            Added destination server name to SMTP (outgoing) error reports
# 2003-11-30 Added summaries for relay rejections and content rejections
#            User can now suppress detailed reports by error code
# 2004-02-06 Fixed crash that happens if a blacklisted transaction starts before
#            the current log.
#
# Reads CommuniGate Pro logs, extracts interesting tidbits, like errors and stats
# and emails the results to someone.
# Intended to be run shortly after midnight by cron.
#
# Tested against CGP V4.1.5, on OS X Server 10.2.6, with Python v2.3
#
# You must set the SMTP log level to "low level" to get the detail that this needs.
#
# Written August 5, 2003 by Kevin and Stonewall Ballard
# Based on cgp_log_reporter.pl by Stonewall Ballard
#
# This program may be freely used by anyone for any purpose, so long as the above
# copyright notice is retained.
#
# Please send bug reports, enhancements, and requests to
#
# ********* To Use **********
#
# Change the reportFromAddress and reportToAddress below as appropriate.
# Check that the logdir and submitdir are correct, and change if not.
# Ensure that CGP is set to log "low level" SMTP
# Copy this file into your CommuniGate scripts directory.
# Run it as root to ensure that it works.
# Set up cron to run it after midnight, after CGP has rolled its log
#
# ******** Testing *********
#
# Run the script with a -t flag to generate an output file instead of emailing it
# This also produces some trace information. It's best to copy a log file into a convenient
# place when running it.
#
# Syntax is something like:
#
# ./CGPLogReporter.py -t 2003-10-01
# which creates 2003-10-01.log.summary in the same directory
#
# If you don't name a log file, it looks in the logdir (defined below) for a log with yesterday's date
# You can also use a single integer to offset this date. "0" is today, "1" is yesterday (the default), etc.
#
# ********** Possible Errors **********
#
# If you get an error that "False" is not defined, then your Python is too old
# On OS X, be sure that your /usr/bin/python is (or is symlinked to) Python 2.3
#
# Python is not Perl! You don't need to escape the "@" character in strings.
#
# If you use this on Mac OS X, be sure that this file has Unix newlines. BBEdit
# can show and fix this.
#
# If you use RFC822 body line rejection, it may reject these summaries because they
# contain the names of spammers. Set useBase64 to True to encode the messages and evade
# this checking.
#
# If you don't get any errors from running the script, but nothing is mailed, check your
# CGP Submitted directory for .bad files.
# This indicates that there was something about the message
# that CGP didn't like, such as with the RFC822 rejection.
#
# NOTE(review): the code below was reconstructed from a copy whose line breaks
# and indentation had been collapsed. The few places where the original nesting
# could not be determined with certainty carry a NOTE(review) comment.
# The code now targets Python 3.

# *********** configuration vars *************

reportFromAddress = "CGP_Log_Summary@dom.ain"
reportToAddress = "administrator@dom.ain"

# Communigate directories
logdir = "/var/CommuniGate/SystemLogs/"
submitdir = "/var/CommuniGate/Submitted/"

# Enable base64 encoding
useBase64 = True

# 5xx errors that should not receive detailed error reporting
#
# Some common error numbers
# 501 - unknown command
# 550 - unknown user account
# 554 - no valid RCPT address (usually goes with others)
# 571 - relaying prohibited
# 572 - host name unknown
# 573 - unknown user account
# 579 - message content unacceptable
# 591 - blacklisted
# 597 - CGP blacklist (we have a reason to believe that this message is unwanted here)

# this is a list, formatted like [501, 502, 503]
boring5errs = [501, 550, 554, 571, 572, 573, 579, 591, 597]

# enable this to report multiple 501 errors, where the sender is ignoring the responses
reportSpews = False

# *********** No configuration needed below this line ****************

# imports
import base64
import os
import re
import string
import sys
import time
from os.path import abspath, basename, dirname, splitext

# globals
testing = False

# Logs are read and the report is written with this encoding. latin-1 maps
# every byte to exactly one character, so arbitrary log bytes round-trip
# unchanged (the behaviour of the original byte-string implementation).
_LOG_ENCODING = "latin-1"


def _bump( counts, key, amount = 1 ) :
    """Add amount to counts[key], treating a missing key as zero."""
    counts[key] = counts.get( key, 0 ) + amount


# classes

class LogEntry :
    """Parent class for every log entry type.

    parts is the tuple of groups captured by CGPLogReporter.lineRegex
    (timestamp, type, id, server, text), line is the raw log line and
    sequenceID is the server-restart counter used to keep IDs unique."""

    # The address is whatever follows the command, with optional <> removed.
    recipientRegex = re.compile( r"RCPT TO: *<?([^> ]*)>?", re.I )
    senderRegex = re.compile( r"MAIL FROM: *<?([^> ]*)>?", re.I )
    instanceCount = 0

    def __init__( self, parts, line, sequenceID ) :
        """Inits instance vars from regex parts."""
        self.timestamp = parts[0]
        self.type = parts[1]
        self.id = parts[2]
        self.server = parts[3]
        self.text = parts[4]
        self.source = line
        self.response = None
        self.responseCode = None
        self.command = None
        self.commandText = None
        self.dataType = None
        self.endTransaction = False
        # this must follow above assignments
        self.seqid = sequenceID
        LogEntry.instanceCount += 1

    def spaceUsed( self ) :
        """Returns the length of the raw log line."""
        return len( self.source )

    def __str__( self ) :
        return self.source

    def key( self ) :
        """Returns the key (transaction ID) for this entry"""
        return self.type + "-" + self.id + "-" + repr( self.seqid )

    def parseText( self ) :
        """Parses text of entry for SMTP and SMTPI

        Classifies the entry as "cmd", "rsp" or "txt" (dataType), or flags it
        as the end of its transaction when the stream is released."""
        prefix = self.text[0:4]
        if prefix == "cmd:" :
            self.dataType = "cmd"
            # "MAIL FROM:<x>" style commands split on the colon; plain
            # commands such as "HELO name" split on the first space instead
            sep = ":"
            txt = self.text[5:].split( sep )
            if len( txt ) == 1 :
                sep = " "
                txt = txt[0].split( sep )
            self.command = txt[0].upper()
            self.commandText = sep.join( txt[1:] )
        elif prefix == "rsp:" :
            self.dataType = "rsp"
            self.response = self.text[9:]
            self.responseCode = self.text[5:8]
        elif self.text == "releasing stream" :
            self.endTransaction = True
        else :
            self.dataType = "txt"

    def appendError5Response( self, errList ) :
        """Appends any 5xx error response code to the provided list"""
        if self.dataType != "rsp" :
            return
        code = self.responseCode
        if len( code ) >= 3 and code[0] == "5" :
            errList.append( code )

    def appendRecipients( self, recipients ) :
        """Append the recipient named on this entry, if any, to given list"""
        rcp = self.toAddress()
        if rcp :
            recipients.append( rcp )

    def toAddress( self ) :
        """Return the TO address, if any (None otherwise)"""
        if self.dataType == "cmd" :
            res = self.recipientRegex.search( self.text )
            if res :
                return res.group( 1 )
        return None

    def fromAddress( self ) :
        """Return From address, if any (None otherwise).

        An empty sender (a bounce) is returned as "<>"."""
        if self.dataType == "cmd" :
            res = self.senderRegex.search( self.text )
            if res :
                return res.group( 1 ) or "<>"
        return None

    def isEndTransaction( self ) :
        return self.endTransaction

    def isReportable( self ) :
        """A line that may appear in a detail report"""
        return self.dataType in ( "cmd", "rsp" )


class SMTPOutEntry( LogEntry ) :
    """Represents outgoing SMTP entries."""

    def __init__( self, parts, line, sequenceID ) :
        LogEntry.__init__( self, parts, line, sequenceID )
        self.parseText()

    def newTransaction( self ) :
        return SMTPOutTransaction()


class SMTPInEntry( LogEntry ) :
    """Represents incoming SMTPI entries."""

    def __init__( self, parts, line, sequenceID ) :
        LogEntry.__init__( self, parts, line, sequenceID )
        self.parseText()

    def newTransaction( self ) :
        return SMTPInTransaction()


class Spammer :
    """Information about a spammer taken from the log"""

    domainRegex = re.compile( r"@([^@]+)$" )
    tldRegex = re.compile( r"[^.]+$" )
    hostTldRegex = re.compile( r"[^.]+\.[^.]+$" )
    hostCRegex = re.compile( r"[^.]+\.[^.]+\.[^.]+$" )
    gTLDs = ["com", "edu", "gov", "mil", "org", "biz", "net", "int", "aero",
             "coop", "info", "museum", "name", "pro"]

    def __init__( self, email, host, verified, IPAddr, recipients, rejector ) :
        """Records the sender and derives the domain used to group spammers.

        The domain is taken from the email address, falling back to the
        host name and then to the IP address. It is trimmed to two labels
        for generic TLDs and three labels otherwise."""
        # a transaction rejected before MAIL FROM has no sender address
        email = email or "(Unknown)"
        self.email = email
        self.host = host
        self.verified = verified
        self.IPAddr = IPAddr
        self.rejector = rejector

        res = self.domainRegex.search( email )
        domain = ( res and res.group( 1 ) ) or host
        if domain :
            res = self.tldRegex.search( domain )
            if res :
                if res.group( 0 ).lower() in self.gTLDs :
                    trimmed = self.hostTldRegex.search( domain )
                else :
                    # country-code domains need one more label (e.g. x.co.uk)
                    trimmed = self.hostCRegex.search( domain )
                if trimmed :
                    domain = trimmed.group( 0 )
        else :
            # NOTE(review): this fallback is assumed to pair with
            # "if domain" - confirm against an unmangled copy.
            domain = IPAddr
        self.domain = ( domain or "" ).lower()

        if recipients :
            self.recipients = [r.lower() for r in recipients]
        else :
            self.recipients = []

        tag = ""
        if verified :
            tag = " - verified"
        self.desc = "%s (%s%s, %s)" % ( email, host, tag, IPAddr )


class LogTransaction :
    """Container for LogEntry instances.

    Holds all the LogEntries (lines) that are part of the same transaction.
    Currently applicable only to SMTP and SMTPI transactions.

    Subclasses supply TransactionKey, reportDetails(), statistics() and
    targetIPAddr()."""

    instanceCount = 0

    @classmethod
    def AddEntry( cls, entry, transactions ) :
        """Appends entry to existing or new LogTransaction subclass instance

        transactions is the dict of all transactions keyed by entry.key().
        The entry that ends a transaction triggers its analysis and purge."""
        key = entry.key()
        if key not in transactions :
            transactions[key] = entry.newTransaction()
        trans = transactions[key]
        if entry.isEndTransaction() :
            # analyze only once; a second end marker would double-count
            if not trans.closed :
                trans.closed = True
                trans.analyze()
                trans.purge()
        elif not trans.closed :
            trans.append( entry )
        elif testing :
            print( "Entry for closed transaction seen " + key )

    def __init__( self ) :
        self.id = None
        self.entries = []
        self.recipients = []
        self.senderEmail = None
        self.err5s = []
        self.closed = False
        self.timestamp = None
        self.server = None
        LogTransaction.instanceCount += 1

    def spaceUsed( self ) :
        """Returns the total length of the raw lines still held."""
        return sum( [e.spaceUsed() for e in self.entries] )

    def append( self, entry ) :
        """Appends a LogEntry instance to the container."""
        self.entries.append( entry )

    def analyze( self ) :
        """Analyze this transaction and cache the results"""
        for entry in self.entries :
            self.analyzeEntry( entry )

    def analyzeEntry( self, entry ) :
        """Analyze a single entry. Subclasses should enhance"""
        # every entry has an id
        self.id = self.id or entry.id
        # get first timestamp
        self.timestamp = self.timestamp or entry.timestamp
        # some entries name recipients
        entry.appendRecipients( self.recipients )
        self.senderEmail = self.senderEmail or entry.fromAddress()
        # note the server
        self.server = self.server or entry.server
        # collect all the 5xx error numbers
        entry.appendError5Response( self.err5s )

    def purge( self ) :
        """Delete all entries unless they're reportable"""
        if self.isReportable() :
            self.entries = [e for e in self.entries if e.isReportable()]
        else :
            self.entries = []

    def hasReportableError5Response( self ) :
        """Returns True if any entry has an error response that needs detail"""
        return len( self.err5s ) > 0

    def isReportable( self ) :
        return self.hasReportableError5Response() and not self.isSpam()

    def report( self ) :
        """Report on the transaction, if its interesting

        Returns the report text, or None when there is nothing to report."""
        if not self.isReportable() :
            return None
        # outgoing needs the target server
        servername = self.servernameForHeader() or ""
        if servername != "" :
            servername += " "
        rpt = "%s-%s %s %s%s\n" % ( self.TransactionKey, self.id, self.timestamp,
                                    servername, self.targetIPAddr() )
        rpt += self.reportDetails()
        return rpt

    def servernameForHeader( self ) :
        return ""

    def appendSpammers( self, spamlist ) :
        """Collect list of spam senders"""
        #default
        pass

    def appendUnknowns( self, unknowns ) :
        """Collect list of unknown recipients"""
        #default
        pass

    def appendSpamees( self, spamees ) :
        """Collect list of spammed addresses"""
        #default
        pass

    def isSpam( self ) :
        """Return whether or not this transaction was a spam"""
        #default
        return False

    def isSpewing( self ) :
        """Return True if the peer kept sending after a 501 (unknown command)"""
        return self.err5s.count( "501" ) > 1

    def appendBlacklists( self, blacklists ) :
        """Collect list of blacklists"""
        #default
        pass

    def appendSpamtraps( self, spamtraps ) :
        """Collect list of spamtraps"""
        #default
        pass

    def appendContentRejections( self, contentRejections ) :
        """Collect list of content rejections"""
        #default
        pass

    def appendRelayRejections( self, relayRejections ) :
        """Collect list of relay rejections"""
        #default
        pass

    def appendReport( self, reports ) :
        """Append my report to the appropriate section of reports"""
        rpt = self.report()
        if rpt :
            reports[self.TransactionKey] += "\n" + rpt


class SMTPOutTransaction( LogTransaction ) :
    """Outgoing SMTP transaction"""

    TransactionKey = "SMTP"
    # for reporting, the commands and their normalized equivalents
    commands = {"MAIL FROM": "FROM", "RCPT TO": "TO"}
    bytesRegex = re.compile( r"SIZE=(\d+)$" )
    sentRegex = re.compile( r"^\[\d+\] sent to" )
    IPAddrRegex = re.compile( r"connecting to (\[.+\])$" )
    # calc max len of equivalent
    commandLen = max( [len( name ) for name in commands.values()] )

    def __init__( self ) :
        LogTransaction.__init__( self )
        self.bytesSent = 0
        self.messageSent = False
        self.receiverIPAddr = None

    def analyze( self ) :
        """Gather the summary info"""
        LogTransaction.analyze( self )
        # ensure that vars have values
        self.senderEmail = self.senderEmail or "(Unknown)"
        self.timestamp = self.timestamp or "(Unknown)"

    def analyzeEntry( self, entry ) :
        """Analyze a single entry"""
        LogTransaction.analyzeEntry( self, entry )
        if self.bytesSent == 0 :
            bytesResult = self.bytesRegex.search( entry.text )
            if bytesResult :
                self.bytesSent = int( bytesResult.group( 1 ) )
        if not self.messageSent and self.sentRegex.match( entry.text ) :
            self.messageSent = True
        if not self.receiverIPAddr :
            result = self.IPAddrRegex.search( entry.text )
            self.receiverIPAddr = result and result.group( 1 )

    def servernameForHeader( self ) :
        return self.server

    def reportDetails( self ) :
        """Report on the transaction

        Only sub-transactions that ended in a 5xx error are kept: each new
        FROM or TO discards the error-free lines since the previous one,
        and an error pins everything before it."""
        rptlines = []
        lastFrom = -1       # index of the latest FROM not yet pinned by an error
        lastTo = -1         # same, for TO
        index = 0           # always equal to len( rptlines )
        pad = self.commandLen + 2
        for entry in self.entries :
            rpt = ""
            if entry.dataType == "cmd" :
                cmd = self.commands.get( entry.command )
                if cmd == "FROM" :
                    rpt = ( cmd + ":" ).ljust( pad )
                    rpt += entry.fromAddress() or self.senderEmail or "(Unknown)"
                    # cut back past the last From
                    if lastFrom != -1 :
                        index = lastFrom
                        rptlines = rptlines[0 : index]
                    lastFrom = index
                elif cmd is not None :
                    # TO
                    rpt = ( cmd + ":" ).ljust( pad )
                    rpt += entry.toAddress() or "(Unknown)"
                    # cut back to the last To
                    if lastTo != -1 :
                        index = lastTo
                        rptlines = rptlines[0 : index]
                    lastTo = index
            elif entry.dataType == "rsp" and entry.responseCode :
                if entry.responseCode[0] == "5" :
                    rpt = "ERR:".ljust( pad ) + entry.response
                    # never cut back past an error
                    lastFrom = -1
                    lastTo = -1
            if rpt != "" :
                rptlines.append( rpt )
                index += 1

        # rip off any trailing non-error From's and To's
        cutpoint = index
        if lastFrom != -1 :
            cutpoint = min( cutpoint, lastFrom )
        if lastTo != -1 :
            cutpoint = min( cutpoint, lastTo )

        # concatenate the remaining report lines
        return "".join( [r + "\n" for r in rptlines[0 : cutpoint]] )

    def statistics( self, stats ) :
        """Add statistics for this transaction to the given stats"""
        if self.messageSent :
            _bump( stats, "SMTP" )
            _bump( stats, "SMTP-Bytes", self.bytesSent )
        # NOTE(review): recipients are assumed to be counted whether or not
        # the message was sent - confirm against an unmangled copy.
        _bump( stats, "OutRecip", len( self.recipients ) )
        return stats

    def targetIPAddr( self ) :
        """Return the IP Address of the answering mailserver"""
        return self.receiverIPAddr or "(IPAddress Unknown)"


class SMTPInTransaction( LogTransaction ) :
    """Incoming SMTP transaction"""

    TransactionKey = "SMTPI"
    # for reporting, the commands and their normalized equivalents
    commands = {"HELO": "HELO", "EHLO": "HELO", "MAIL FROM": "FROM", "RCPT TO": "TO"}
    # calc max len of equivalent
    commandLen = max( [len( name ) for name in commands.values()] )

    bytesRegex = re.compile( r"^\[\d+\] received, (\d+) bytes$" )
    senderHostRegex = re.compile( r"cmd: (?:HELO|EHLO) *(.*)$", re.I )
    senderIPAddrRegex = re.compile( r"got connection on \[.+\]$" )
    IPARegex = re.compile( r"\[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\]" )
    hostNameCheckRegex = re.compile( r"cmd: MAIL FROM", re.I )
    blacklistRegex = re.compile(
        r" *your host \[(.+)\] is blacklisted(?: by (.+))?\. Send your questions to" )
    spamtrapRegex = re.compile( r"a message from \[.+\] addresses the spamtrap '(.+)'$" )
    contentRegex = re.compile( r"message content \(\d+ bytes\) rejected: '(.+)' found$" )

    def __init__( self ) :
        LogTransaction.__init__( self )
        # our summary vars. Filled in by separate pass
        self.isSpamtrapped = False
        self.isBlacklisted = False
        self.hostRejected = False
        self.contentRejected = False
        self.senderHost = None
        self.senderIPAddr = None
        # a plain flag here (it shadows the base-class method); set by analyze()
        self.isSpewing = False
        self.relayRejected = False
        self.hostResolves = False
        self.unknownRecipients = []
        self.spamtrapName = None
        self.rejector = None
        self.numBytes = 0
        self.localhost = False
        self.numGoodMsgs = 0

    def analyze( self ) :
        """Gather the summary info"""
        LogTransaction.analyze( self )
        # ensure that vars have values
        self.senderHost = self.senderHost or "(Unknown)"
        self.senderIPAddr = self.senderIPAddr or "(IP Address Unknown)"
        # a peer rejected before MAIL FROM has no sender address
        self.senderEmail = self.senderEmail or "(Unknown)"
        # more than one "unknown command" means the sender ignores responses
        self.isSpewing = self.err5s.count( "501" ) > 1

    def analyzeEntry( self, entry ) :
        """Analyze a single entry"""
        LogTransaction.analyzeEntry( self, entry )
        text = entry.text
        code = entry.responseCode

        if not self.isSpamtrapped and not self.isBlacklisted :
            result = self.spamtrapRegex.match( text )
            if result :
                self.spamtrapName = result.group( 1 )
                self.rejector = "Spamtrap"
                self.isSpamtrapped = True
            # NOTE(review): the 591 check is assumed to sit inside this
            # guard - confirm against an unmangled copy.
            if code == "591" :
                self.isBlacklisted = True
                rm = self.blacklistRegex.search( text )
                self.rejector = ( rm and rm.group( 2 ) ) or "CGP"

        if code == "572" :
            self.hostRejected = True
            self.rejector = "Host Rejected"

        if not self.contentRejected :
            contentResult = self.contentRegex.match( text )
            if contentResult :
                self.contentRejected = True
                self.rejector = contentResult.group( 1 )

        if code == "571" :
            self.relayRejected = True

        if not self.senderHost :
            rm = self.senderHostRegex.match( text )
            if rm :
                self.senderHost = rm.group( 1 ) or "(Blank Host Name)"

        if not self.senderIPAddr and entry.server and self.IPARegex.match( entry.server ) :
            self.senderIPAddr = entry.server
            self.localhost = self.senderIPAddr == "[127.0.0.1]"

        if not self.hostResolves and self.hostNameCheckRegex.match( text ) :
            self.hostResolves = entry.server == self.senderHost

        if code in ( "573", "550", "571" ) :
            # the rejected address is the first word of the response
            words = ( entry.response or "" ).split()
            if words :
                self.unknownRecipients.append( words[0].lower() )

        if self.numBytes == 0 :
            bytesResult = self.bytesRegex.match( text )
            if bytesResult :
                self.numBytes = int( bytesResult.group( 1 ) )
                self.numGoodMsgs += 1

    def hasReportableError5Response( self ) :
        """Returns True if any entry has an error response that needs detail"""
        # boring5errs may hold ints (as configured) or strings (after myMain)
        boring = set( [str( code ) for code in boring5errs] )
        for code in self.err5s :
            if code not in boring :
                return True
        if reportSpews and self.err5s.count( "501" ) > 1 :
            return True
        return False

    def reportDetails( self ) :
        """Report on the transaction"""
        pad = self.commandLen + 2
        errTag = "ERR:".ljust( pad )
        badCmdCount = 0
        badCmdText = ""
        rpt = ""
        for entry in self.entries :
            if entry.dataType == "cmd" :
                cmd = self.commands.get( entry.command )
                if cmd is None :
                    continue
                rpt += ( cmd + ":" ).ljust( pad )
                if cmd == "HELO" :
                    rpt += self.senderHost or "(Unknown)"
                elif cmd == "TO" :
                    rpt += entry.toAddress() or "(Unknown)"
                else :
                    # FROM
                    rpt += self.senderEmail or "(Unknown)"
                rpt += "\n"
            elif entry.dataType == "rsp" and entry.responseCode :
                code = entry.responseCode
                if code == "501" :
                    # unknown command
                    badCmdCount += 1
                    badCmdText = entry.response
                elif code == "591" :
                    # blacklisted
                    rm = self.blacklistRegex.search( entry.response )
                    if rm :
                        mtxt = "host " + rm.group( 1 ) + " is blacklisted"
                        if rm.group( 2 ) :
                            mtxt += " by " + rm.group( 2 )
                    else :
                        mtxt = entry.response
                    rpt += errTag + code + " " + mtxt + "\n"
                elif code == "597" :
                    # spamtrap
                    rpt += errTag + code + " " + "Spamtrap\n"
                elif code == "550" :
                    # unknown recipient
                    rpt += errTag + code + " " + entry.response + "\n"
                elif code == "579" :
                    # content rejected
                    rpt += errTag + code + " Content rejected - " + \
                           ( self.rejector or "(Unknown)" ) + "\n"
                elif code[0] == "5" :
                    # all other errors
                    rpt += errTag + code + " " + entry.response + "\n"
        if badCmdCount > 1 :
            rpt += "SPEW:".ljust( pad ) + "Sender is spewing (" + repr( badCmdCount ) + ")\n"
        elif badCmdCount == 1 :
            rpt += errTag + badCmdText + "\n"
        return rpt

    def statistics( self, stats ) :
        """Add statistics for this transaction to the given stats"""
        # keep local transfers out of main stats
        if self.localhost :
            _bump( stats, "SMTPI-I" )
        else :
            _bump( stats, "SMTPI" )
            _bump( stats, "SMTPI-Bytes", self.numBytes )
            _bump( stats, "InRecip", len( self.recipients ) )
            _bump( stats, "SMTPI-Msgs", self.numGoodMsgs )
        # NOTE(review): the rejection counters are assumed to apply to every
        # connection, localhost included - confirm against an unmangled copy.
        flags = ( ( self.isBlacklisted, "Blacklists" ),
                  ( self.isSpamtrapped, "Spamtraps" ),
                  ( self.hostRejected, "HostRejects" ),
                  ( self.contentRejected, "ContentRejects" ),
                  ( self.relayRejected, "RelayRejects" ),
                  ( self.isSpewing, "SenderSpewing" ) )
        for flag, key in flags :
            if flag :
                _bump( stats, key )
        return stats

    def appendSpammers( self, spamlist ) :
        """Collect list of spam senders"""
        if self.isSpam() :
            spamlist.append( Spammer( self.senderEmail, self.senderHost, self.hostResolves,
                                      self.senderIPAddr, self.recipients, self.rejector ) )

    def appendUnknowns( self, unknowns ) :
        """Collect list of unknown recipients from non-spammers"""
        if not self.isSpam() :
            for unk in self.unknownRecipients :
                _bump( unknowns, unk )

    def isSpam( self ) :
        """Return whether or not this transaction was a spam"""
        return self.isBlacklisted or self.isSpamtrapped or self.hostRejected

    def appendBlacklists( self, blacklists ) :
        """Collect list of blacklists"""
        if self.isBlacklisted :
            _bump( blacklists, self.rejector )

    def appendSpamtraps( self, spamtraps ) :
        """Collect list of spamtraps"""
        if self.isSpamtrapped :
            _bump( spamtraps, self.spamtrapName )

    def appendContentRejections( self, contentRejections ) :
        """Collect list of content rejections"""
        if self.contentRejected :
            # join (unlike reduce) also copes with an empty recipient list
            key = ", ".join( self.recipients ) + ": " + str( self.rejector )
            _bump( contentRejections, key )

    def appendRelayRejections( self, relayRejections ) :
        """Collect list of relay rejections"""
        if self.relayRejected :
            for relay in self.recipients :
                _bump( relayRejections, relay )

    def targetIPAddr( self ) :
        return self.senderIPAddr


def _hitSection( title, counts ) :
    """Format a section listing "key: count" for every key, sorted."""
    text = "\n--- " + title + " ---\n\n"
    for key in sorted( counts ) :
        text += str( key ) + ": " + str( counts[key] ) + "\n"
    return text


def _msgSection( title, counts ) :
    """Format a section listing each key, with " - N msgs" when N > 1."""
    text = "\n--- " + title + " ---\n\n"
    for key in sorted( counts ) :
        val = counts[key]
        addendum = ""
        if val > 1 :
            addendum = " - " + str( val ) + " msgs"
        text += str( key ) + addendum + "\n"
    return text


def _spammerSection( spammers ) :
    """Format the Spammers section, grouped and sorted by domain."""
    text = "\n--- Spammers ---\n\n"
    domains = {}        # domain -> Spammer instances
    rejectors = {}      # domain -> responsible rejectors, in order seen
    recipients = {}     # domain -> targeted addresses, in order seen
    for s in spammers :
        dom = s.domain.lower()
        domains.setdefault( dom, [] ).append( s )
        dombl = rejectors.setdefault( dom, [] )
        if s.rejector and s.rejector not in dombl :
            dombl.append( s.rejector )
        recip = recipients.setdefault( dom, [] )
        for r in s.recipients or [] :
            if r not in recip :
                recip.append( r )

    # report by sorted domain
    for dom in sorted( domains ) :
        blockertext = ""
        if rejectors[dom] :
            blockertext = " (" + ", ".join( rejectors[dom] ) + ")"
        reciptext = ""
        if recipients[dom] :
            reciptext = ": " + ", ".join( recipients[dom] )
        text += dom + blockertext + reciptext + "\n"

        # condense spammers at this domain by string
        sb = {}
        for s in domains[dom] :
            _bump( sb, s.desc )
        # report by sorted descriptions
        for d in sorted( sb, key = lambda desc : desc.lower() ) :
            tag = ""
            if sb[d] > 1 :
                tag += " (%d)" % ( sb[d] )
            text += " %s%s\n" % ( d, tag )
        text += "\n"
    return text


class CGPLogReporter :
    """Parses CommuniGate Pro log lines and produces the summary report."""

    # matches lines from the log
    lineRegex = re.compile(
        r"^(\d\d:\d\d:\d\d\.\d\d) \d ([A-Z]+)(?:-(\d+))?(?:\(([^)]+)\))? (.+)$" )
    systemLineRegex = re.compile( r"^(\d\d:\d\d:\d\d\.\d\d) \d SYSTEM [\d\.]+ started" )

    # all the transactions, keyed by TYPE-ID-SEQID (e.g. SMTPI-12345-1)
    # This dict lives on the class, so every reporter instance shares it.
    transactions = {}

    def __init__( self ) :
        self.sequenceID = 0

    @classmethod
    def spaceUsed( cls ) :
        """Returns the total size of the keys and raw lines currently held."""
        s = 0
        for k in cls.transactions :
            s += len( k ) + cls.transactions[k].spaceUsed()
        return s

    def parse( self, line ) :
        """Creates the appropriate subclass of LogEntry, or None."""
        line = line.rstrip()
        regMatch = self.lineRegex.match( line )
        # ignore non-matching lines
        if not regMatch :
            return None
        groups = regMatch.groups()
        lineType = groups[1]
        lineID = groups[2]
        if lineID is not None :
            if lineType == "SMTP" :
                return SMTPOutEntry( groups, line, self.sequenceID )
            if lineType == "SMTPI" :
                return SMTPInEntry( groups, line, self.sequenceID )
        # if this is a restart line, bump the sequenceID to compensate
        # for the lineID numbers restarting
        if self.systemLineRegex.match( line ) :
            self.sequenceID += 1
        # ignore everything else
        return None

    def munchFile( self, input ) :
        """Parses the log file, creates objects representing its pieces,
        and analyzes them.

        input is any iterable of text lines, such as an open file."""
        if testing :
            print( "Munching file" )
        for line in input :
            try :
                entry = self.parse( line )
                if entry :
                    LogTransaction.AddEntry( entry, self.transactions )
            except ValueError :
                # line is invalid
                print( "Invalid Log Line: " + line )

    def generateReport( self ) :
        """Returns a string containing the summary"""
        # define the stats output
        stats_display = ( ( "SMTP", "Outgoing SMTP" ),
                          ( "OutRecip", "Outgoing Recipients" ),
                          ( "SMTP-Bytes", "Outgoing bytes" ),
                          ( "SMTPI", "Incoming SMTP" ),
                          ( "InRecip", "Incoming Recipients" ),
                          ( "SMTPI-Msgs", "Messages accepted" ),
                          ( "SMTPI-Bytes", "Bytes accepted" ),
                          ( "SMTPI-I", "Localhost SMTPI" ),
                          ( "Blacklists", "Blacklisted Spams" ),
                          ( "Spamtraps", "Spamtraps" ),
                          ( "HostRejects", "Hosts Rejected" ),
                          ( "RelayRejects", "Relays Rejected" ),
                          ( "ContentRejects", "Rejected Contents" ),
                          ( "SenderSpewing", "Senders Spewing" ),
                          ( "TotalBytes", "Total Bytes Xfrd" ) )
        stats_len = max( [len( label ) for key, label in stats_display] )

        stats = {}
        spammers = []
        unknowns = {}
        blacklists = {}
        spamtraps = {}
        contentRejections = {}
        relayRejections = {}
        unclosed = 0
        reports = { "SMTP" : "", "SMTPI" : "" }

        # unanalyzed transactions have no timestamp; sort those first
        transactions = sorted( self.transactions.values(),
                               key = lambda t : t.timestamp or "" )

        if testing :
            print( "Creating report" )
        for trans in transactions :
            if not trans.closed :
                unclosed += 1
                continue
            try :
                stats = trans.statistics( stats )
                trans.appendSpammers( spammers )
                trans.appendUnknowns( unknowns )
                trans.appendBlacklists( blacklists )
                trans.appendSpamtraps( spamtraps )
                trans.appendContentRejections( contentRejections )
                trans.appendRelayRejections( relayRejections )
                trans.appendReport( reports )
            except Exception as err :
                # best effort: one odd transaction must not lose the whole
                # report, but say so when tracing
                if testing :
                    print( "Skipped transaction %s: %r" % ( trans.id, err ) )

        if testing and unclosed > 0 :
            print( "There were %d unclosed transactions" % unclosed )
        if testing :
            print( "Writing report to file" )

        # the output string
        outstring = ""

        if len( stats ) :
            outstring += "\n--- Statistics ---\n\n"
            # get total bytes transferred
            stats["TotalBytes"] = stats.get( "SMTP-Bytes", 0 ) + stats.get( "SMTPI-Bytes", 0 )
            # calc number alignment
            maxval = 0
            for key, label in stats_display :
                if key in stats :
                    maxval = max( maxval, stats[key] )
            # account for commas
            numChars = len( commaize( str( maxval ) ) )
            for key, label in stats_display :
                if key in stats :
                    outstring += ( label + ":" ).ljust( stats_len + 2 ) + \
                                 commaize( str( stats[key] ) ).rjust( numChars + 2 ) + "\n"

        if len( blacklists ) :
            outstring += _hitSection( "Blacklist Hits", blacklists )

        if len( spamtraps ) :
            outstring += _hitSection( "Spamtrap Hits", spamtraps )

        if len( spammers ) :
            spamees = {}
            for s in spammers :
                for r in s.recipients or [] :
                    _bump( spamees, r )
            outstring += _msgSection( "Spammed Addresses", spamees )

        if len( relayRejections ) :
            outstring += _msgSection( "Relay Rejections", relayRejections )

        if len( contentRejections ) :
            outstring += _msgSection( "Content Rejections", contentRejections )

        if len( unknowns ) :
            outstring += _msgSection( "Unknown Local Recipients", unknowns )

        for key, label in stats_display :
            rpts = reports.get( key, "" )
            if len( rpts ) :
                outstring += "\n--- " + label + " Errors ---\n"
                outstring += rpts

        if len( spammers ) :
            outstring += _spammerSection( spammers )

        return outstring


# global functions

def commaize( valstr ) :
    """Adds commas to integers for better readability

    valstr is the integer as a string; a leading sign is preserved."""
    sign = ""
    digits = valstr
    if digits[:1] in ( "-", "+" ) :
        sign = digits[0]
        digits = digits[1:]
    groups = []
    while len( digits ) > 3 :
        groups.insert( 0, digits[-3:] )
        digits = digits[:-3]
    groups.insert( 0, digits )
    return sign + ",".join( groups )


def help() :
    """Prints the help description."""
    print( "Usage: " + sys.argv[0] + " [-t inputfile|days back]" )


def myMain( argv ) :
    """Main routine. With no args, works on yesterday's log.
    An arg is taken to be the day offset (1 = yesterday)

    With "-t inputfile" the named log is summarized into
    inputfile.log.summary (plain text) instead of being mailed."""
    global testing, logdir, useBase64, boring5errs

    inputfilename = None
    dayoffset = 1
    doHelp = False
    if len( argv ) == 3 and argv[1] == "-t" :
        inputfilename = argv[2]
        testing = True
    elif len( argv ) == 2 :
        try :
            dayoffset = int( argv[1] )
        except ValueError :
            doHelp = True
    elif len( argv ) != 1 :
        doHelp = True

    if doHelp :
        help()
        return

    # ensure boring errors are strings, not numbers
    boring5errs = [str( x ) for x in boring5errs]

    # open input log file
    if inputfilename is not None :
        logname = basename( inputfilename )
        # remove .log if there (splitext keeps the dot on the extension)
        nameparts = splitext( logname )
        if nameparts[1] == ".log" :
            logname = nameparts[0]
        logdir = dirname( abspath( inputfilename ) ) + '/'
    else :
        logtime = time.localtime( time.time() - 60 * 60 * 24 * dayoffset )
        logname = time.strftime( "%Y-%m-%d", logtime )

    # for reporting
    fullLogname = logname + ".log"
    fullLognameWithPath = logdir + fullLogname

    # look for other log files from the same day
    # but cut for being too big. Successive file names are formatted like
    # yyyy-mm-dd_hh-mm.log
    fileRegex = re.compile( re.escape( logname ) + r"_\d\d-\d\d\.log$" )
    try :
        filenames = os.listdir( logdir )
    except OSError as reason :
        print( "Error: " + str( reason ) )
        return
    # ensure they are in ascending order
    logfilenames = sorted( [f for f in filenames if fileRegex.match( f )] )
    # the base log is first
    logfilenames.insert( 0, fullLogname )

    reporter = CGPLogReporter()

    # make note about how many parts there were
    partAddendum = ""
    if len( logfilenames ) > 1 :
        partAddendum = ", in %d parts" % ( len( logfilenames ) )

    # read all the log parts
    for fname in logfilenames :
        fnamewithpath = logdir + fname
        try :
            inputFile = open( fnamewithpath, "r", encoding = _LOG_ENCODING )
        except IOError as reason :
            print( "Error: " + str( reason ) )
            return
        with inputFile :
            if testing :
                print( "Reading file '%s'" % ( fname ) )
            reporter.munchFile( inputFile )

    # open output file
    if testing :
        outfilename = fullLognameWithPath + ".summary"
        fullOutfilename = outfilename
    else :
        outfilename = submitdir + "ls" + str( time.time() ) + "-" + str( os.getpid() )
        fullOutfilename = outfilename + ".tmp"

    # base64 exists to get the mail past RFC822 filtering; a test summary
    # is meant to be read, so it stays plain text
    encodeBody = useBase64 and not testing

    try :
        outfile = open( fullOutfilename, "w", encoding = _LOG_ENCODING, errors = "replace" )
    except IOError :
        print( "Can't open output file " + fullOutfilename )
        return

    with outfile :
        if not testing :
            # add the mail headers first
            outfile.write( "To: " + reportToAddress + "\n" )
            outfile.write( "From: " + reportFromAddress + "\n" )
            outfile.write( "Subject: CGP Log Summary for " + logname + "\n" )
            if encodeBody :
                outfile.write( "Content-Transfer-Encoding: base64\n" )
            outfile.write( "\n" )

        # save all this as a string so that we can base64 encode it
        outstring = "Summary of file: " + fullLogname + partAddendum + "\n"
        outstring += "Generated " + time.asctime() + "\n"
        outstring += reporter.generateReport()
        if encodeBody :
            # encodebytes wraps its output in 76-column lines, as mail requires
            rawBytes = outstring.encode( _LOG_ENCODING, "replace" )
            outstring = base64.encodebytes( rawBytes ).decode( "ascii" )
        outfile.write( outstring )

    if not testing :
        # rename output file to submit it
        try :
            os.rename( outfilename + ".tmp", outfilename + ".sub" )
        except OSError :
            print( "Can't rename mail file to " + outfilename + ".sub" )


# simplify testing
def runtest( logname ) :
    myMain( ["CGPLogSummary.py", "-t", logname] )


# main program
if __name__ == '__main__':
    # executed in main level - not imported
    myMain( sys.argv )