#!/usr/bin/python
# Title:
# crm114.py
#
# Description:
# interface between tpblue and CRM 114. Handles train as good,
# train as bad, and scoring.
#
# Copyright:
# Eric S. Johansson (esj@harvee.org)
# BSD License: see LICENSE or http://www.opensource.org/licenses/bsd-license.php
#
# Requires:
#
import email
# ``email.errors`` is the spelling that exists on both Python 2.5+ and 3;
# the older ``email.Errors`` alias is Python 2 only.
from email.errors import BoundaryError, HeaderParseError, MessageError
import binascii
import sys
import traceback
import subprocess
#from esjtools.simple_locker import simple_locker
import syslog
#from tpblue.log import log
#from tpblue import tpblue_utils
# from tpblue import brown_utils
#from tpblue import common_services
#from tpblue import message_pool


####### just code #####

def _log_current_exception(preamble=None):
    """Write the traceback of the exception being handled to syslog.

    Each line produced by ``traceback.format_exception`` becomes one
    syslog entry.  When ``preamble`` is not None the entry is formatted
    as ``"<preamble>: <line>"``; otherwise the line is logged as is.
    """
    etype, value, tb = sys.exc_info()
    try:
        for line in traceback.format_exception(etype, value, tb):
            if preamble is None:
                syslog.syslog(line)
            else:
                syslog.syslog("%s: %s" % (preamble, line))
    finally:
        # drop the traceback reference so the frames it holds can be freed
        del tb
        sys.last_traceback = None


def dump_stack(preamble="default"):
    """Log the current exception's traceback to syslog, one entry per
    line, each prefixed with ``preamble`` and a colon.
    """
    _log_current_exception(preamble)


class CRM114 (object):
    """Runs the crm114 mail filter script as a subprocess to score a
    message or to train it as spam ("red") or nonspam ("green").
    """

    # score handed back by score_message when crm114 gives no usable number
    FALLBACK_SCORE = -0.13

    # only this much of the message is fed to crm114 on stdin
    MAX_MESSAGE_LENGTH = 10000

    def __init__(self, common_CRM, user_CRM):
        """remember where everything is located

        common_CRM -- first element of the crm114 command line
                      (e.g. '/usr/local/crm/mailfilter.crm')
        user_CRM   -- value passed to crm114 as --fileprefix
        """
        self.common_CRM = common_CRM
        self.user_CRM = user_CRM

    def build_crm_command(self, command_type):
        """build a complete crm114 command line

        command_type -- one of "stats", "spam" or "nonspam"; any other
                        value raises KeyError.

        Returns the argument list to hand to subprocess.Popen.
        """
        command_table = {
            "stats": "--stats_only",
            "spam": "--learnspam",
            "nonspam": "--learnnonspam",
            }

        # 1st entry is the path to the crm file,
        # 2nd is the command,
        # 3rd is the per-user file prefix,
        # 4th is the -u option (kept as a single argument, as before)
        command_list = [self.common_CRM,
                        command_table[command_type],
                        "--fileprefix=%s" % self.user_CRM,
                        "-u /usr/local/crm/"
                        ]
        # example result:
        # ['/usr/local/crm/mailfilter.crm', '--stats_only',
        #  '--fileprefix=/var/tpblue/esj/crm114/', '-u /usr/local/crm/']
        return command_list

    def _run_crm(self, command_list, message):
        """Run crm114 with ``command_list``, feeding it the leading
        MAX_MESSAGE_LENGTH items of ``message`` on stdin.

        Returns the (stdout, stderr) pair from Popen.communicate.
        """
        p = subprocess.Popen(command_list,
                             shell=False,
                             bufsize=4000,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        return p.communicate(message[:self.MAX_MESSAGE_LENGTH])

    def CRM114_train(self, learn_as, message):
        """Feed ``message`` to crm114 in learning mode.

        learn_as -- "red" (learn as spam) or "green" (learn as nonspam);
                    any other value raises KeyError.
        message  -- message text; only the first MAX_MESSAGE_LENGTH
                    items are sent.

        Returns True when the subprocess call completed without raising,
        False when it raised (the failure is logged to syslog).  The
        exit status and output of crm114 are not inspected.
        """
        learn_command = {"red": "spam",
                         "green": "nonspam",
                         }
        command_list = self.build_crm_command(learn_command[learn_as])

        try:
            self._run_crm(command_list, message)
        except Exception:
            # crm114 woofed again.  Exception (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            syslog.syslog("crm114 call failed")
            dump_stack()
            return False

        return True

    def green_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        Returns True when ``given_score`` is below 3.
        """
        return given_score < 3

    def red_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        Returns True when ``given_score`` is at least -3.
        """
        score_limit = -3
        return given_score >= score_limit

    def train_as_good(self, message, given_score):
        """Train ``message`` as good (green) if green_test accepts
        ``given_score``.

        Returns False when CRM114_train was run and reported success.
        Returns True in every other case: green_test declined, training
        failed, or an exception was caught (and logged) on the way.

        NOTE(review): the original inline comment said "True equal
        success on training, false equals failure", which is the
        opposite of what the code returns.  The return values are left
        untouched because the callers are not visible here -- confirm
        what they expect.
        """
        result = 0    # a falsy start value, so the default return is True
        try:
            score_limit = self.green_test(given_score)
            syslog.syslog("train it mebby green %s %s" % (given_score,
                                                          score_limit, ))
            if score_limit:
                # retrain as green because the user said so.
                result = self.CRM114_train("green", message)
                if not result:
                    syslog.syslog("TAG failed to learn %s" % result)

        except Exception as error:
            syslog.syslog("train as good error: %s" % (error,))
            dump_stack('TAG:')

        return not result

    def train_as_bad(self, message, given_score):
        """for a given user, train messages as bad.

        Trains ``message`` as bad (red) if red_test accepts
        ``given_score``.

        Returns True only when CRM114_train was run and reported
        failure.  Returns False when training succeeded, when red_test
        declined, or when an exception was caught (and logged) before
        CRM114_train returned.

        NOTE(review): this differs from train_as_good for the
        "training skipped" case; left as is because the callers are not
        visible here.
        """
        result = True    # a truthy start value, so the default return is False
        try:
            score_limit = self.red_test(given_score)
            syslog.syslog("train it mebby red %s %s" % (given_score,
                                                        score_limit))
            if score_limit:
                # retrain as bad because the user said so.
                result = self.CRM114_train("red", message)
                if not result:
                    syslog.syslog("TAB failed to learn %s" % result)

        except Exception as error:
            # prefix corrected from the copy-pasted 'TAG:' so the
            # traceback is attributed to train-as-bad
            dump_stack('TAB:')
            syslog.syslog("train as bad error %s" % (error,))

        return not result

    def score_message(self, message):
        """run the message through CRM114 and return its score

        The first MAX_MESSAGE_LENGTH items of ``message`` are piped to
        crm114 in --stats_only mode and its standard output is parsed
        as a float.

        Returns that float, or FALLBACK_SCORE when the output cannot be
        parsed or one of the message-decoding errors listed below is
        caught.  Errors from starting the subprocess are not caught
        here and propagate to the caller.
        """
        # Preset the fallback so the final return can never hit an
        # unbound name when the outer handler fires.
        result = self.FALLBACK_SCORE
        try:
            command_list = self.build_crm_command("stats")
            (crm114_output, crm114_error) = self._run_crm(command_list,
                                                          message)
            try:
                # convert string to float if it dies, well, it's dead Jim
                result = float(crm114_output.rstrip())

            except Exception as error:
                # blech dog food!
                syslog.syslog("bad score, error = %s %s" % (crm114_output,
                                                            error))
                syslog.syslog("command = %s" % (command_list,))
                result = self.FALLBACK_SCORE

        except (BoundaryError, HeaderParseError,
                binascii.Error) as error_message:
            syslog.syslog("score_message failed %s" % error_message)

        return result


class CRM114_balanced(CRM114):
    """CRM114 variant that counts accepted green and red trainings and
    only allows a red training while the red count is below the green
    count.
    """

    def __init__(self, common_CRM, user_CRM):
        CRM114.__init__(self, common_CRM, user_CRM)
        self.green_count = 0
        self.red_count = 0

    def green_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        Returns True when ``given_score`` is below 3, and counts each
        such acceptance in green_count.
        """
        result = given_score < 3

        # trying balanced scoring. since green is rarer, only bump count
        # when the green training is accepted
        if result:
            self.green_count = self.green_count + 1

        return result

    def red_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        The limit is -3 while red_count is below green_count and 1000
        otherwise.  Returns True when ``given_score`` reaches the limit,
        and counts each such acceptance in red_count.
        """
        score_limit = 1000

        # trying balanced scoring: red is only allowed to catch up
        # with green
        if self.red_count < self.green_count:
            score_limit = -3

        result = given_score >= score_limit
        if result:
            self.red_count = self.red_count + 1

        syslog.syslog("red limit test %s %s %s" % (result, given_score,
                                                   score_limit))
        return result


class CRM114_var_limit(CRM114):
    """CRM114 variant whose training limits start wide (350 / -350) and
    tighten by one for every three tests, down to the fixed limits used
    by the base class (3 / -3).
    """

    def __init__(self, common_CRM, user_CRM):
        CRM114.__init__(self, common_CRM, user_CRM)
        self.green_count = 0
        self.red_count = 0
        self.starting_green_limit = 350
        self.green_limit = 350
        self.starting_red_limit = -350
        self.red_limit = -350

    def green_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        Returns True when ``given_score`` is below the current green
        limit, then lowers the limit for the next call (never below 3).
        """
        result = given_score < self.green_limit

        # bump count and adjust green limit; // keeps the limit an
        # integer on Python 3 as well
        self.green_count = self.green_count + 1
        self.green_limit = max(
            self.starting_green_limit - (self.green_count // 3), 3)

        return result

    def red_test(self, given_score):
        """score magic. set limit according to some state.  force to
        outer limits to make training turn off

        Returns True when ``given_score`` is at or above the current
        red limit, then raises the limit for the next call (never above
        -3).
        """
        result = given_score >= self.red_limit

        # bump count and adjust red limit.  This mirrors green_test:
        # the limit climbs from -350 towards -3.  The previous code
        # subtracted the count and clamped with "< 3", which pinned the
        # limit at +3 right after the first call.
        self.red_count = self.red_count + 1
        self.red_limit = min(
            self.starting_red_limit + (self.red_count // 3), -3)

        return result


#################### test code ##################

def main():
    """Development harness: read one message from standard input and
    score it for the hard-wired test user, logging any exception to
    syslog.
    """
    #development_path = "/usr/local/tpblue/src"
    #sys.path.insert(1,development_path)

    # project modules are imported here so that importing this module
    # does not require them
    from tpblue import configuration
    from tpblue import tpblue_email

    #import rpdb2; rpdb2.start_embedded_debugger('1234', fAllowRemote = True)

    tpblue_ID = 'esj'
    config = configuration.configuration(tpblue_ID)
    crm_path = config["crm114_command"]
    user_path = config["crm114_base"]

    content_analyzer = CRM114(crm_path, user_path)

    # start of the main code
    try:
        # go grab message from standard in
        my_message = tpblue_email.tpblue_message()
        my_message.message_from_file(sys.stdin)
        my_message.meta['tpblue_ID'] = tpblue_ID
        my_message.meta['recipient_list'] = ['esj@harvee.org']

        # the score itself is not used by this harness
        content_analyzer.score_message(my_message.message)

    except MessageError as e:
        # this is highly likely an ill formed piece of spam.  It did
        # not even create a message object.  So, I say dump it on the
        # floor after logging the event.
        # (listed first because MessageError is itself an Exception)
        syslog.syslog("junk message threw exception: %s" % e)
        _log_current_exception()

    except Exception as e:
        # exception was thrown, log it and return original message
        # spamtrap these messages marking them as exception fodder
        syslog.syslog("tpblue filter threw exception: %s" % e)
        _log_current_exception()


if __name__ == "__main__":
    main()