All pastes #1025922 Raw Edit

crm114 test interface

public python v1 · immutable
#1025922 ·published 2008-05-22 14:36 UTC
rendered paste body
#!/usr/bin/python
# Title:
# crm114.py
#
# Description:
# interface between tpblue and CRM 114.  Handles train as good,
# train as bad, and scoring.
#
# Copyright:
# Eric S. Johansson (esj@harvee.org)
# BSD License: see LICENSE or http://www.opensource.org/licenses/bsd-license.php
#
# Requires:
#

import email
try:
    from email.errors import BoundaryError, HeaderParseError, MessageError
except ImportError:
    # Very old Python 2 email packages only ship the capitalised module name.
    from email.Errors import BoundaryError, HeaderParseError, MessageError
import binascii
import sys
import traceback
import subprocess
#from esjtools.simple_locker import simple_locker
import syslog

#from tpblue.log import log
#from tpblue import tpblue_utils
# from tpblue import brown_utils
#from tpblue import common_services
#from tpblue import message_pool


###### just code #####

def _log_current_exception(preamble=None):
    """Write the exception currently being handled to syslog, line by line.

    preamble -- when not None, every line is logged as "<preamble>: <line>";
                when None, the traceback lines are logged unchanged.

    The local traceback reference is dropped and sys.last_traceback is reset
    once all lines have been logged (never in the middle of the loop).
    """
    etype, value, tb = sys.exc_info()
    try:
        for line in traceback.format_exception(etype, value, tb):
            if preamble is None:
                syslog.syslog(line)
            else:
                syslog.syslog("%s: %s" % (preamble, line))
    finally:
        # Release the traceback only after every line has been emitted;
        # deleting it inside the loop breaks on the second line.
        del tb
        sys.last_traceback = None


def dump_stack(preamble="default"):
    """Log the active exception's traceback to syslog, prefixed by preamble."""
    _log_current_exception(preamble)


class CRM114(object):
    """Thin wrapper that runs a CRM114 mail filter script as a subprocess.

    Offers scoring (score_message) and training (train_as_good /
    train_as_bad).  green_test / red_test decide whether a message with a
    given score should be used for training; subclasses override them.
    """

    # Score returned by score_message when CRM114 gives no usable number.
    fallback_score = -0.13
    # Only this many leading bytes of a message are piped to CRM114.
    max_message_bytes = 10000

    def __init__(self, common_CRM, user_CRM):
        """Remember where everything is located.

        common_CRM -- first element of the command line (the program to run).
        user_CRM   -- value passed to CRM114 as --fileprefix=.
        """
        self.common_CRM = common_CRM
        self.user_CRM = user_CRM

    def build_crm_command(self, command_type):
        """Build a complete crm114 command line as an argv list.

        command_type -- one of "stats", "spam" or "nonspam"; any other value
                        raises KeyError.

        Example result:
        ['/usr/local/crm/mailfilter.crm', '--stats_only',
         '--fileprefix=/var/tpblue/esj/crm114/', '-u /usr/local/crm/']
        """
        command_table = {
            "stats": "--stats_only",
            "spam": "--learnspam",
            "nonspam": "--learnnonspam",
        }
        return [
            self.common_CRM,
            command_table[command_type],
            "--fileprefix=%s" % self.user_CRM,
            # NOTE(review): option and path are passed as ONE argv element,
            # exactly as before -- confirm CRM114 parses it as intended.
            "-u /usr/local/crm/",
        ]

    def _message_bytes(self, message):
        """Return the truncated message as bytes, ready for the child's stdin."""
        if not isinstance(message, bytes):
            # Pipes are binary; text (Python 3 str / Python 2 unicode) must be
            # encoded before it can be written.
            message = message.encode("utf-8", "replace")
        return message[:self.max_message_bytes]

    def _run_crm(self, command_list, message):
        """Run command_list, feed it the message, return (stdout, stderr)."""
        p = subprocess.Popen(command_list, shell=False, bufsize=4000,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        return p.communicate(self._message_bytes(message))

    def CRM114_train(self, learn_as, message):
        """Train CRM114 on message as "red" (spam) or "green" (non-spam).

        Returns True when the CRM114 process could be run, False when running
        it raised (the failure is logged).  The child's exit status and output
        are not inspected.  An unknown learn_as raises KeyError.
        """
        learn_command = {
            "red": "spam",
            "green": "nonspam",
        }
        command_list = self.build_crm_command(learn_command[learn_as])
        try:
            self._run_crm(command_list, message)
        except Exception:
            # crm114 woofed again: log it and report failure.  Exception (not
            # a bare except) so Ctrl-C / SystemExit still propagate.
            syslog.syslog("crm114 call failed")
            dump_stack()
            return False
        return True

    def green_test(self, given_score):
        """Return True when given_score is low enough (< 3) to train as green."""
        return given_score < 3

    def red_test(self, given_score):
        """Return True when given_score is high enough (>= -3) to train as red."""
        score_limit = -3
        return given_score >= score_limit

    def train_as_good(self, message, given_score):
        """Train message as good (green) if green_test accepts given_score.

        Returns ``not result`` where result is 0 when no training was
        attempted or an exception occurred first, otherwise the value from
        CRM114_train.  So: False after a successful training run, True after
        a failed one, and True when training was skipped.
        NOTE(review): train_as_bad returns False when training is skipped;
        the two conventions disagree -- confirm what callers expect before
        changing either.
        """
        result = 0
        try:
            score_limit = self.green_test(given_score)
            syslog.syslog("train it mebby green %s %s" % (given_score,
                                                          score_limit))
            if score_limit:
                # retrain as green because the user said so.
                result = self.CRM114_train("green", message)
                if not result:
                    syslog.syslog("TAG failed to learn %s" % result)
        except Exception as error:
            syslog.syslog("train as good error: %s" % (error,))
            dump_stack('TAG:')
        return not result

    def train_as_bad(self, message, given_score):
        """Train message as bad (red) if red_test accepts given_score.

        Returns ``not result`` where result starts as True and is replaced by
        the value from CRM114_train when training is attempted.  So: False
        after a successful training run or when training was skipped, True
        after a failed training run.
        """
        result = True
        try:
            score_limit = self.red_test(given_score)
            syslog.syslog("train it mebby red %s %s" % (given_score,
                                                        score_limit))
            if score_limit:
                # retrain as bad because the user said so.
                result = self.CRM114_train("red", message)
                if not result:
                    syslog.syslog("TAB failed to learn %s" % result)
        except Exception as error:
            dump_stack('TAB:')
            syslog.syslog("train as bad error %s" % (error,))
        return not result

    def score_message(self, message):
        """Run message through CRM114 in stats mode and return its score.

        The child's stdout is parsed as a float.  If it cannot be parsed, or
        one of the handled email/binascii errors is raised, the problem is
        logged and fallback_score (-0.13) is returned.  Failure to start the
        process (e.g. OSError) is not caught here and propagates.
        """
        # Pre-seed so the outer handler below can never return an unbound name.
        result = self.fallback_score
        try:
            command_list = self.build_crm_command("stats")
            crm114_output, crm114_error = self._run_crm(command_list, message)
            try:
                # convert string to float; if it dies, well, it's dead Jim
                result = float(crm114_output.rstrip())
            except (TypeError, ValueError) as error:
                syslog.syslog("bad score, error = %s %s" % (crm114_output,
                                                            error))
                syslog.syslog("command =  %s" % (command_list,))
                result = self.fallback_score
        except (BoundaryError, HeaderParseError, binascii.Error) as error_message:
            syslog.syslog("score_message failed %s" % error_message)
        return result


class CRM114_balanced(CRM114):
    """CRM114 variant that only trains red while red lags behind green."""

    def __init__(self, common_CRM, user_CRM):
        CRM114.__init__(self, common_CRM, user_CRM)
        self.green_count = 0
        self.red_count = 0

    def green_test(self, given_score):
        """Accept scores below 3 for green training and count each acceptance."""
        result = given_score < 3
        # trying balanced scoring. since green is rarer, only bump count
        if result:
            self.green_count += 1
        return result

    def red_test(self, given_score):
        """Accept a score for red training only while red_count < green_count.

        While red is not behind green the limit is 1000; otherwise it is -3.
        Each acceptance bumps red_count.  The decision is logged.
        """
        score_limit = 1000
        if self.red_count < self.green_count:
            score_limit = -3
        # Evaluated on every call so result is always defined.
        result = given_score >= score_limit
        if result:
            self.red_count += 1
        syslog.syslog("red limit test %s %s %s" % (result, given_score,
                                                   score_limit))
        return result


class CRM114_var_limit(CRM114):
    """CRM114 variant whose training limits tighten as calls accumulate.

    The green limit starts at 350 and drops by one every third call down to
    3; the red limit starts at -350 and rises by one every third call up to
    -3 (the limits used by the base class).
    """

    def __init__(self, common_CRM, user_CRM):
        CRM114.__init__(self, common_CRM, user_CRM)
        self.green_count = 0
        self.red_count = 0
        self.starting_green_limit = 350
        self.green_limit = 350
        self.starting_red_limit = -350
        self.red_limit = -350

    def green_test(self, given_score):
        """Test given_score against the current green limit, then tighten it."""
        result = given_score < self.green_limit
        # bump count and adjust green limit (floor division on purpose)
        self.green_count += 1
        self.green_limit = max(
            3, self.starting_green_limit - self.green_count // 3)
        return result

    def red_test(self, given_score):
        """Test given_score against the current red limit, then tighten it."""
        result = given_score >= self.red_limit
        # bump count and adjust red limit: mirror image of green_test, moving
        # up from the negative start and stopping at -3.
        self.red_count += 1
        self.red_limit = min(
            -3, self.starting_red_limit + self.red_count // 3)
        return result


####################  test code ##################

def main():
    """Score one message read from standard input (manual test driver)."""
    from tpblue import configuration
    from tpblue import tpblue_email

    tpblue_ID = 'esj'
    config = configuration.configuration(tpblue_ID)
    crm_path = config["crm114_command"]
    user_path = config["crm114_base"]
    content_analyzer = CRM114(crm_path, user_path)

    try:
        # go grab message from standard in
        my_message = tpblue_email.tpblue_message()
        my_message.message_from_file(sys.stdin)
        my_message.meta['tpblue_ID'] = tpblue_ID
        my_message.meta['recipient_list'] = ['esj@harvee.org']
        content_analyzer.score_message(my_message.message)
    except MessageError as e:
        # this is highly likely an ill formed piece of spam.  It did
        # not even create a message object.  So, I say dump it on the
        # floor after logging the event.  Must precede the generic handler
        # below, which would otherwise shadow it.
        syslog.syslog("junk message threw exception: %s" % e)
        _log_current_exception()
    except Exception as e:
        # exception was thrown, log it
        syslog.syslog("tpblue filter threw exception: %s" % e)
        _log_current_exception()


if __name__ == "__main__":
    main()