Miscellany

public python v1 · immutable
#333384
·published 2007-01-30 21:42 UTC
infinity = 1e99999999def alphaBetaWithMemory(root, maxDepth, alpha=None, beta=None, memory=None):    '''Search game to determine best action; use alpha-beta pruning.    This version cuts off search and uses an evaluation function.        Based on algorithms described in http://www.cs.vu.nl/~aske/mtdf.html'''    player = root.turn        if memory is None:        memory = {}            def retrieve(node, alpha, beta):        '''Get memory.'''        nodeKey = hash(node)        if nodeKey in memory:            lowerbound = memory[nodeKey].get('lowerbound', -infinity)            upperbound = memory[nodeKey].get('upperbound', infinity)            if lowerbound >= beta:                raise 'return', lowerbound            if upperbound <= alpha:                raise 'return', upperbound            alpha = max(alpha, lowerbound)            beta = min(beta, upperbound)        return alpha,beta            def store(node, alpha, beta, v):        '''Save bounds in memory.'''        nodeKey = hash(node)        memory[nodeKey] = memory.get(nodeKey, {})        if v <= alpha:            # Fail low result implies an upper bound.            memory[nodeKey]['upperbound'] = v # store upperbound        if v > alpha and v < beta:            # Found an accurate minimax value.            # This will not occur if called with zero window.            memory[nodeKey]['lowerbound'] = v # store lowerbound            memory[nodeKey]['upperbound'] = v # store upperbound        if v >= beta:            # Fail high result implies a lower bound.            memory[nodeKey]['lowerbound'] = v # store lowerbound        def is_terminal(node, depth):        '''The default test cuts off at depth d or at a terminal state.'''        return depth > maxDepth or node.isGameOver()        def heuristic_value(node):        '''Returns heuristic value of game relative to the turn in the root game.'''        score1,score2 = node.getScore()        score = score1-score2        if player == node.color2:            score *= -1        return score    def max_value(node, alpha, beta, depth):        try:            alpha,beta = retrieve(node, alpha, beta)        except 'return', bound:            return bound        if is_terminal(node, depth):            v = heuristic_value(node)        else:            v = -infinity; a = alpha; # save original alpha value            for nextNode in node.getNextNodes():                if v >= beta:                    break                v = max(v, min_value(nextNode, a, beta, depth+1))                a = max(a, v)        store(node, alpha, beta, v)        return v    def min_value(node, alpha, beta, depth):        try:            alpha,beta = retrieve(node, alpha, beta)        except 'return', bound:            return bound        #print 'memory:',len(memory)        if is_terminal(node, depth):            v = heuristic_value(node)        else:            v = +infinity; b = beta; # save original beta value            for nextNode in node.getNextNodes():                if v <= alpha:                    break                v = min(v, max_value(nextNode, alpha, b, depth+1))                b = min(b, v)        store(node, alpha, beta, v)        return v    # Evaluate all choices.    if alpha is None:        alpha = -infinity    if beta is None:        beta = infinity    choices = map(lambda node: (min_value(node, alpha, beta, 0), node.actionHistory[-1]),                  root.getNextNodes())    #print 'choices:',sorted(choices, reverse=True)        # Chose best action.    score,action = max(choices)        return score,action