infinity = 1e99999999def alphaBetaWithMemory(root, maxDepth, alpha=None, beta=None, memory=None): '''Search game to determine best action; use alpha-beta pruning. This version cuts off search and uses an evaluation function. Based on algorithms described in http://www.cs.vu.nl/~aske/mtdf.html''' player = root.turn if memory is None: memory = {} def retrieve(node, alpha, beta): '''Get memory.''' nodeKey = hash(node) if nodeKey in memory: lowerbound = memory[nodeKey].get('lowerbound', -infinity) upperbound = memory[nodeKey].get('upperbound', infinity) if lowerbound >= beta: raise 'return', lowerbound if upperbound <= alpha: raise 'return', upperbound alpha = max(alpha, lowerbound) beta = min(beta, upperbound) return alpha,beta def store(node, alpha, beta, v): '''Save bounds in memory.''' nodeKey = hash(node) memory[nodeKey] = memory.get(nodeKey, {}) if v <= alpha: # Fail low result implies an upper bound. memory[nodeKey]['upperbound'] = v # store upperbound if v > alpha and v < beta: # Found an accurate minimax value. # This will not occur if called with zero window. memory[nodeKey]['lowerbound'] = v # store lowerbound memory[nodeKey]['upperbound'] = v # store upperbound if v >= beta: # Fail high result implies a lower bound. memory[nodeKey]['lowerbound'] = v # store lowerbound def is_terminal(node, depth): '''The default test cuts off at depth d or at a terminal state.''' return depth > maxDepth or node.isGameOver() def heuristic_value(node): '''Returns heuristic value of game relative to the turn in the root game.''' score1,score2 = node.getScore() score = score1-score2 if player == node.color2: score *= -1 return score def max_value(node, alpha, beta, depth): try: alpha,beta = retrieve(node, alpha, beta) except 'return', bound: return bound if is_terminal(node, depth): v = heuristic_value(node) else: v = -infinity; a = alpha; # save original alpha value for nextNode in node.getNextNodes(): if v >= beta: break v = max(v, min_value(nextNode, a, beta, depth+1)) a = max(a, v) store(node, alpha, beta, v) return v def min_value(node, alpha, beta, depth): try: alpha,beta = retrieve(node, alpha, beta) except 'return', bound: return bound #print 'memory:',len(memory) if is_terminal(node, depth): v = heuristic_value(node) else: v = +infinity; b = beta; # save original beta value for nextNode in node.getNextNodes(): if v <= alpha: break v = min(v, max_value(nextNode, alpha, b, depth+1)) b = min(b, v) store(node, alpha, beta, v) return v # Evaluate all choices. if alpha is None: alpha = -infinity if beta is None: beta = infinity choices = map(lambda node: (min_value(node, alpha, beta, 0), node.actionHistory[-1]), root.getNextNodes()) #print 'choices:',sorted(choices, reverse=True) # Chose best action. score,action = max(choices) return score,action