grid = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 1, 0]] goal = [0, len(grid[0])-1]
delta = [[-1, 0 ], [ 0, -1], [ 1, 0 ], [ 0, 1 ]]
delta_name = ['^', '<', 'v', '>']
success_prob = 0.5 failure_prob = (1.0 - success_prob)/2.0 collision_cost = 100 cost_step = 1
def stochastic_value(): value = [[1000 for row in range(len(grid[0]))] for col in range(len(grid))] policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
policy[goal[0]][goal[1]] = '*'
for i in range(100): process(value, policy)
return value, policy
def process(value, policy): processed = [[False for row in range(len(grid[0]))] for col in range(len(grid))] x = goal[0] y = goal[1] g = 0 value[x][y] = 0 processed[x][y] = True
open = [[g, x, y]] while len(open) > 0: open.sort() open.reverse() next = open.pop() x = next[1] y = next[2] g = next[0]
if x != goal[0] or y != goal[1]: value[x][y], dir = calc_min_cost(grid, value, x, y) policy[x][y] = delta_name[dir]
for i in range(len(delta)): x2 = x + delta[i][0] y2 = y + delta[i][1] if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 < len(grid[0]): if not processed[x2][y2] and grid[x2][y2] == 0: g2 = value[x][y] + cost_step open.append([g2, x2, y2]) processed[x2][y2] = True
def calc_min_cost(grid, value, x, y): mindir = -1 mincost = 0 for dir in range(4): cost = calc_cost(grid, value, x, y, dir) if mindir < 0 or cost < mincost: mindir = dir mincost = cost return mincost, mindir
def calc_cost(grid, value, x, y, dir): return (get_dir_cost(grid, value, x, y, dir) * success_prob + get_dir_cost(grid, value, x, y, dir - 1) * failure_prob + get_dir_cost(grid, value, x, y, dir + 1) * failure_prob) + cost_step
def get_dir_cost(grid, value, x, y, dir): dir &= 3 x += delta[dir][0] y += delta[dir][1] if x < 0 or y < 0 or x >= len(value) or y >= len(value[x]) or grid[x][y] != 0: return collision_cost else: return value[x][y]
value, policy = stochastic_value() for line in value: print line for line in policy: print line
|