Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from types import ListType
- from types import FloatType
# Grid world, indexed as W[y][x]. Cell symbols as used by the functions in
# this file: ' ' is a free cell, '#' cannot be entered, 'G' yields a reward
# of 100 and 'H' a reward of -100.
W = [
    list('   G'),
    list(' # H'),
    list('    '),
]

# Subtracted as the reward of every cell that is neither 'G' nor 'H'.
Cost = 3
def RewardOfCell(C):
    """Return the reward associated with the cell symbol C.

    'G' gives 100, 'H' gives -100, and every other symbol gives -Cost.
    """
    if C == 'G':
        return 100
    if C == 'H':
        return -100
    return -Cost
# Directions are numbered clockwise starting at north; the number is also the
# index into DirDeltas and into the lookup tables of the turn helpers.
Dir_North, Dir_East, Dir_South, Dir_West = range(4)

# Every action the agent can choose from.
ActionSet = [Dir_North, Dir_East, Dir_South, Dir_West]

# (dx, dy) offset per direction; y grows downwards, so north is dy = -1.
DirDeltas = [
    (0, -1),   # Dir_North
    (1, 0),    # Dir_East
    (0, 1),    # Dir_South
    (-1, 0),   # Dir_West
]
def Dim(A):
    """Return the dimensions of a nested list as a tuple.

    The size of each nesting level is taken from the first element of the
    level above it, so the structure is assumed to be rectangular.  A
    non-list argument yields the empty tuple.

    Descent stops at an empty list (its size 0 is still recorded) instead of
    failing when trying to look at a first element that does not exist.
    """
    Shape = ()
    Level = A
    while isinstance(Level, list):
        Shape += (len(Level),)
        if not Level:
            # Nothing to descend into; an empty level ends the shape.
            break
        Level = Level[0]
    return Shape
def NewArrayOfDim(Dim):
    """Return a new nested list of zeros with the given dimensions.

    Dim is a sequence of sizes, outermost level first, e.g. (2, 3) produces
    [[0, 0, 0], [0, 0, 0]].  An empty Dim produces an empty list.  Every
    nested list is a distinct object, so cells can be assigned independently.
    """
    def Build(k):
        # Innermost level: a flat run of zeros.
        if k + 1 == len(Dim):
            return [0] * Dim[k]
        # Outer level: one freshly built sub-array per entry (no aliasing).
        return [Build(k + 1) for _ in range(Dim[k])]

    if len(Dim) == 0:
        return []
    return Build(0)
class tEnv(object):
    """Bundle of the settings the functions in this file read from an Env.

    World          -- grid of cell symbols, indexed as World[y][x]
    PForward       -- probability that the chosen action is the one executed
    DiscountFactor -- multiplier applied to the expected value of successor
                      states (1 => no cost, 0.9 => 10% penalty)
    """

    def __init__(self, World, PForward, DiscountFactor):
        self.World, self.PForward, self.DiscountFactor = (
            World, PForward, DiscountFactor
        )
def CellAt(Env, State):
    """Return the symbol stored in Env.World at State.

    State is indexed as (x, y): State[1] selects the row and State[0] the
    column within that row.
    """
    Row = Env.World[State[1]]
    return Row[State[0]]
def RewardAt(Env, State):
    """Return the reward of the cell found at State in Env's world."""
    Cell = CellAt(Env, State)
    return RewardOfCell(Cell)
def LeftFrom(Dir):
    """Return the direction reached by a quarter turn to the left of Dir."""
    # Indexed by Dir: north -> west, east -> north, south -> east, west -> south.
    QuarterTurnLeft = (Dir_West, Dir_North, Dir_East, Dir_South)
    return QuarterTurnLeft[Dir]
def RightFrom(Dir):
    """Return the direction reached by a quarter turn to the right of Dir."""
    # Indexed by Dir: north -> east, east -> south, south -> west, west -> north.
    QuarterTurnRight = (Dir_East, Dir_South, Dir_West, Dir_North)
    return QuarterTurnRight[Dir]
def BackFrom(Dir):
    """Return the direction opposite to Dir."""
    # Indexed by Dir: north -> south, east -> west, south -> north, west -> east.
    Opposite = (Dir_South, Dir_West, Dir_North, Dir_East)
    return Opposite[Dir]
def InitalValueMap(Env):
    """Return a zero-filled value map with the same dimensions as Env.World.

    The map is sized from the environment that was passed in rather than
    from the module-level world W, so it also fits environments built on a
    different grid.
    """
    return NewArrayOfDim(Dim(Env.World))
def StateAfterAction(Env, State, Action):
    """Return the state reached by executing Action in State.

    The state only changes when the current cell is free (' ').  The target
    position is clamped to the bounds of the world, and a move into a '#'
    cell leaves the state unchanged.
    """
    if CellAt(Env, State) != ' ':
        return State

    Shape = Dim(Env.World)  # (rows, columns)
    LastX = Shape[1] - 1
    LastY = Shape[0] - 1
    StepX, StepY = DirDeltas[Action]
    Target = (
        max(0, min(LastX, State[0] + StepX)),
        max(0, min(LastY, State[1] + StepY)),
    )
    if CellAt(Env, Target) == '#':
        return State
    return Target
def ActionStates_LR(Env, State, Action):
    """Return the possible outcomes of choosing Action in State.

    The chosen action is executed with probability Env.PForward; the
    remaining probability is split evenly between the actions to its left
    and to its right.  Each outcome is a tuple
    (probability, resulting state, executed action); outcomes whose
    probability is not positive are left out.  The list is empty when the
    current cell is not free (' ').
    """
    SideProbability = 0.5 * (1.0 - Env.PForward)
    if CellAt(Env, State) != ' ':
        return []

    # (probability, function mapping the chosen action to the executed one)
    Variants = (
        (Env.PForward, lambda Dir: Dir),
        (SideProbability, LeftFrom),
        (SideProbability, RightFrom),
    )
    Outcomes = []
    for Probability, Turn in Variants:
        if Probability > 0:
            Executed = Turn(Action)
            Outcomes.append(
                (Probability, StateAfterAction(Env, State, Executed), Executed)
            )
    return Outcomes
def ActionStates_B(Env, State, Action):
    """Return the possible outcomes of choosing Action in State.

    The chosen action is executed with probability Env.PForward; with the
    remaining probability the opposite action is executed instead.  Each
    outcome is a tuple (probability, resulting state, executed action);
    outcomes whose probability is not positive are left out.  The list is
    empty when the current cell is not free (' ').
    """
    BackProbability = 1.0 - Env.PForward
    if CellAt(Env, State) != ' ':
        return []

    # (probability, function mapping the chosen action to the executed one)
    Variants = (
        (Env.PForward, lambda Dir: Dir),
        (BackProbability, BackFrom),
    )
    Outcomes = []
    for Probability, Turn in Variants:
        if Probability > 0:
            Executed = Turn(Action)
            Outcomes.append(
                (Probability, StateAfterAction(Env, State, Executed), Executed)
            )
    return Outcomes
def ValueForAction(Env, V, State, Action):
    """Return the value of choosing Action in State under the value map V.

    This is the probability-weighted value of the successor states given by
    ActionStates_LR, scaled by Env.DiscountFactor, plus the reward of the
    current state.  V is indexed as V[y][x].
    """
    Expected = 0
    # Accumulate term by term, in outcome order, so rounding stays the same.
    for Outcome in ActionStates_LR(Env, State, Action):
        Probability, Successor = Outcome[0], Outcome[1]
        Expected = Expected + Probability * V[Successor[1]][Successor[0]]
    return Expected * Env.DiscountFactor + RewardAt(Env, State)
def ValueOfState(Env, V, State):
    """Return the best action value available in State, or None.

    Every action in ActionSet is evaluated with ValueForAction and the
    largest value is returned; on ties the earliest action's value wins.
    None is returned for '#' cells (and if ActionSet is empty).
    """
    if CellAt(Env, State) == '#':
        return None

    Best = None
    for Action in ActionSet:
        ActionValue = ValueForAction(Env, V, State, Action)
        # Identity test against None: "no value seen yet" must not be
        # confused with a value that merely compares equal to None.
        if Best is None or ActionValue > Best:
            Best = ActionValue
    return Best
def UpdateValueAt(Env, V, State):
    """Recompute the value of State, store it in V in place and return it.

    State is an (x, y) pair; the value is written to V[y][x].
    """
    Column, Row = State
    NewValue = ValueOfState(Env, V, (Column, Row))
    V[Row][Column] = NewValue
    return NewValue
def UpdateValues(Env, V):
    """Perform one in-place sweep over V, updating every cell of the world.

    Cells are visited row by row and left to right within a row, so later
    cells in the sweep already see the values updated earlier in it.
    """
    Shape = Dim(Env.World)  # (rows, columns)
    Cells = [(x, y) for y in range(Shape[0]) for x in range(Shape[1])]
    for Cell in Cells:
        UpdateValueAt(Env, V, Cell)
def PolicyAt(Env, V, State):
    """Return the policy symbol for State under the value map V.

    For a free cell (' ') this is the letter 'N', 'E', 'S' or 'W' of the
    action with the largest ValueForAction; on ties the earliest action in
    ActionSet wins.  Every other cell yields 'X'.
    """
    if CellAt(Env, State) != ' ':
        return 'X'

    BestAction = None
    BestValue = None
    for Action in ActionSet:
        ActionValue = ValueForAction(Env, V, State, Action)
        # A separate None sentinel marks "nothing chosen yet" instead of
        # overloading the result string for that purpose.
        if BestAction is None or ActionValue > BestValue:
            BestValue = ActionValue
            BestAction = Action

    if BestAction is None:
        # Only reachable when ActionSet is empty.
        return 'X'
    return 'NESW'[BestAction]
def Policy(Env, V):
    """Return a grid shaped like V holding the policy symbol of every cell.

    The result is indexed as [y][x], like V, and each entry comes from
    PolicyAt.
    """
    Shape = Dim(V)  # (rows, columns)
    Grid = NewArrayOfDim(Shape)
    for y in range(Shape[0]):
        Row = Grid[y]
        for x in range(Shape[1]):
            Row[x] = PolicyAt(Env, V, (x, y))
    return Grid
def PrintNice(A):
    """Print the two-dimensional nested list A, one row per line.

    Floats are shown with four significant digits ("%.4g"); every other
    value is shown via str().  The first line opens the outer bracket and
    the last line closes it.
    """
    D = Dim(A)
    LastRow = D[0] - 1
    for y in range(D[0]):
        Cells = []
        for x in range(D[1]):
            Value = A[y][x]
            if isinstance(Value, float):
                Cells.append("%.4g" % Value)
            else:
                Cells.append(str(Value))
        PrefixStr = '[' if y == 0 else ' '
        SuffixStr = ']' if y == LastRow else ''
        # A single parenthesised argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print(PrefixStr + "[" + ", ".join(Cells) + "]" + SuffixStr)
# Environment on the world W: the chosen action is executed with probability
# 0.8, and successor values are not discounted.
Env = tEnv(World=W, PForward=0.8, DiscountFactor=1.0)
V = InitalValueMap(Env)

# Run 500 in-place sweeps over the value map.
for i in range(500):
    UpdateValues(Env, V)

# Show the resulting value map, then the policy derived from it.
PrintNice(V)
PrintNice(Policy(Env, V))

# Reference values kept from the original author (presumably the expected
# value map for the settings above; verify by running the script):
# 85.18 89.40 93.15 100
# 81.43 ##### 68.37 -100
# 77.21 73.46 69.56 47.39
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement