希望打印的新的列表能够覆盖原来的列表显示，只用看create_environment函数,Python交流,编程语言专区,鱼C论坛

猪猪虾 发表于 2020-12-26 15:54:31

希望打印的新的列表能够覆盖原来的列表显示，只用看create_environment函数

本帖最后由猪猪虾于 2020-12-26 15:55 编辑

‘！！！！！！！！！！！
程序虽然长，但是都不用看，看create_environment（第一个定义的函数即可，只有这里是显示用的）
我想打印的列表就是env_list ，每次新生成的列表，覆盖掉原列表显示

’

import numpy as np
import pandas as pd
import time

def create_environment(state_x,state_y,episode,step_counter,table):
'''
就是创建游戏的简单画面，定义游戏的移动等
'''
env_list = [['#', '#', '#', '#'] ,
            ['#', '&', '&', '#'] ,
            ['#', '&', '@', '#'] ,
            ['#', '#', '#', '#'] ]
if state_x == 1000:
   interraction = 'episode %d: total step_step_move: %d'%(episode + 1,step_counter)
   print('\r{}'.format(interraction),end = '')
   time.sleep(2)
   print('\r             ',end = '')
else:
   env_list = 'o'
   #interraction = ''.join(env_list)
   #print('\r{}'.format(interraction),end = '')
   print("***************new way******************")
   print("\n")
   for i in range(4):
         print(env_list)
         print("\n")
   time.sleep(Fresh_time)

def get_evn_feedback(state_x,state_y,A):
'''
就是看环境会给我多少reward以及下一步的状态是什么
'''
if A == 'right':
   #判断有没有超出边界
   if state_x >= N_state_x or state_x < 0 or \
      state_y >= N_state_y or state_y < 0:
      R = -1
      if state_x >= N_state_x:
            state_x=0
      if state_x < 0:
            state_x= N_state_x-1
      if state_y >= N_state_y:
            state_y= 0
      if state_y < 0:
            state_y= N_state_y-1

   #踩到陷阱
   elif (state_x,state_y) in [(1,1),(1,2),(2,1)]:
         state_x = state_x
         state_y += 1
         R = -1

   #终点
   elif (state_x,state_y)== (2,2):
         state_x = 1000
         state_y = 1000
         R = 1

   #其他情况不给奖励
   else:
         R = 0
         if state_y - 1 <0:
            state_y = N_state_y-1
         else:
            state_y -= 1
         state_x = state_x


elif A == 'left':
   #判断有没有超出边界
   if state_x >= N_state_x or state_x < 0 or \
      state_y >= N_state_y or state_y < 0:
      R = -1
      if state_x >= N_state_x:
            state_x=0
      if state_x < 0:
            state_x= N_state_x-1
      if state_y >= N_state_y:
            state_y=0
      if state_y < 0:
            state_y= N_state_y-1

   #踩到陷阱
   elif (state_x,state_y) in [(1,1),(1,2),(2,1)]:
         state_x = state_x
         state_y -= 1
         R = -1

   #终点
   elif (state_x,state_y)== (2,2):
         state_x = 1000
         state_y = 1000
         R = 1

   #其他情况不给奖励
   else:
         R = 0
         state_x = state_x
         if state_y +1 >= N_state_y:
            state_y = 0
         else:
            state_y += 1


elif A == 'up':
   #判断有没有超出边界
   if state_x >= N_state_x or state_x < 0 or \
      state_y >= N_state_y or state_y < 0:
      R = -1
      if state_x >= N_state_x:
            state_x=0
      if state_x < 0:
            state_x= N_state_x -1
      if state_y >= N_state_y:
            state_y=0
      if state_y < 0:
            state_y= N_state_y -1

   #踩到陷阱
   elif (state_x,state_y) in [(1,1),(1,2),(2,1)]:
         state_x -= 1
         state_y = state_y
         R = -1

   #终点
   elif (state_x,state_y)== (2,2):
         state_x = 1000
         state_y = 1000
         R = 1

   #其他情况不给奖励
   else:
         R = 0
         if state_x - 1 < 0:
            state_x = N_state_x - 1
         else:
            state_x -= 1
         state_y = state_y




else:
         #判断有没有超出边界
         if state_x >= N_state_x or state_x < 0 or \
            state_y >= N_state_y or state_y < 0:
            R = -1
            if state_x >= N_state_x:
               state_x=0
            if state_x < 0:
               state_x= N_state_x -1
            if state_y >= N_state_y:
               state_y=0
            if state_y < 0:
               state_y= N_state_y -1

         #踩到陷阱
         elif (state_x,state_y) in [(1,1),(1,2),(2,1)]:
            state_x += 1
            state_y = state_y
            R = -1

         #终点
         elif (state_x,state_y)== (2,2):
            state_x = 1000
            state_y = 1000
            R = 1
         #其他情况不给奖励
         else:
            R = 0
            if state_x + 1 >= N_state_x:
               state_x = 0
            else:
               state_x += 1
            state_y = state_y

return state_x,state_y,R

def build_q_table(N_state_x,N_state_y):
table = np.zeros((N_state_x,N_state_y)) #表格的值都初始化为0
return table

#print(build_q_table(N_state,Action))

def choose_action(state_x,state_y,q_table):
'''
state = (N_state_x,N_state_y)
下一步动作的选取要根据当前的状态以及已有的q_table来进行选择
选择的时候分为两种情况：
1.在90%的情况下，根据当前的状态以及已有的q_table来选择最优的action
2.10%的情况下，随机选择一个action
'''

score_of_4_dir,score_of_4_direction = get_neibor_q(state_x,state_y,q_table)

#>0.9,也就是产生的随机数是在0.9~1之间，10%的情况下，随机选择下部一的动作
#全部为0的情况也随机选取下一步的状态
sign_all_0 = True
#判断上下左右四个分数是不是全部为0
for i in range(len(score_of_4_dir)):
   if score_of_4_dir != 0 and score_of_4_dir != -100:
      sign_all_0= False
      break

if np.random.uniform() > Greedy_plicy or sign_all_0 == True :
   #socre = -100的位置表示已经超出了游戏边界，这个方向不能走，三级选择的方向里面需要排除掉这些，
   #再进行随机选择
   action_choice =Action.copy()
   for i in range(4):#四个方向
         if score_of_4_dir == -100:
            action_choice = 'prohibit'

   #一直随机挑选，不能选择标记为none的方向
   action_name = ''
   while action_name == 'prohibit' or action_name == '':
         action_name = np.random.choice(action_choice)

else:
   #标记为none的地方对应的q表值为-100，不可能是最大值

   action_name = Action#返回state_of_action里面较大值的索引
return action_name

def get_neibor_q(state_x,state_y,q_table):
'''
获取当前状态的上下左右四个状态的q值以及对应的坐标
'''
#当前的点不一定能向四个方向前进，可能到边上了
#分数在列表里面的存储顺序默认为上下左右
position_of_4_direction = [,\
                           ,\
                           ,\
                           ]
score_of_4_dir = []
for i in range(len(position_of_4_direction)):

   ifposition_of_4_direction < 0 or position_of_4_direction >= N_state_x \
      or position_of_4_direction < 0 or position_of_4_direction >= N_state_y:
         position_of_4_direction= -100
         position_of_4_direction= -100#设置成这个值，表示这个方向不能走
         score_of_4_dir.append(-100)
   else:
#          print("position_of_4_direction[{}]={}".format(i,position_of_4_direction))
#          print("\n")
#          print("position_of_4_direction[{}]={}".format(i,position_of_4_direction))
#          print("\n")
         score_of_4_dir.append(q_table,position_of_4_direction])
return score_of_4_dir,position_of_4_direction

def reforcement_learning():
table = build_q_table(N_state_x,N_state_y)
for episode in range(max_episodes):
      state_x,state_y = 0,0

      step_counter =0
      create_environment(state_x,state_y,episode,step_counter,table)

      while state_x != 1000:
         A = choose_action(state_x,state_y,table)
         state_x_,state_y_,R = get_evn_feedback(state_x,state_y,A)
         q_predict = table

         if state_x_ != 1000 :
            score_of_4_dir,position_of_4_direction = get_neibor_q(state_x,state_y,table)
            position_of_4_direction = get_neibor_q(state_x,state_y,table)
            reality = R +discount_factor * max(score_of_4_dir)
         else:
            reality= R#discount_factor * max(table.iloc)不存在了，已经终结没有下一个状态了
            state_x,state_y= 1000,1000

         table = table + Learning_rate * (reality - q_predict)
         state_x,state_y = state_x_,state_y_

         create_environment(state_x,state_y,episode,step_counter+1,table)
         step_counter += 1
return table

if __name__== "__main__":
N_state_x = 4#状态的种类
N_state_y = 4
Action = ['up','down','left','right']
Greedy_plicy = 0.9 #多少比例择优选择action，其余比例下随机选择action
Learning_rate = 0.1 #学习率
discount_factor = 0.9#未来奖励的衰减值
max_episodes = 50 #只玩13回合就结束
Fresh_time = 0.3 #0.3秒显示走一步
np.random.seed(2)

q_table = reforcement_learning()
print(q_table)

tryhi 发表于 2020-12-27 01:39:26

不错，支持一下

页: [1]

鱼C论坛's Archiver

希望打印的新的列表能够覆盖原来的列表显示，只用看create_environment函数