|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这个代码造成的MemoryError怎么修改呀 ai调不出来
import numpy as np
import random
import matplotlib.pyplot as plt
# 定义参数和常量
M = 1 # 市场需求函数的截距
m = 0.5 # 次级市场的价格
alpha = 1.2 # 高质量产品的价格溢价
theta = 0.5 # 厂商2的产品为高质量的概率
cA = 0.2 # 厂商1的边际成本
cB = 0.4 # 厂商2的边际成本
gamma = 0.9 # 未来奖励的折扣因子
epsilon = 0.1 # 探索率,用于epsilon-贪心策略
num_episodes = 100 # 训练的轮数
num_steps = 10 # 每轮的步数
# 定义奖励函数
def reward(state, action):
P, cA, cB = state
qA, qB = action
profitA = qA * (P - cA)
profitB = qB * (P - cB)
return profitA, profitB
# 定义转移函数
def transition(state, action):
P, cA, cB = state
qA, qB = action
quality = random.choices(["L", "H"], weights=[1 - theta, theta])[0]
if quality == "L":
next_P = M - qA - qB
else:
next_P = alpha * M - qA - qB
next_cA = cA
next_cB = random.uniform(0, M)
next_state = (next_P, next_cA, next_cB)
return next_state
# 定义状态空间和动作空间
state_space = []
for P in np.arange(0, M + 0.01, 0.1):
for cA in np.arange(0, M + 0.01, 0.1):
for cB in np.arange(0, M + 0.01, 0.1):
state_space.append((P, cA, cB))
action_space = np.arange(0, M + 0.01, 0.1)
# 初始化两个厂商的Q表
Q1 = {} # 厂商1的Q表
Q2 = {} # 厂商2的Q表
for state in state_space:
for action in action_space:
Q1[(str(state), str(action))] = 0
Q2[(str(state), str(action))] = 0
# 训练Q-learning算法
state_Ps = []
state_As = []
state_Bs = []
profits_A = []
profits_B = []
for episode in range(num_episodes):
# 随机初始化初始状态和动作(假设初始市场价格为M)
state_Ps.append(M)
state_cA = cA
state_cB = cB
state = (M, state_cA, state_cB)
action_A = random.choice(action_space)
action_B = random.choice(action_space)
action = (action_A, action_B)
total_rewardA = 0
total_rewardB = 0
for step in range(num_steps):
# epsilon-贪心策略选择动作
if random.random() < epsilon:
next_action_A = random.choice(action_space)
next_action_B = random.choice(action_space)
else:
if (str(state), str(action)) not in Q1:
Q1[(str(state), str(action))] = 0
if (str(state), str(action)) not in Q2:
Q2[(str(state), str(action))] = 0
max_qA = max(Q1.get((str(state), str(a)), 0) for a in action_space)
max_qB = max(Q2.get((str(state), str(a)), 0) for a in action_space)
next_action_A = max(Q1, key=lambda x: Q1[x])[1]
next_action_B = max(Q2, key=lambda x: Q2[x])[1]
next_state = transition(state, action)
next_P, next_cA, next_cB = next_state
# 执行动作,得到下一个状态和奖励
rewardA, rewardB = reward(state, action)
# 更新Q值
if (str(state), str(action)) not in Q1:
Q1[(str(state), str(action))] = 0
if (str(state), str(action)) not in Q2:
Q2[(str(state), str(action))] = 0
Q1[(str(state), str(action))] += alpha * (rewardA + gamma * max_qA - Q1[(str(state), str(action))])
Q2[(str(state), str(action))] += alpha * (rewardB + gamma * max_qB - Q2[(str(state), str(action))])
next_action = (next_action_A, next_action_B)
# 更新状态和动作
state = next_state
action = next_action
state_Ps.append(next_P)
state_As.append(next_cA)
state_Bs.append(next_cB)
total_rewardA += rewardA
total_rewardB += rewardB
# 保存每轮的利润
profits_A.append(total_rewardA)
profits_B.append(total_rewardB)
# 绘制市场价格、边际成本和利润的变化曲线
plt.subplot(3, 1, 1)
plt.plot(state_Ps)
plt.xlabel("Step")
plt.ylabel("Market Price")
plt.subplot(3, 1, 2)
plt.plot(state_As, label="Manufacturer A")
plt.plot(state_Bs, label="Manufacturer B")
plt.xlabel("Step")
plt.ylabel("Marginal Cost")
plt.legend()
plt.subplot(3, 1, 3)
plt.plot(profits_A, label="Manufacturer A")
plt.plot(profits_B, label="Manufacturer B")
plt.xlabel("Episode")
plt.ylabel("Profit")
plt.legend()
plt.tight_layout()
plt.show()
MemoryError通常是因为程序使用的内存超出了可用内存限制导致的。可以尝试以下几种方法来解决:
1. 适当减小数组的尺寸或分段处理数组,尽量将内存占用量降到可接受范围内。
2. 使用一些库函数来代替手动写循环和计算,比如使用numpy库的矩阵计算可以显著降低内存开销。
3. 增加物理内存或虚拟内存的限制,但这种方法可能会导致系统变慢或变得不稳定。
在这个代码中,使用了三个嵌套的for循环来生成状态空间,可能会导致内存消耗巨大。可以考虑使用其他方式生成状态空间,比如使用numpy.linspace()函数来代替for循环。
|
|