import GameEnvDDPG
import numpy as np
from DDPG1 import DDPGAgent1
import random, math
training_episodes = 10000
max_training_time = 200
target_score = 74
env = GameEnvDDPG.RacingEnv()
state_size = 22
action_size = 4
learning_rate = 0.001
tau = 0.001
gamma = 0.99
replay_buffer_size = 10000
batch_size = 64
agent = DDPGAgent1(state_size, action_size, learning_rate, gamma, tau, replay_buffer_size, batch_size)
episode_rewards = []
render_interval = 10
for episode in range(1, training_episodes + 1):
state = env.reset()
total_reward = 0
for t in range(max_training_time):
action = agent.act(state)
print("Action:", action)
next_state, reward, done, _ = env.step(action[0])
agent.replay_buffer.add(state, action, reward, next_state, done)
if len(agent.replay_buffer.buffer) > batch_size:
batch = agent.replay_buffer.sample(batch_size)
agent.learn(*batch)
total_reward += reward
state = next_state
if done:
break
episode_rewards.append(total_reward)
average_reward = np.mean(episode_rewards[-100:])
print(f"Episode {episode}/{training_episodes} - Total Reward: {total_reward:.2f} - Average Reward: {average_reward:.2f}")
if episode % 100 == 0:
agent.save_model(f"ddpg_episode_{episode}.h5")
if average_reward >= target_score:
print(f"Target average reward reached! (>= {target_score})")
break
if episode % render_interval == 0:
env.render()
agent.save_model("final_ddpg_model.h5")
env.close()
import pygame
class Wall:
def __init__(self, x1, y1, x2, y2):
self.x1 = x1
self.y1 = y1
self.x2 = x2
self.y2 = y2
def draw(self, win):
pygame.draw.line(win, (255,255,255), (self.x1, self.y1), (self.x2, self.y2), 5)
def getWalls():
walls = []
wall1 = Wall(12, 451, 15, 130)
wall2 = Wall(15, 130, 61, 58)
wall3 = Wall(61, 58, 149, 14)
wall4 = Wall(149, 14, 382, 20)
wall5 = Wall(382, 20, 549, 31)
wall6 = Wall(549, 31, 636, 58)
wall7 = Wall(636, 58, 678, 102)
wall8 = Wall(678, 102, 669, 167)
wall9 = Wall(669, 167, 600, 206)
wall10 = Wall(600, 206, 507, 214)
wall11 = Wall(507, 214, 422, 232)
wall12 = Wall(422, 232, 375, 263)
wall13 = Wall(375, 263, 379, 283)
wall14 = Wall(379, 283, 454, 299)
wall15 = Wall(454, 299, 613, 286)
wall16 = Wall(613, 286, 684, 238)
wall17 = Wall(684, 238, 752, 180)
wall18 = Wall(752, 180, 862, 185)
wall19 = Wall(862, 185, 958, 279)
wall20 = Wall(958, 279, 953, 410)
wall21 = Wall(953, 410, 925, 505)
wall22 = Wall(925, 505, 804, 566)
wall23 = Wall(804, 566, 150, 570)
wall24 = Wall(150, 570, 46, 529)
wall25 = Wall(46, 529, 12, 451)
wall27 = Wall(104, 436, 96, 161)
wall28 = Wall(96, 161, 122, 122)
wall29 = Wall(122, 122, 199, 91)
wall30 = Wall(199, 91, 376, 94)
wall31 = Wall(376, 94, 469, 100)
wall32 = Wall(469, 100, 539, 102)
wall33 = Wall(539, 102, 585, 121)
wall34 = Wall(585, 121, 585, 139)
wall35 = Wall(585, 139, 454, 158)
wall36 = Wall(454, 158, 352, 183)
wall37 = Wall(352, 183, 293, 239)
wall38 = Wall(293, 239, 294, 318)
wall39 = Wall(294, 318, 361, 357)
wall40 = Wall(361, 357, 490, 373)
wall41 = Wall(490, 373, 671, 359)
wall42 = Wall(671, 359, 752, 300) #
wall43 = Wall(752, 300, 812, 310)#
wall44 = Wall(812, 310, 854, 369)
wall45 = Wall(854, 369, 854, 429)
wall46 = Wall(854, 429, 754, 483)
wall47 = Wall(754, 483, 192, 489)
wall48 = Wall(192, 489, 104, 436)
walls.append(wall1)
walls.append(wall2)
walls.append(wall3)
walls.append(wall4)
walls.append(wall5)
walls.append(wall6)
walls.append(wall7)
walls.append(wall8)
walls.append(wall9)
walls.append(wall10)
walls.append(wall11)
walls.append(wall12)
walls.append(wall13)
walls.append(wall14)
walls.append(wall15)
walls.append(wall16)
walls.append(wall17)
walls.append(wall18)
walls.append(wall19)
walls.append(wall20)
walls.append(wall21)
walls.append(wall22)
walls.append(wall23)
walls.append(wall24)
walls.append(wall25)
walls.append(wall27)
walls.append(wall28)
walls.append(wall29)
walls.append(wall30)
walls.append(wall31)
walls.append(wall32)
walls.append(wall33)
walls.append(wall34)
walls.append(wall35)
walls.append(wall36)
walls.append(wall37)
walls.append(wall38)
walls.append(wall39)
walls.append(wall40)
walls.append(wall41)
walls.append(wall42)
walls.append(wall43)
walls.append(wall44)
walls.append(wall45)
walls.append(wall46)
walls.append(wall47)
walls.append(wall48)
return(walls)
import pygame
class Goal:
def __init__(self, x1, y1, x2, y2):
self.x1 = x1
self.y1 = y1
self.x2 = x2
self.y2 = y2
self.isactiv = False
def draw(self, win):
pygame.draw.line(win, (0,255,0), (self.x1, self.y1), (self.x2, self.y2), 2)
if self.isactiv:
pygame.draw.line(win, (255,0,0), (self.x1, self.y1), (self.x2, self.y2), 2)
# the file of shame
def getGoals():
goals = []
goal1 = Goal(0,200,120,200)
goal2 = Goal(0,100,120,150)
goal2_5 = Goal(0,0,150,130)
goal3 = Goal(120,0,170,120)
goal3_5 = Goal(200,0,200,120)
goal4 = Goal(270,0,270,110)
goal4_5 = Goal(350,0,350,110)
goal5 = Goal(450,0,450,110)
goal5_5 = Goal(525,0,525,110)
goal6 = Goal(600,0,550,130)
goal6_5 = Goal(550,130,700,60)
goal7 = Goal(550,130,700,130)
goal7_5 = Goal(550,130,650,200)
goal8 = Goal(550,130,570,240)
goal9 = Goal(410,130,430,260)
goal9_5 = Goal(430,260,300,350)
goal10 = Goal(430,260,260,260)
goal10_5 = Goal(430,260,280,180)
goal11 = Goal(430,260,400,400)
goal12 = Goal(550,260,570,400)
goal13 = Goal(750,400,650,200)
goal14 = Goal(750,400,800,160)
goal15 = Goal(750,400,950,240)
goal16 = Goal(750,400,980,440)
goal17 = Goal(750,400,900,600)
goal18 = Goal(750,460,750,600)
goal19 = Goal(670,460,670,600)
goal19_5 = Goal(590,460,590,600)
goal20 = Goal(510,460,510,600)
goal20_5 = Goal(430,460,430,600)
goal21 = Goal(350,460,350,600)
goal21_5 = Goal(280,460,278,600)
goal22 = Goal(210,460,190,600)
goal22_5 = Goal(80,600,175,440)
goal23 = Goal(150,420,0,570)
goal23_5 = Goal(0,450,130,400)
goal24 = Goal(0,380,130,380)
goals.append(goal1)
goals.append(goal2)
goals.append(goal2_5)
goals.append(goal3)
goals.append(goal3_5)
goals.append(goal4)
goals.append(goal4_5)
goals.append(goal5)
goals.append(goal5_5)
goals.append(goal6)
goals.append(goal6_5)
goals.append(goal7)
goals.append(goal7_5)
goals.append(goal8)
goals.append(goal9)
goals.append(goal10_5)
goals.append(goal10)
goals.append(goal9_5)
goals.append(goal11)
goals.append(goal12)
goals.append(goal13)
goals.append(goal14)
goals.append(goal15)
goals.append(goal16)
goals.append(goal17)
goals.append(goal18)
goals.append(goal19)
goals.append(goal19_5)
goals.append(goal20)
goals.append(goal20_5)
goals.append(goal21)
goals.append(goal21_5)
goals.append(goal22)
goals.append(goal22_5)
goals.append(goal23)
goals.append(goal23_5)
goals.append(goal24)
goals[len(goals)-1].isactiv = True
return(goals)
import pygame
import math
import numpy as np
from Walls import Wall
from Walls import getWalls
from Goals import Goal
from Goals import getGoals
GOALREWARD = 1
LIFE_REWARD = 0
PENALTY = -1
def distance(pt1, pt2):
return(((pt1.x - pt2.x)**2 + (pt1.y - pt2.y)**2)**0.5)
def rotate(origin,point,angle):
qx = origin.x + math.cos(angle) * (point.x - origin.x) - math.sin(angle) * (point.y - origin.y)
qy = origin.y + math.sin(angle) * (point.x - origin.x) + math.cos(angle) * (point.y - origin.y)
q = myPoint(qx, qy)
return q
def rotateRect(pt1, pt2, pt3, pt4, angle):
pt_center = myPoint((pt1.x + pt3.x)/2, (pt1.y + pt3.y)/2)
pt1 = rotate(pt_center,pt1,angle)
pt2 = rotate(pt_center,pt2,angle)
pt3 = rotate(pt_center,pt3,angle)
pt4 = rotate(pt_center,pt4,angle)
return pt1, pt2, pt3, pt4
class myPoint:
def __init__(self, x, y):
self.x = x
self.y = y
class myLine:
def __init__(self, pt1, pt2):
self.pt1 = myPoint(pt1.x, pt1.y)
self.pt2 = myPoint(pt2.x, pt2.y)
class Ray:
def __init__(self,x,y,angle):
self.x = x
self.y = y
self.angle = angle
def cast(self, wall):
x1 = wall.x1
y1 = wall.y1
x2 = wall.x2
y2 = wall.y2
vec = rotate(myPoint(0,0), myPoint(0,-1000), self.angle)
x3 = self.x
y3 = self.y
x4 = self.x + vec.x
y4 = self.y + vec.y
den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
if(den == 0):
den = 0
else:
t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den
if t > 0 and t < 1 and u < 1 and u > 0:
pt = myPoint(math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1)))
return(pt)
class Car:
def __init__(self, x, y):
self.pt = myPoint(x, y)
self.x = x
self.y = y
self.width = 14
self.height = 30
self.points = 0
self.original_image = pygame.image.load("car.png").convert()
self.image = self.original_image
self.image.set_colorkey((0,0,0))
self.rect = self.image.get_rect().move(self.x, self.y)
self.angle = math.radians(180)
self.soll_angle = self.angle
self.dvel = 1
self.vel = 0
self.velX = 0
self.velY = 0
self.maxvel = 15
self.angle = math.radians(180)
self.soll_angle = self.angle
self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)
self.p1 = self.pt1
self.p2 = self.pt2
self.p3 = self.pt3
self.p4 = self.pt4
self.distances = []
self.closestRays = [] # 改了这边
def action(self, choice):
if choice == 0:
pass
elif np.any(choice == 1):
self.accelerate(self.dvel)
elif np.any(choice == 8):
self.accelerate(self.dvel)
self.turn(1)
elif np.any(choice == 7):
self.accelerate(self.dvel)
self.turn(-1)
elif np.any(choice == 4):
self.accelerate(-self.dvel)
elif np.any(choice == 5):
self.accelerate(-self.dvel)
self.turn(1)
elif np.any(choice == 6):
self.accelerate(-self.dvel)
self.turn(-1)
elif np.any(choice == 3):
self.turn(1)
elif np.any(choice == 2):
self.turn(-1)
def accelerate(self,dvel):
dvel = dvel * 2
self.vel = self.vel + dvel
if self.vel > self.maxvel:
self.vel = self.maxvel
if self.vel < -self.maxvel:
self.vel = -self.maxvel
def turn(self, dir):
self.soll_angle = self.soll_angle + dir * math.radians(15)
def update(self):
self.angle = self.soll_angle
vec_temp = rotate(myPoint(0,0), myPoint(0,self.vel), self.angle)
self.velX, self.velY = vec_temp.x, vec_temp.y
self.x = self.x + self.velX
self.y = self.y + self.velY
self.rect.center = self.x, self.y
self.pt1 = myPoint(self.pt1.x + self.velX, self.pt1.y + self.velY)
self.pt2 = myPoint(self.pt2.x + self.velX, self.pt2.y + self.velY)
self.pt3 = myPoint(self.pt3.x + self.velX, self.pt3.y + self.velY)
self.pt4 = myPoint(self.pt4.x + self.velX, self.pt4.y + self.velY)
self.p1 ,self.p2 ,self.p3 ,self.p4 = rotateRect(self.pt1, self.pt2, self.pt3, self.pt4, self.soll_angle)
self.image = pygame.transform.rotate(self.original_image, 90 - self.soll_angle * 180 / math.pi)
x, y = self.rect.center
self.rect = self.image.get_rect()
self.rect.center = (x, y)
def cast(self, walls):
ray1 = Ray(self.x, self.y, self.soll_angle)
ray2 = Ray(self.x, self.y, self.soll_angle - math.radians(30))
ray3 = Ray(self.x, self.y, self.soll_angle + math.radians(30))
ray4 = Ray(self.x, self.y, self.soll_angle + math.radians(45))
ray5 = Ray(self.x, self.y, self.soll_angle - math.radians(45))
ray6 = Ray(self.x, self.y, self.soll_angle + math.radians(90))
ray7 = Ray(self.x, self.y, self.soll_angle - math.radians(90))
ray8 = Ray(self.x, self.y, self.soll_angle + math.radians(180))
ray9 = Ray(self.x, self.y, self.soll_angle + math.radians(10))
ray10 = Ray(self.x, self.y, self.soll_angle - math.radians(10))
ray11 = Ray(self.x, self.y, self.soll_angle + math.radians(135))
ray12 = Ray(self.x, self.y, self.soll_angle - math.radians(135))
ray13 = Ray(self.x, self.y, self.soll_angle + math.radians(20))
ray14 = Ray(self.x, self.y, self.soll_angle - math.radians(20))
ray15 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(90))
ray16 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(90))
ray17 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(0))
ray18 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(0))
self.rays = []
self.rays.append(ray1)
self.rays.append(ray2)
self.rays.append(ray3)
self.rays.append(ray4)
self.rays.append(ray5)
self.rays.append(ray6)
self.rays.append(ray7)
self.rays.append(ray8)
self.rays.append(ray9)
self.rays.append(ray10)
self.rays.append(ray11)
self.rays.append(ray12)
self.rays.append(ray13)
self.rays.append(ray14)
self.rays.append(ray15)
self.rays.append(ray16)
self.rays.append(ray17)
self.rays.append(ray18)
observations = []
self.closestRays = []
for ray in self.rays:
closest = None
record = math.inf
for wall in walls:
pt = ray.cast(wall)
if pt:
dist = distance(myPoint(self.x, self.y),pt)
if dist < record:
record = dist
closest = pt
if closest:
self.closestRays.append(closest)
observations.append(record)
else:
observations.append(1000)
for i in range(len(observations)):
observations[i] = ((1000 - observations[i]) / 1000)
observations.append(self.vel / self.maxvel)
return observations
def collision(self, wall):
line1 = myLine(self.p1, self.p2)
line2 = myLine(self.p2, self.p3)
line3 = myLine(self.p3, self.p4)
line4 = myLine(self.p4, self.p1)
x1 = wall.x1
y1 = wall.y1
x2 = wall.x2
y2 = wall.y2
lines = []
lines.append(line1)
lines.append(line2)
lines.append(line3)
lines.append(line4)
for li in lines:
x3 = li.pt1.x
y3 = li.pt1.y
x4 = li.pt2.x
y4 = li.pt2.y
den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
if(den == 0):
den = 0
else:
t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den
if t > 0 and t < 1 and u < 1 and u > 0:
return(True)
return(False)
def score(self, goal):
line1 = myLine(self.p1, self.p3)
vec = rotate(myPoint(0,0), myPoint(0,-50), self.angle)
line1 = myLine(myPoint(self.x,self.y),myPoint(self.x + vec.x, self.y + vec.y))
x1 = goal.x1
y1 = goal.y1
x2 = goal.x2
y2 = goal.y2
x3 = line1.pt1.x
y3 = line1.pt1.y
x4 = line1.pt2.x
y4 = line1.pt2.y
den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
if(den == 0):
den = 0
else:
t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den
if t > 0 and t < 1 and u < 1 and u > 0:
pt = math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1))
d = distance(myPoint(self.x, self.y), myPoint(pt[0], pt[1]))
if d < 20:
self.points += GOALREWARD
return(True)
return(False)
def reset(self):
self.x = 50
self.y = 300
self.velX = 0
self.velY = 0
self.vel = 0
self.angle = math.radians(180)
self.soll_angle = self.angle
self.points = 0
self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)
self.p1 = self.pt1
self.p2 = self.pt2
self.p3 = self.pt3
self.p4 = self.pt4
def draw(self, win):
win.blit(self.image, self.rect)
class RacingEnv:
def __init__(self):
pygame.init()
self.font = pygame.font.Font(pygame.font.get_default_font(), 36)
self.fps = 120
self.width = 1000
self.height = 600
self.history = []
self.screen = pygame.display.set_mode((self.width, self.height))
pygame.display.set_caption("RACING DDPG")
self.screen.fill((0,0,0))
self.back_image = pygame.image.load("track.png").convert()
self.back_rect = self.back_image.get_rect().move(0, 0)
self.action_space = None # You can keep this as None
self.game_reward = 0
self.score = 0
self.reset()
self.game_reward = 0
self.score = 0
self.reset()
def reset(self):
self.screen.fill((0, 0, 0))
self.car = Car(50, 300)
self.walls = getWalls()
self.goals = getGoals()
self.game_reward = 0
self.render(1) # 改了这边
def step(self, action):
self.car.action(action)
self.car.update()
reward = LIFE_REWARD
index = 1
for goal in self.goals:
if index >= len(self.goals):
index = 0
if goal.isactiv:
if self.car.score(goal):
goal.isactiv = False
self.goals[index + 1].isactiv = True
reward += GOALREWARD
index += 1
done = False
for wall in self.walls:
if self.car.collision(wall):
reward += PENALTY
done = True
break
new_state = self.car.cast(self.walls)
return new_state, reward, done, {}
self.render(action) # 改了这边
return new_state, reward, done, {}
def render(self, action):
DRAW_WALLS = False
DRAW_GOALS = False
DRAW_RAYS = True
pygame.time.delay(10)
self.clock = pygame.time.Clock()
self.screen.fill((0, 0, 0))
self.screen.blit(self.back_image, self.back_rect)
if DRAW_WALLS:
for wall in self.walls:
wall.draw(self.screen)
if DRAW_GOALS:
for goal in self.goals:
goal.draw(self.screen)
if goal.isactiv:
goal.draw(self.screen)
self.car.draw(self.screen)
if DRAW_RAYS:
i = 0
for pt in self.car.closestRays:
pygame.draw.circle(self.screen, (0,0,255), (pt.x, pt.y), 5)
i += 1
if i < 15:
pygame.draw.line(self.screen, (255,255,255), (self.car.x, self.car.y), (pt.x, pt.y), 1)
elif i >=15 and i < 17:
pygame.draw.line(self.screen, (255,255,255), ((self.car.p1.x + self.car.p2.x)/2, (self.car.p1.y + self.car.p2.y)/2), (pt.x, pt.y), 1)
elif i == 17:
pygame.draw.line(self.screen, (255,255,255), (self.car.p1.x , self.car.p1.y ), (pt.x, pt.y), 1)
else:
pygame.draw.line(self.screen, (255,255,255), (self.car.p2.x, self.car.p2.y), (pt.x, pt.y), 1)
pygame.draw.rect(self.screen,(255,255,255),(800, 100, 40, 40),2)
pygame.draw.rect(self.screen,(255,255,255),(850, 100, 40, 40),2)
pygame.draw.rect(self.screen,(255,255,255),(900, 100, 40, 40),2)
pygame.draw.rect(self.screen,(255,255,255),(850, 50, 40, 40),2)
if action == 4:
pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
elif action == 6:
pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
elif action == 5:
pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
elif action == 1:
pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
elif action == 8:
pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
elif action == 7:
pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
elif action == 2:
pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
elif action == 3:
pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
# score
text_surface = self.font.render(f'Points {self.car.points}', True, pygame.Color('green'))
self.screen.blit(text_surface, dest=(0, 0))
# speed
text_surface = self.font.render(f'Speed {self.car.vel*-1}', True, pygame.Color('green'))
self.screen.blit(text_surface, dest=(800, 0))
self.clock.tick(self.fps)
pygame.display.update()
def close(self):
pygame.quit()
import GameEnvDDPG
import numpy as np
from DDPG1 import DDPGAgent1
import random, math
# Parameters
training_episodes = 10000
max_training_time = 200
target_score = 74
env = GameEnvDDPG.RacingEnv()
# Adjust state_size based on your environment's state space
state_size = 22
action_size = 4
learning_rate = 0.001
tau = 0.001
gamma = 0.99
replay_buffer_size = 10000
batch_size = 64
# Create a DDPG agent
agent = DDPGAgent1(state_size, action_size, learning_rate, gamma, tau, replay_buffer_size, batch_size)
# Training loop
episode_rewards = []
render_interval = 10 # Reduce rendering frequency for better performance
for episode in range(1, training_episodes + 1):
state = env.reset() # Initialize state at the beginning of each episode
total_reward = 0
for t in range(max_training_time):
action = agent.act(state)
# Debugging code to check the action
print("Action:", action)
next_state, reward, done, _ = env.step(action[0])
agent.replay_buffer.add(state, action, reward, next_state, done)
if len(agent.replay_buffer.buffer) > batch_size:
batch = agent.replay_buffer.sample(batch_size)
agent.learn(*batch)
total_reward += reward
state = next_state # Update the state for the next time step
if done:
break
episode_rewards.append(total_reward)
average_reward = np.mean(episode_rewards[-100:])
print(f"Episode {episode}/{training_episodes} - Total Reward: {total_reward:.2f} - Average Reward: {average_reward:.2f}")
if episode % 100 == 0:
agent.save_model(f"ddpg_episode_{episode}.h5")
if average_reward >= target_score:
print(f"Target average reward reached! (>= {target_score})")
break
# Rendering (conditionally)
if episode % render_interval == 0:
env.render()
# Save the final model
agent.save_model("final_ddpg_model.h5")
# Close the environment when done
env.close()
What I have tried:
next_state, reward, done, _ = env.step(action[0])
IndexError: invalid index to scalar variable.
This is the error.