I don't know how to fix it

Question

1.00/5 (1 vote)

See more:

Python

import GameEnvDDPG
import numpy as np
from DDPG1 import DDPGAgent1 
import random, math

# Parameters
training_episodes = 10000
max_training_time = 200
target_score = 74

env = GameEnvDDPG.RacingEnv()

# Adjust state_size based on your environment's state space
state_size = 22
action_size = 4
learning_rate = 0.001
tau = 0.001
gamma = 0.99
replay_buffer_size = 10000
batch_size = 64

# Create a DDPG agent
agent = DDPGAgent1(state_size, action_size, learning_rate, gamma, tau, replay_buffer_size, batch_size)

# Training loop
episode_rewards = []
render_interval = 10  # Reduce rendering frequency for better performance

for episode in range(1, training_episodes + 1):
    state = env.reset()  # Initialize state at the beginning of each episode
    total_reward = 0

    for t in range(max_training_time):
        action = agent.act(state)
        
        # Debugging code to check the action
        print("Action:", action)
        
        next_state, reward, done, _ = env.step(action[0])

        agent.replay_buffer.add(state, action, reward, next_state, done)

        if len(agent.replay_buffer.buffer) > batch_size:
            batch = agent.replay_buffer.sample(batch_size)
            agent.learn(*batch)

        total_reward += reward
        state = next_state  # Update the state for the next time step

        if done:
            break

    episode_rewards.append(total_reward)
    average_reward = np.mean(episode_rewards[-100:])

    print(f"Episode {episode}/{training_episodes} - Total Reward: {total_reward:.2f} - Average Reward: {average_reward:.2f}")

    if episode % 100 == 0:
        agent.save_model(f"ddpg_episode_{episode}.h5")

    if average_reward >= target_score:
        print(f"Target average reward reached! (>= {target_score})")
        break

    # Rendering (conditionally)
    if episode % render_interval == 0:
        env.render()

# Save the final model
agent.save_model("final_ddpg_model.h5")

# Close the environment when done
env.close()

import pygame

class Wall:
    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
    
    def draw(self, win):
        pygame.draw.line(win, (255,255,255), (self.x1, self.y1), (self.x2, self.y2), 5)

def getWalls():
    walls = []

    wall1 = Wall(12, 451, 15, 130)
    wall2 = Wall(15, 130, 61, 58)
    wall3 = Wall(61, 58, 149, 14)
    wall4 = Wall(149, 14, 382, 20)
    wall5 = Wall(382, 20, 549, 31)
    wall6 = Wall(549, 31, 636, 58)
    wall7 = Wall(636, 58, 678, 102)
    wall8 = Wall(678, 102, 669, 167)
    wall9 = Wall(669, 167, 600, 206)
    wall10 = Wall(600, 206, 507, 214)
    wall11 = Wall(507, 214, 422, 232)
    wall12 = Wall(422, 232, 375, 263)
    wall13 = Wall(375, 263, 379, 283)
    wall14 = Wall(379, 283, 454, 299)
    wall15 = Wall(454, 299, 613, 286)
    wall16 = Wall(613, 286, 684, 238)
    wall17 = Wall(684, 238, 752, 180)
    wall18 = Wall(752, 180, 862, 185)
    wall19 = Wall(862, 185, 958, 279)
    wall20 = Wall(958, 279, 953, 410)
    wall21 = Wall(953, 410, 925, 505)
    wall22 = Wall(925, 505, 804, 566)
    wall23 = Wall(804, 566, 150, 570)
    wall24 = Wall(150, 570, 46, 529)
    wall25 = Wall(46, 529, 12, 451)
    wall27 = Wall(104, 436, 96, 161)
    wall28 = Wall(96, 161, 122, 122)
    wall29 = Wall(122, 122, 199, 91)
    wall30 = Wall(199, 91, 376, 94)
    wall31 = Wall(376, 94, 469, 100)
    wall32 = Wall(469, 100, 539, 102)
    wall33 = Wall(539, 102, 585, 121)
    wall34 = Wall(585, 121, 585, 139)
    wall35 = Wall(585, 139, 454, 158)
    wall36 = Wall(454, 158, 352, 183)
    wall37 = Wall(352, 183, 293, 239)
    wall38 = Wall(293, 239, 294, 318)
    wall39 = Wall(294, 318, 361, 357)
    wall40 = Wall(361, 357, 490, 373)
    wall41 = Wall(490, 373, 671, 359)
    wall42 = Wall(671, 359, 752, 300) #
    wall43 = Wall(752, 300, 812, 310)#
    wall44 = Wall(812, 310, 854, 369)
    wall45 = Wall(854, 369, 854, 429)
    wall46 = Wall(854, 429, 754, 483)
    wall47 = Wall(754, 483, 192, 489)
    wall48 = Wall(192, 489, 104, 436)

    walls.append(wall1)
    walls.append(wall2)
    walls.append(wall3)
    walls.append(wall4)
    walls.append(wall5)
    walls.append(wall6)
    walls.append(wall7)
    walls.append(wall8)
    walls.append(wall9)
    walls.append(wall10)
    walls.append(wall11)
    walls.append(wall12)
    walls.append(wall13)
    walls.append(wall14)
    walls.append(wall15)
    walls.append(wall16)
    walls.append(wall17)
    walls.append(wall18)
    walls.append(wall19)
    walls.append(wall20)
    walls.append(wall21)
    walls.append(wall22)
    walls.append(wall23)
    walls.append(wall24)
    walls.append(wall25)

    walls.append(wall27)
    walls.append(wall28)
    walls.append(wall29)
    walls.append(wall30)
    walls.append(wall31)
    walls.append(wall32)
    walls.append(wall33)
    walls.append(wall34)
    walls.append(wall35)
    walls.append(wall36)
    walls.append(wall37)
    walls.append(wall38)
    walls.append(wall39)
    walls.append(wall40)
    walls.append(wall41)
    walls.append(wall42)
    walls.append(wall43)
    walls.append(wall44)
    walls.append(wall45)
    walls.append(wall46)
    walls.append(wall47)
    walls.append(wall48)

    return(walls)

import pygame

class Goal:
    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

        self.isactiv = False
    
    def draw(self, win):
        pygame.draw.line(win, (0,255,0), (self.x1, self.y1), (self.x2, self.y2), 2)
        if self.isactiv:
            pygame.draw.line(win, (255,0,0), (self.x1, self.y1), (self.x2, self.y2), 2)

# the file of shame
def getGoals():
    goals = []

    goal1 = Goal(0,200,120,200)
    goal2 = Goal(0,100,120,150)
    goal2_5 = Goal(0,0,150,130)
    goal3 = Goal(120,0,170,120)
    goal3_5 = Goal(200,0,200,120)
    goal4 = Goal(270,0,270,110)
    goal4_5 = Goal(350,0,350,110)
    goal5 = Goal(450,0,450,110)
    goal5_5 = Goal(525,0,525,110)
    goal6 = Goal(600,0,550,130)
    goal6_5 = Goal(550,130,700,60)
    goal7 = Goal(550,130,700,130)
    goal7_5 = Goal(550,130,650,200)
    goal8 = Goal(550,130,570,240)
    goal9 = Goal(410,130,430,260)
    goal9_5 = Goal(430,260,300,350)
    goal10 = Goal(430,260,260,260)
    goal10_5 = Goal(430,260,280,180)
    goal11 = Goal(430,260,400,400)
    goal12 = Goal(550,260,570,400)
    goal13 = Goal(750,400,650,200)
    goal14 = Goal(750,400,800,160)
    goal15 = Goal(750,400,950,240)
    goal16 = Goal(750,400,980,440)
    goal17 = Goal(750,400,900,600)
    goal18 = Goal(750,460,750,600)
    goal19 = Goal(670,460,670,600)
    goal19_5 = Goal(590,460,590,600)
    goal20 = Goal(510,460,510,600)
    goal20_5 = Goal(430,460,430,600)
    goal21 = Goal(350,460,350,600)
    goal21_5 = Goal(280,460,278,600)
    goal22 = Goal(210,460,190,600)
    goal22_5 = Goal(80,600,175,440)
    goal23 = Goal(150,420,0,570)
    goal23_5 = Goal(0,450,130,400)
    goal24 = Goal(0,380,130,380)

    goals.append(goal1)
    goals.append(goal2)
    goals.append(goal2_5)
    goals.append(goal3)
    goals.append(goal3_5)
    goals.append(goal4)
    goals.append(goal4_5)
    goals.append(goal5)
    goals.append(goal5_5)
    goals.append(goal6)
    goals.append(goal6_5)
    goals.append(goal7)
    goals.append(goal7_5)
    goals.append(goal8)
    goals.append(goal9)
    goals.append(goal10_5)
    goals.append(goal10)
    goals.append(goal9_5)
    goals.append(goal11)
    goals.append(goal12)
    goals.append(goal13)
    goals.append(goal14)
    goals.append(goal15)
    goals.append(goal16)
    goals.append(goal17)
    goals.append(goal18)
    goals.append(goal19)
    goals.append(goal19_5)
    goals.append(goal20)
    goals.append(goal20_5)
    goals.append(goal21)
    goals.append(goal21_5)
    goals.append(goal22)
    goals.append(goal22_5)
    goals.append(goal23)
    goals.append(goal23_5)
    goals.append(goal24)

    goals[len(goals)-1].isactiv = True

    return(goals)

import pygame
import math
import numpy as np
from Walls import Wall
from Walls import getWalls
from Goals import Goal
from Goals import getGoals

GOALREWARD = 1
LIFE_REWARD = 0
PENALTY = -1


def distance(pt1, pt2):
    return(((pt1.x - pt2.x)**2 + (pt1.y - pt2.y)**2)**0.5)

def rotate(origin,point,angle):
    qx = origin.x + math.cos(angle) * (point.x - origin.x) - math.sin(angle) * (point.y - origin.y)
    qy = origin.y + math.sin(angle) * (point.x - origin.x) + math.cos(angle) * (point.y - origin.y)
    q = myPoint(qx, qy)
    return q

def rotateRect(pt1, pt2, pt3, pt4, angle):

    pt_center = myPoint((pt1.x + pt3.x)/2, (pt1.y + pt3.y)/2)

    pt1 = rotate(pt_center,pt1,angle)
    pt2 = rotate(pt_center,pt2,angle)
    pt3 = rotate(pt_center,pt3,angle)
    pt4 = rotate(pt_center,pt4,angle)

    return pt1, pt2, pt3, pt4

class myPoint:
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
class myLine:
    def __init__(self, pt1, pt2):
        self.pt1 = myPoint(pt1.x, pt1.y)
        self.pt2 = myPoint(pt2.x, pt2.y)

class Ray:
    def __init__(self,x,y,angle):
        self.x = x
        self.y = y
        self.angle = angle

    def cast(self, wall):
        x1 = wall.x1 
        y1 = wall.y1
        x2 = wall.x2
        y2 = wall.y2

        vec = rotate(myPoint(0,0), myPoint(0,-1000), self.angle)
        
        x3 = self.x
        y3 = self.y
        x4 = self.x + vec.x
        y4 = self.y + vec.y

        den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
            
        if(den == 0):
            den = 0
        else:
            t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
            u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

            if t > 0 and t < 1 and u < 1 and u > 0:
                pt = myPoint(math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1)))
                return(pt)

class Car:
    def __init__(self, x, y):
        self.pt = myPoint(x, y)
        self.x = x
        self.y = y
        self.width = 14
        self.height = 30

        self.points = 0

        self.original_image = pygame.image.load("car.png").convert()
        self.image = self.original_image  
        self.image.set_colorkey((0,0,0))
        self.rect = self.image.get_rect().move(self.x, self.y)

        self.angle = math.radians(180)
        self.soll_angle = self.angle

        self.dvel = 1
        self.vel = 0
        self.velX = 0
        self.velY = 0
        self.maxvel = 15 

        self.angle = math.radians(180)
        self.soll_angle = self.angle

        self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
        self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
        self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
        self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)

        self.p1 = self.pt1
        self.p2 = self.pt2
        self.p3 = self.pt3
        self.p4 = self.pt4

        self.distances = []
        self.closestRays = [] # 改了这边
    

    def action(self, choice):
        if choice == 0:
            pass
        elif np.any(choice == 1):
            self.accelerate(self.dvel)
        elif np.any(choice == 8):
            self.accelerate(self.dvel)
            self.turn(1)
        elif np.any(choice == 7):
            self.accelerate(self.dvel)
            self.turn(-1)
        elif np.any(choice == 4):
            self.accelerate(-self.dvel)
        elif np.any(choice == 5):
            self.accelerate(-self.dvel)
            self.turn(1)
        elif np.any(choice == 6):
            self.accelerate(-self.dvel)
            self.turn(-1)
        elif np.any(choice == 3):
            self.turn(1)
        elif np.any(choice == 2):
            self.turn(-1)

    def accelerate(self,dvel):
        dvel = dvel * 2

        self.vel = self.vel + dvel

        if self.vel > self.maxvel:
            self.vel = self.maxvel
        
        if self.vel < -self.maxvel:
            self.vel = -self.maxvel
        
        
    def turn(self, dir):
        self.soll_angle = self.soll_angle + dir * math.radians(15)
    
    def update(self):

        self.angle = self.soll_angle

        vec_temp = rotate(myPoint(0,0), myPoint(0,self.vel), self.angle)
        self.velX, self.velY = vec_temp.x, vec_temp.y

        self.x = self.x + self.velX
        self.y = self.y + self.velY

        self.rect.center = self.x, self.y

        self.pt1 = myPoint(self.pt1.x + self.velX, self.pt1.y + self.velY)
        self.pt2 = myPoint(self.pt2.x + self.velX, self.pt2.y + self.velY)
        self.pt3 = myPoint(self.pt3.x + self.velX, self.pt3.y + self.velY)
        self.pt4 = myPoint(self.pt4.x + self.velX, self.pt4.y + self.velY)

        self.p1 ,self.p2 ,self.p3 ,self.p4  = rotateRect(self.pt1, self.pt2, self.pt3, self.pt4, self.soll_angle)

        self.image = pygame.transform.rotate(self.original_image, 90 - self.soll_angle * 180 / math.pi)
        x, y = self.rect.center  
        self.rect = self.image.get_rect()  
        self.rect.center = (x, y)

    def cast(self, walls):

        ray1 = Ray(self.x, self.y, self.soll_angle)
        ray2 = Ray(self.x, self.y, self.soll_angle - math.radians(30))
        ray3 = Ray(self.x, self.y, self.soll_angle + math.radians(30))
        ray4 = Ray(self.x, self.y, self.soll_angle + math.radians(45))
        ray5 = Ray(self.x, self.y, self.soll_angle - math.radians(45))
        ray6 = Ray(self.x, self.y, self.soll_angle + math.radians(90))
        ray7 = Ray(self.x, self.y, self.soll_angle - math.radians(90))
        ray8 = Ray(self.x, self.y, self.soll_angle + math.radians(180))

        ray9 = Ray(self.x, self.y, self.soll_angle + math.radians(10))
        ray10 = Ray(self.x, self.y, self.soll_angle - math.radians(10))
        ray11 = Ray(self.x, self.y, self.soll_angle + math.radians(135))
        ray12 = Ray(self.x, self.y, self.soll_angle - math.radians(135))
        ray13 = Ray(self.x, self.y, self.soll_angle + math.radians(20))
        ray14 = Ray(self.x, self.y, self.soll_angle - math.radians(20))

        ray15 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(90))
        ray16 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(90))

        ray17 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(0))
        ray18 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(0))

        self.rays = []
        self.rays.append(ray1)
        self.rays.append(ray2)
        self.rays.append(ray3)
        self.rays.append(ray4)
        self.rays.append(ray5)
        self.rays.append(ray6)
        self.rays.append(ray7)
        self.rays.append(ray8)

        self.rays.append(ray9)
        self.rays.append(ray10)
        self.rays.append(ray11)
        self.rays.append(ray12)
        self.rays.append(ray13)
        self.rays.append(ray14)

        self.rays.append(ray15)
        self.rays.append(ray16)

        self.rays.append(ray17)
        self.rays.append(ray18)


        observations = []
        self.closestRays = []

        for ray in self.rays:
            closest = None 
            record = math.inf
            for wall in walls:
                pt = ray.cast(wall)
                if pt:
                    dist = distance(myPoint(self.x, self.y),pt)
                    if dist < record:
                        record = dist
                        closest = pt

            if closest: 
                self.closestRays.append(closest)
                observations.append(record)
               
            else:
                observations.append(1000)

        for i in range(len(observations)):
            observations[i] = ((1000 - observations[i]) / 1000)

        observations.append(self.vel / self.maxvel)
        return observations

    def collision(self, wall):

        line1 = myLine(self.p1, self.p2)
        line2 = myLine(self.p2, self.p3)
        line3 = myLine(self.p3, self.p4)
        line4 = myLine(self.p4, self.p1)

        x1 = wall.x1 
        y1 = wall.y1
        x2 = wall.x2
        y2 = wall.y2

        lines = []
        lines.append(line1)
        lines.append(line2)
        lines.append(line3)
        lines.append(line4)

        for li in lines:
            
            x3 = li.pt1.x
            y3 = li.pt1.y
            x4 = li.pt2.x
            y4 = li.pt2.y

            den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
            
            if(den == 0):
                den = 0
            else:
                t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
                u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

                if t > 0 and t < 1 and u < 1 and u > 0:
                    return(True)
        
        return(False)
    
    def score(self, goal):
        
        line1 = myLine(self.p1, self.p3)

        vec = rotate(myPoint(0,0), myPoint(0,-50), self.angle)
        line1 = myLine(myPoint(self.x,self.y),myPoint(self.x + vec.x, self.y + vec.y))

        x1 = goal.x1 
        y1 = goal.y1
        x2 = goal.x2
        y2 = goal.y2
            
        x3 = line1.pt1.x
        y3 = line1.pt1.y
        x4 = line1.pt2.x
        y4 = line1.pt2.y

        den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
        
        if(den == 0):
            den = 0
        else:
            t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
            u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

            if t > 0 and t < 1 and u < 1 and u > 0:
                pt = math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1))

                d = distance(myPoint(self.x, self.y), myPoint(pt[0], pt[1]))
                if d < 20:
                    self.points += GOALREWARD
                    return(True)

        return(False)

    def reset(self):

        self.x = 50
        self.y = 300
        self.velX = 0
        self.velY = 0
        self.vel = 0
        self.angle = math.radians(180)
        self.soll_angle = self.angle
        self.points = 0

        self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
        self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
        self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
        self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)

        self.p1 = self.pt1
        self.p2 = self.pt2
        self.p3 = self.pt3
        self.p4 = self.pt4

    def draw(self, win):
        win.blit(self.image, self.rect)
  

class RacingEnv:

    def __init__(self):
        pygame.init()
        self.font = pygame.font.Font(pygame.font.get_default_font(), 36)

        self.fps = 120
        self.width = 1000
        self.height = 600
        self.history = []

        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("RACING DDPG")
        self.screen.fill((0,0,0))
        self.back_image = pygame.image.load("track.png").convert()
        self.back_rect = self.back_image.get_rect().move(0, 0)
        self.action_space = None  # You can keep this as None
        self.game_reward = 0
        self.score = 0

        self.reset()

        self.game_reward = 0
        self.score = 0
 
        self.reset()

    def reset(self):
        self.screen.fill((0, 0, 0))

        self.car = Car(50, 300)
        self.walls = getWalls()
        self.goals = getGoals()
        self.game_reward = 0
        self.render(1) # 改了这边

    def step(self, action):

        self.car.action(action)
        self.car.update()
        reward = LIFE_REWARD  
        index = 1
        for goal in self.goals:
            if index >= len(self.goals):
                index = 0
            if goal.isactiv:
                if self.car.score(goal):
                    goal.isactiv = False
                    self.goals[index + 1].isactiv = True
                    reward += GOALREWARD
            index += 1
        done = False
        for wall in self.walls:
            if self.car.collision(wall):
                reward += PENALTY
                done = True
                break
        new_state = self.car.cast(self.walls)
        return new_state, reward, done, {}

        self.render(action) # 改了这边
        return new_state, reward, done, {}

    def render(self, action):

        DRAW_WALLS = False
        DRAW_GOALS = False
        DRAW_RAYS = True

        pygame.time.delay(10)

        self.clock = pygame.time.Clock()
        self.screen.fill((0, 0, 0))

        self.screen.blit(self.back_image, self.back_rect)

        if DRAW_WALLS:
            for wall in self.walls:
                wall.draw(self.screen)
        
        if DRAW_GOALS:
            for goal in self.goals:
                goal.draw(self.screen)
                if goal.isactiv:
                    goal.draw(self.screen)
        
        self.car.draw(self.screen)

        if DRAW_RAYS:
            i = 0
            for pt in self.car.closestRays:
                pygame.draw.circle(self.screen, (0,0,255), (pt.x, pt.y), 5)
                i += 1
                if i < 15:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.x, self.car.y), (pt.x, pt.y), 1)
                elif i >=15 and i < 17:
                    pygame.draw.line(self.screen, (255,255,255), ((self.car.p1.x + self.car.p2.x)/2, (self.car.p1.y + self.car.p2.y)/2), (pt.x, pt.y), 1)
                elif i == 17:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.p1.x , self.car.p1.y ), (pt.x, pt.y), 1)
                else:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.p2.x, self.car.p2.y), (pt.x, pt.y), 1)

        pygame.draw.rect(self.screen,(255,255,255),(800, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(850, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(900, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(850, 50, 40, 40),2)

        if action == 4:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40)) 
        elif action == 6:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 5:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
        elif action == 1:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40)) 
        elif action == 8:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 7:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
        elif action == 2:
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 3:
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))

        # score
        text_surface = self.font.render(f'Points {self.car.points}', True, pygame.Color('green'))
        self.screen.blit(text_surface, dest=(0, 0))
        # speed
        text_surface = self.font.render(f'Speed {self.car.vel*-1}', True, pygame.Color('green'))
        self.screen.blit(text_surface, dest=(800, 0))

        self.clock.tick(self.fps)
        pygame.display.update()

    def close(self):
        pygame.quit()

import GameEnvDDPG
import numpy as np
from DDPG1 import DDPGAgent1 
import random, math

# Parameters
training_episodes = 10000
max_training_time = 200
target_score = 74

env = GameEnvDDPG.RacingEnv()

# Adjust state_size based on your environment's state space
state_size = 22
action_size = 4
learning_rate = 0.001
tau = 0.001
gamma = 0.99
replay_buffer_size = 10000
batch_size = 64

# Create a DDPG agent
agent = DDPGAgent1(state_size, action_size, learning_rate, gamma, tau, replay_buffer_size, batch_size)

# Training loop
episode_rewards = []
render_interval = 10  # Reduce rendering frequency for better performance

for episode in range(1, training_episodes + 1):
    state = env.reset()  # Initialize state at the beginning of each episode
    total_reward = 0

    for t in range(max_training_time):
        action = agent.act(state)
        
        # Debugging code to check the action
        print("Action:", action)
        
        next_state, reward, done, _ = env.step(action[0])

        agent.replay_buffer.add(state, action, reward, next_state, done)

        if len(agent.replay_buffer.buffer) > batch_size:
            batch = agent.replay_buffer.sample(batch_size)
            agent.learn(*batch)

        total_reward += reward
        state = next_state  # Update the state for the next time step

        if done:
            break

    episode_rewards.append(total_reward)
    average_reward = np.mean(episode_rewards[-100:])

    print(f"Episode {episode}/{training_episodes} - Total Reward: {total_reward:.2f} - Average Reward: {average_reward:.2f}")

    if episode % 100 == 0:
        agent.save_model(f"ddpg_episode_{episode}.h5")

    if average_reward >= target_score:
        print(f"Target average reward reached! (>= {target_score})")
        break

    # Rendering (conditionally)
    if episode % render_interval == 0:
        env.render()

# Save the final model
agent.save_model("final_ddpg_model.h5")

# Close the environment when done
env.close()

What I have tried:

   next_state, reward, done, _ = env.step(action[0])
IndexError: invalid index to scalar variable.

This is the error.

Posted 27-Aug-23 7:09am

Kelvin Beh

Updated 28-Aug-23 9:16am

Deeksha Shenoy

v2

Add a Solution

1 solution

Add a Solution

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

k5054 · Answer 1 · 2023-08-27T11:36:00

It's fairly straight forward : the variable action is a scalar e.g an integer, floating point, character, etc, and not an array or other indexable object. The assignment to action seems to be in the first block of code, above

Python

action = agent.act(state)

. As a quick guess, the offending line should be

Python

next_state, reward, done, _ = env.step(action)

instead. But you'll have to check your docs on what agent.act() returns, and whether that is the correct thing to pass to a env.step() call.