Python
import GameEnvDDPG
import numpy as np
from DDPG1 import DDPGAgent1 
import random, math

# Parameters
training_episodes = 10000
max_training_time = 200
target_score = 74

env = GameEnvDDPG.RacingEnv()

# Adjust state_size based on your environment's state space
state_size = 22
action_size = 4
learning_rate = 0.001
tau = 0.001
gamma = 0.99
replay_buffer_size = 10000
batch_size = 64

# Create a DDPG agent
agent = DDPGAgent1(state_size, action_size, learning_rate, gamma, tau, replay_buffer_size, batch_size)

# Training loop
episode_rewards = []
render_interval = 10  # Reduce rendering frequency for better performance

for episode in range(1, training_episodes + 1):
    state = env.reset()  # Initialize state at the beginning of each episode
    total_reward = 0

    for t in range(max_training_time):
        action = agent.act(state)
        
        # Debugging code to check the action
        print("Action:", action)
        
        next_state, reward, done, _ = env.step(action[0])

        agent.replay_buffer.add(state, action, reward, next_state, done)

        if len(agent.replay_buffer.buffer) > batch_size:
            batch = agent.replay_buffer.sample(batch_size)
            agent.learn(*batch)

        total_reward += reward
        state = next_state  # Update the state for the next time step

        if done:
            break

    episode_rewards.append(total_reward)
    average_reward = np.mean(episode_rewards[-100:])

    print(f"Episode {episode}/{training_episodes} - Total Reward: {total_reward:.2f} - Average Reward: {average_reward:.2f}")

    if episode % 100 == 0:
        agent.save_model(f"ddpg_episode_{episode}.h5")

    if average_reward >= target_score:
        print(f"Target average reward reached! (>= {target_score})")
        break

    # Rendering (conditionally)
    if episode % render_interval == 0:
        env.render()

# Save the final model
agent.save_model("final_ddpg_model.h5")

# Close the environment when done
env.close()


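# Walls.py (imported by GameEnvDDPG below)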
import pygame

class Wall:
    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
    
    def draw(self, win):
        pygame.draw.line(win, (255,255,255), (self.x1, self.y1), (self.x2, self.y2), 5)

def getWalls():
    walls = []

    wall1 = Wall(12, 451, 15, 130)
    wall2 = Wall(15, 130, 61, 58)
    wall3 = Wall(61, 58, 149, 14)
    wall4 = Wall(149, 14, 382, 20)
    wall5 = Wall(382, 20, 549, 31)
    wall6 = Wall(549, 31, 636, 58)
    wall7 = Wall(636, 58, 678, 102)
    wall8 = Wall(678, 102, 669, 167)
    wall9 = Wall(669, 167, 600, 206)
    wall10 = Wall(600, 206, 507, 214)
    wall11 = Wall(507, 214, 422, 232)
    wall12 = Wall(422, 232, 375, 263)
    wall13 = Wall(375, 263, 379, 283)
    wall14 = Wall(379, 283, 454, 299)
    wall15 = Wall(454, 299, 613, 286)
    wall16 = Wall(613, 286, 684, 238)
    wall17 = Wall(684, 238, 752, 180)
    wall18 = Wall(752, 180, 862, 185)
    wall19 = Wall(862, 185, 958, 279)
    wall20 = Wall(958, 279, 953, 410)
    wall21 = Wall(953, 410, 925, 505)
    wall22 = Wall(925, 505, 804, 566)
    wall23 = Wall(804, 566, 150, 570)
    wall24 = Wall(150, 570, 46, 529)
    wall25 = Wall(46, 529, 12, 451)
    wall27 = Wall(104, 436, 96, 161)
    wall28 = Wall(96, 161, 122, 122)
    wall29 = Wall(122, 122, 199, 91)
    wall30 = Wall(199, 91, 376, 94)
    wall31 = Wall(376, 94, 469, 100)
    wall32 = Wall(469, 100, 539, 102)
    wall33 = Wall(539, 102, 585, 121)
    wall34 = Wall(585, 121, 585, 139)
    wall35 = Wall(585, 139, 454, 158)
    wall36 = Wall(454, 158, 352, 183)
    wall37 = Wall(352, 183, 293, 239)
    wall38 = Wall(293, 239, 294, 318)
    wall39 = Wall(294, 318, 361, 357)
    wall40 = Wall(361, 357, 490, 373)
    wall41 = Wall(490, 373, 671, 359)
    wall42 = Wall(671, 359, 752, 300)
    wall43 = Wall(752, 300, 812, 310)
    wall44 = Wall(812, 310, 854, 369)
    wall45 = Wall(854, 369, 854, 429)
    wall46 = Wall(854, 429, 754, 483)
    wall47 = Wall(754, 483, 192, 489)
    wall48 = Wall(192, 489, 104, 436)

    walls.extend([
        wall1, wall2, wall3, wall4, wall5, wall6, wall7, wall8, wall9, wall10,
        wall11, wall12, wall13, wall14, wall15, wall16, wall17, wall18, wall19, wall20,
        wall21, wall22, wall23, wall24, wall25,
        wall27, wall28, wall29, wall30, wall31, wall32, wall33, wall34, wall35, wall36,
        wall37, wall38, wall39, wall40, wall41, wall42, wall43, wall44, wall45, wall46,
        wall47, wall48,
    ])

    return(walls)


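# Goals.py (imported by GameEnvDDPG below)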
import pygame

class Goal:
    def __init__(self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

        self.isactiv = False
    
    def draw(self, win):
        pygame.draw.line(win, (0,255,0), (self.x1, self.y1), (self.x2, self.y2), 2)
        if self.isactiv:
            pygame.draw.line(win, (255,0,0), (self.x1, self.y1), (self.x2, self.y2), 2)

# the file of shame
def getGoals():
    goals = []

    goal1 = Goal(0,200,120,200)
    goal2 = Goal(0,100,120,150)
    goal2_5 = Goal(0,0,150,130)
    goal3 = Goal(120,0,170,120)
    goal3_5 = Goal(200,0,200,120)
    goal4 = Goal(270,0,270,110)
    goal4_5 = Goal(350,0,350,110)
    goal5 = Goal(450,0,450,110)
    goal5_5 = Goal(525,0,525,110)
    goal6 = Goal(600,0,550,130)
    goal6_5 = Goal(550,130,700,60)
    goal7 = Goal(550,130,700,130)
    goal7_5 = Goal(550,130,650,200)
    goal8 = Goal(550,130,570,240)
    goal9 = Goal(410,130,430,260)
    goal9_5 = Goal(430,260,300,350)
    goal10 = Goal(430,260,260,260)
    goal10_5 = Goal(430,260,280,180)
    goal11 = Goal(430,260,400,400)
    goal12 = Goal(550,260,570,400)
    goal13 = Goal(750,400,650,200)
    goal14 = Goal(750,400,800,160)
    goal15 = Goal(750,400,950,240)
    goal16 = Goal(750,400,980,440)
    goal17 = Goal(750,400,900,600)
    goal18 = Goal(750,460,750,600)
    goal19 = Goal(670,460,670,600)
    goal19_5 = Goal(590,460,590,600)
    goal20 = Goal(510,460,510,600)
    goal20_5 = Goal(430,460,430,600)
    goal21 = Goal(350,460,350,600)
    goal21_5 = Goal(280,460,278,600)
    goal22 = Goal(210,460,190,600)
    goal22_5 = Goal(80,600,175,440)
    goal23 = Goal(150,420,0,570)
    goal23_5 = Goal(0,450,130,400)
    goal24 = Goal(0,380,130,380)

    # Keep this exact order: RacingEnv.step() walks the list to decide which goal becomes active next
    goals.extend([
        goal1, goal2, goal2_5, goal3, goal3_5, goal4, goal4_5, goal5, goal5_5,
        goal6, goal6_5, goal7, goal7_5, goal8, goal9, goal10_5, goal10, goal9_5,
        goal11, goal12, goal13, goal14, goal15, goal16, goal17, goal18,
        goal19, goal19_5, goal20, goal20_5, goal21, goal21_5, goal22, goal22_5,
        goal23, goal23_5, goal24,
    ])

    goals[len(goals)-1].isactiv = True

    return(goals)


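# GameEnvDDPG.py (the RacingEnv used by the training script above)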
import pygame
import math
import numpy as np
from Walls import Wall
from Walls import getWalls
from Goals import Goal
from Goals import getGoals

GOALREWARD = 1
LIFE_REWARD = 0
PENALTY = -1


def distance(pt1, pt2):
    return(((pt1.x - pt2.x)**2 + (pt1.y - pt2.y)**2)**0.5)

def rotate(origin,point,angle):
    qx = origin.x + math.cos(angle) * (point.x - origin.x) - math.sin(angle) * (point.y - origin.y)
    qy = origin.y + math.sin(angle) * (point.x - origin.x) + math.cos(angle) * (point.y - origin.y)
    q = myPoint(qx, qy)
    return q

def rotateRect(pt1, pt2, pt3, pt4, angle):

    pt_center = myPoint((pt1.x + pt3.x)/2, (pt1.y + pt3.y)/2)

    pt1 = rotate(pt_center,pt1,angle)
    pt2 = rotate(pt_center,pt2,angle)
    pt3 = rotate(pt_center,pt3,angle)
    pt4 = rotate(pt_center,pt4,angle)

    return pt1, pt2, pt3, pt4

class myPoint:
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
class myLine:
    def __init__(self, pt1, pt2):
        self.pt1 = myPoint(pt1.x, pt1.y)
        self.pt2 = myPoint(pt2.x, pt2.y)

class Ray:
    def __init__(self,x,y,angle):
        self.x = x
        self.y = y
        self.angle = angle

    def cast(self, wall):
        x1 = wall.x1 
        y1 = wall.y1
        x2 = wall.x2
        y2 = wall.y2

        vec = rotate(myPoint(0,0), myPoint(0,-1000), self.angle)
        
        x3 = self.x
        y3 = self.y
        x4 = self.x + vec.x
        y4 = self.y + vec.y

        den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
            
        if den != 0:
            t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
            u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

            if 0 < t < 1 and 0 < u < 1:
                pt = myPoint(math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1)))
                return(pt)

class Car:
    def __init__(self, x, y):
        self.pt = myPoint(x, y)
        self.x = x
        self.y = y
        self.width = 14
        self.height = 30

        self.points = 0

        self.original_image = pygame.image.load("car.png").convert()
        self.image = self.original_image  
        self.image.set_colorkey((0,0,0))
        self.rect = self.image.get_rect().move(self.x, self.y)

        self.angle = math.radians(180)
        self.soll_angle = self.angle

        self.dvel = 1
        self.vel = 0
        self.velX = 0
        self.velY = 0
        self.maxvel = 15 

        self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
        self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
        self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
        self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)

        self.p1 = self.pt1
        self.p2 = self.pt2
        self.p3 = self.pt3
        self.p4 = self.pt4

        self.distances = []
        self.closestRays = []  # changed here
    

    def action(self, choice):
        if choice == 0:
            pass
        elif np.any(choice == 1):
            self.accelerate(self.dvel)
        elif np.any(choice == 8):
            self.accelerate(self.dvel)
            self.turn(1)
        elif np.any(choice == 7):
            self.accelerate(self.dvel)
            self.turn(-1)
        elif np.any(choice == 4):
            self.accelerate(-self.dvel)
        elif np.any(choice == 5):
            self.accelerate(-self.dvel)
            self.turn(1)
        elif np.any(choice == 6):
            self.accelerate(-self.dvel)
            self.turn(-1)
        elif np.any(choice == 3):
            self.turn(1)
        elif np.any(choice == 2):
            self.turn(-1)

    def accelerate(self,dvel):
        dvel = dvel * 2

        self.vel = self.vel + dvel

        if self.vel > self.maxvel:
            self.vel = self.maxvel
        
        if self.vel < -self.maxvel:
            self.vel = -self.maxvel
        
        
    def turn(self, dir):
        self.soll_angle = self.soll_angle + dir * math.radians(15)
    
    def update(self):

        self.angle = self.soll_angle

        vec_temp = rotate(myPoint(0,0), myPoint(0,self.vel), self.angle)
        self.velX, self.velY = vec_temp.x, vec_temp.y

        self.x = self.x + self.velX
        self.y = self.y + self.velY

        self.rect.center = self.x, self.y

        self.pt1 = myPoint(self.pt1.x + self.velX, self.pt1.y + self.velY)
        self.pt2 = myPoint(self.pt2.x + self.velX, self.pt2.y + self.velY)
        self.pt3 = myPoint(self.pt3.x + self.velX, self.pt3.y + self.velY)
        self.pt4 = myPoint(self.pt4.x + self.velX, self.pt4.y + self.velY)

        self.p1 ,self.p2 ,self.p3 ,self.p4  = rotateRect(self.pt1, self.pt2, self.pt3, self.pt4, self.soll_angle)

        self.image = pygame.transform.rotate(self.original_image, 90 - self.soll_angle * 180 / math.pi)
        x, y = self.rect.center  
        self.rect = self.image.get_rect()  
        self.rect.center = (x, y)

    def cast(self, walls):

        ray1 = Ray(self.x, self.y, self.soll_angle)
        ray2 = Ray(self.x, self.y, self.soll_angle - math.radians(30))
        ray3 = Ray(self.x, self.y, self.soll_angle + math.radians(30))
        ray4 = Ray(self.x, self.y, self.soll_angle + math.radians(45))
        ray5 = Ray(self.x, self.y, self.soll_angle - math.radians(45))
        ray6 = Ray(self.x, self.y, self.soll_angle + math.radians(90))
        ray7 = Ray(self.x, self.y, self.soll_angle - math.radians(90))
        ray8 = Ray(self.x, self.y, self.soll_angle + math.radians(180))

        ray9 = Ray(self.x, self.y, self.soll_angle + math.radians(10))
        ray10 = Ray(self.x, self.y, self.soll_angle - math.radians(10))
        ray11 = Ray(self.x, self.y, self.soll_angle + math.radians(135))
        ray12 = Ray(self.x, self.y, self.soll_angle - math.radians(135))
        ray13 = Ray(self.x, self.y, self.soll_angle + math.radians(20))
        ray14 = Ray(self.x, self.y, self.soll_angle - math.radians(20))

        ray15 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(90))
        ray16 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(90))

        ray17 = Ray(self.p1.x,self.p1.y, self.soll_angle + math.radians(0))
        ray18 = Ray(self.p2.x,self.p2.y, self.soll_angle - math.radians(0))

        self.rays = [
            ray1, ray2, ray3, ray4, ray5, ray6, ray7, ray8,
            ray9, ray10, ray11, ray12, ray13, ray14,
            ray15, ray16,
            ray17, ray18,
        ]


        observations = []
        self.closestRays = []

        for ray in self.rays:
            closest = None 
            record = math.inf
            for wall in walls:
                pt = ray.cast(wall)
                if pt:
                    dist = distance(myPoint(self.x, self.y),pt)
                    if dist < record:
                        record = dist
                        closest = pt

            if closest: 
                self.closestRays.append(closest)
                observations.append(record)
               
            else:
                observations.append(1000)

        for i in range(len(observations)):
            observations[i] = ((1000 - observations[i]) / 1000)

        observations.append(self.vel / self.maxvel)
        return observations

    def collision(self, wall):

        line1 = myLine(self.p1, self.p2)
        line2 = myLine(self.p2, self.p3)
        line3 = myLine(self.p3, self.p4)
        line4 = myLine(self.p4, self.p1)

        x1 = wall.x1 
        y1 = wall.y1
        x2 = wall.x2
        y2 = wall.y2

        lines = []
        lines.append(line1)
        lines.append(line2)
        lines.append(line3)
        lines.append(line4)

        for li in lines:
            
            x3 = li.pt1.x
            y3 = li.pt1.y
            x4 = li.pt2.x
            y4 = li.pt2.y

            den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
            
            if den != 0:
                t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
                u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

                if 0 < t < 1 and 0 < u < 1:
                    return(True)
        
        return(False)
    
    def score(self, goal):
        
        vec = rotate(myPoint(0,0), myPoint(0,-50), self.angle)
        line1 = myLine(myPoint(self.x,self.y),myPoint(self.x + vec.x, self.y + vec.y))

        x1 = goal.x1 
        y1 = goal.y1
        x2 = goal.x2
        y2 = goal.y2
            
        x3 = line1.pt1.x
        y3 = line1.pt1.y
        x4 = line1.pt2.x
        y4 = line1.pt2.y

        den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
        
        if den != 0:
            t = ((x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4)) / den
            u = -((x1 - x2) * (y1 - y3) - (y1 - y2) * (x1 - x3)) / den

            if 0 < t < 1 and 0 < u < 1:
                pt = math.floor(x1 + t * (x2 - x1)), math.floor(y1 + t * (y2 - y1))

                d = distance(myPoint(self.x, self.y), myPoint(pt[0], pt[1]))
                if d < 20:
                    self.points += GOALREWARD
                    return(True)
        return(False)

    def reset(self):

        self.x = 50
        self.y = 300
        self.velX = 0
        self.velY = 0
        self.vel = 0
        self.angle = math.radians(180)
        self.soll_angle = self.angle
        self.points = 0

        self.pt1 = myPoint(self.pt.x - self.width / 2, self.pt.y - self.height / 2)
        self.pt2 = myPoint(self.pt.x + self.width / 2, self.pt.y - self.height / 2)
        self.pt3 = myPoint(self.pt.x + self.width / 2, self.pt.y + self.height / 2)
        self.pt4 = myPoint(self.pt.x - self.width / 2, self.pt.y + self.height / 2)

        self.p1 = self.pt1
        self.p2 = self.pt2
        self.p3 = self.pt3
        self.p4 = self.pt4

    def draw(self, win):
        win.blit(self.image, self.rect)
  

class RacingEnv:

    def __init__(self):
        pygame.init()
        self.font = pygame.font.Font(pygame.font.get_default_font(), 36)

        self.fps = 120
        self.width = 1000
        self.height = 600
        self.history = []

        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("RACING DDPG")
        self.screen.fill((0,0,0))
        self.back_image = pygame.image.load("track.png").convert()
        self.back_rect = self.back_image.get_rect().move(0, 0)
        self.action_space = None  # You can keep this as None
        self.game_reward = 0
        self.score = 0

        self.reset()

    def reset(self):
        self.screen.fill((0, 0, 0))

        self.car = Car(50, 300)
        self.walls = getWalls()
        self.goals = getGoals()
        self.game_reward = 0
        self.render(1)  # changed here

        # return the initial observation so that `state = env.reset()` in the training loop gets a state
        return self.car.cast(self.walls)

    def step(self, action):

        self.car.action(action)
        self.car.update()
        reward = LIFE_REWARD  
        index = 1
        for goal in self.goals:
            if index >= len(self.goals):
                index = 0
            if goal.isactiv:
                if self.car.score(goal):
                    goal.isactiv = False
                    self.goals[index + 1].isactiv = True
                    reward += GOALREWARD
            index += 1
        done = False
        for wall in self.walls:
            if self.car.collision(wall):
                reward += PENALTY
                done = True
                break
        new_state = self.car.cast(self.walls)
        self.render(action)  # changed here
        return new_state, reward, done, {}

    def render(self, action=0):  # default action lets env.render() be called without arguments

        DRAW_WALLS = False
        DRAW_GOALS = False
        DRAW_RAYS = True

        pygame.time.delay(10)

        self.clock = pygame.time.Clock()
        self.screen.fill((0, 0, 0))

        self.screen.blit(self.back_image, self.back_rect)

        if DRAW_WALLS:
            for wall in self.walls:
                wall.draw(self.screen)
        
        if DRAW_GOALS:
            for goal in self.goals:
                goal.draw(self.screen)
                if goal.isactiv:
                    goal.draw(self.screen)
        
        self.car.draw(self.screen)

        if DRAW_RAYS:
            i = 0
            for pt in self.car.closestRays:
                pygame.draw.circle(self.screen, (0,0,255), (pt.x, pt.y), 5)
                i += 1
                if i < 15:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.x, self.car.y), (pt.x, pt.y), 1)
                elif i >=15 and i < 17:
                    pygame.draw.line(self.screen, (255,255,255), ((self.car.p1.x + self.car.p2.x)/2, (self.car.p1.y + self.car.p2.y)/2), (pt.x, pt.y), 1)
                elif i == 17:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.p1.x , self.car.p1.y ), (pt.x, pt.y), 1)
                else:
                    pygame.draw.line(self.screen, (255,255,255), (self.car.p2.x, self.car.p2.y), (pt.x, pt.y), 1)

        pygame.draw.rect(self.screen,(255,255,255),(800, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(850, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(900, 100, 40, 40),2)
        pygame.draw.rect(self.screen,(255,255,255),(850, 50, 40, 40),2)

        if action == 4:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40)) 
        elif action == 6:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 5:
            pygame.draw.rect(self.screen,(0,255,0),(850, 50, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
        elif action == 1:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40)) 
        elif action == 8:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 7:
            pygame.draw.rect(self.screen,(0,255,0),(850, 100, 40, 40))
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))
        elif action == 2:
            pygame.draw.rect(self.screen,(0,255,0),(800, 100, 40, 40))
        elif action == 3:
            pygame.draw.rect(self.screen,(0,255,0),(900, 100, 40, 40))

        # score
        text_surface = self.font.render(f'Points {self.car.points}', True, pygame.Color('green'))
        self.screen.blit(text_surface, dest=(0, 0))
        # speed
        text_surface = self.font.render(f'Speed {self.car.vel*-1}', True, pygame.Color('green'))
        self.screen.blit(text_surface, dest=(800, 0))

        self.clock.tick(self.fps)
        pygame.display.update()

    def close(self):
        pygame.quit()



What I have tried:

   next_state, reward, done, _ = env.step(action[0])
IndexError: invalid index to scalar variable.


This is the error.
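
For reference, that exact message comes from indexing a NumPy scalar. A minimal, purely illustrative snippet (not from the project) that reproduces it:

Python
import numpy as np

a = np.float32(0.7)  # a NumPy scalar, e.g. a single value returned by a network
a[0]                 # IndexError: invalid index to scalar variable.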

1 solution

It's fairly straightforward: the variable action is a scalar (e.g. an integer or floating-point value), not an array or other indexable object, so action[0] fails. The assignment to action is in the first block of code above:
Python
action = agent.act(state)
As a quick guess, the offending line should probably be
Python
next_state, reward, done, _ = env.step(action)
instead. But you'll have to check what agent.act() returns, and whether that is the correct thing to pass to env.step().
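
If you are not sure what act() returns, one defensive pattern is to inspect it and normalise it before handing it to the environment. This is only a sketch against the code shown in the question; DDPGAgent1.act() is your own class, so verify what it actually produces:

Python
import numpy as np

action = agent.act(state)
print(type(action), np.shape(action))   # see what act() really returned

action = np.atleast_1d(action)          # a scalar becomes a length-1 array
next_state, reward, done, _ = env.step(action[0])

Also note that env.step() eventually calls Car.action(choice), which expects a discrete choice in the range 0-8, while DDPG produces continuous values. You will likely also need to map the continuous output to one of those discrete choices (for example int(round(...)) for a single value, or np.argmax(...) over an action vector) for the environment to behave sensibly.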
 