+block2d/gym/stable_baseline thing
This commit is contained in:
parent
e8b847bd2d
commit
755040ef0e
6
Python/ai/block_game_gym/Makefile
Normal file
6
Python/ai/block_game_gym/Makefile
Normal file
@@ -0,0 +1,6 @@
# Neither target produces a file of its own name, so declare them phony
# (otherwise a stray file called "clean" or "gif" would silence the target).
.PHONY: clean gif

# Remove training logs and captured frames; leading '-' ignores missing files.
clean:
	-rm -r logs/PPO_* logs/A2C_*
	-rm frame_*.png

# Stitch the captured frames into an animated GIF (requires ImageMagick).
gif:
	convert -delay 10 -loop 0 *.png output.gif
132
Python/ai/block_game_gym/block2d.py
Normal file
132
Python/ai/block_game_gym/block2d.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from pyray import *
|
||||
|
||||
#class Block2d:
|
||||
# def __init__(
|
||||
# self,
|
||||
# starting_position=Vector2(100, 100),
|
||||
# goal_position=Vector2(0, 0)
|
||||
# ):
|
||||
# self.position = starting_position
|
||||
# self.goal_position = goal_position
|
||||
# self.direction = Vector2(0, 0)
|
||||
# self.max_speed = 1200
|
||||
# self.speed = Vector2(0, 0)
|
||||
# self.last_update = 0 #= get_time()
|
||||
# self.last_click = Vector2(-10, -10)
|
||||
#
|
||||
# def control(self, click: Vector2):
|
||||
# def signum(x): return int(x > 0) - int(x < 0)
|
||||
# def accelerate(v: float, extra: float) -> float:
|
||||
# v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
|
||||
# return v
|
||||
# self.last_click = click
|
||||
# self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
|
||||
# self.speed.x = accelerate(self.speed.x, self.direction.x)
|
||||
# self.speed.y = accelerate(self.speed.y, self.direction.y)
|
||||
# self.direction = Vector2(
|
||||
# signum(self.direction.x),
|
||||
# signum(self.direction.y),
|
||||
# )
|
||||
#
|
||||
# def update(self):
|
||||
# def decelerate(v : float) -> float:
|
||||
# if v - 1.1 > 0: v -= 1.1
|
||||
# else: v = 0
|
||||
# return v
|
||||
# update_time = self.last_update + 0.01 #get_time()
|
||||
# delta_time = update_time - self.last_update
|
||||
# self.position.x -= self.speed.x * delta_time * self.direction.x
|
||||
# self.position.y -= self.speed.y * delta_time * self.direction.y
|
||||
#
|
||||
# self.speed.x = decelerate(self.speed.x)
|
||||
# self.speed.y = decelerate(self.speed.y)
|
||||
#
|
||||
# self.last_update = update_time
|
||||
#
|
||||
# def is_win_condition(self):
|
||||
# return (
|
||||
# self.speed.x < 0 and
|
||||
# self.speed.y < 0 and
|
||||
# abs(self.position.x - self.goal_position.x) < 50 and
|
||||
# abs(self.position.y - self.goal_position.y) < 50
|
||||
# )
|
||||
#
|
||||
#
|
||||
# def display(self):
|
||||
# def draw_hitmark(x : int, y : int) -> None:
|
||||
# size = 10
|
||||
# draw_line(x - size, y - size, x + size, y + size, GREEN)
|
||||
# draw_line(x + size, y - size, x - size, y + size, GREEN)
|
||||
# draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
|
||||
# # Blocks
|
||||
# draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
|
||||
# draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
|
||||
#
|
||||
# # Hud
|
||||
# draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
|
||||
# draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
|
||||
# draw_text(f"Speed X: {self.speed.x:.2f}", 10, 70, 20, DARKGRAY)
|
||||
# draw_text(f"Speed Y: {self.speed.y:.2f}", 10, 100, 20, DARKGRAY)
|
||||
#
|
||||
# # Player action
|
||||
# draw_hitmark(int(self.last_click.x), int(self.last_click.y))
|
||||
#
|
||||
# # Win
|
||||
# if self.is_win_condition(): draw_text(f"Victory!", 200, 200, 40, DARKGRAY)
|
||||
|
||||
# NOTE:
|
||||
# same as above with no physics
|
||||
class Block2d:
|
||||
def __init__(
|
||||
self,
|
||||
starting_position=Vector2(100, 100),
|
||||
goal_position=Vector2(0, 0)
|
||||
):
|
||||
self.position = starting_position
|
||||
self.goal_position = goal_position
|
||||
self.last_click = Vector2(-10, -10)
|
||||
|
||||
def control(self, click: Vector2):
|
||||
def signum(x): return int(x > 0) - int(x < 0)
|
||||
def accelerate(v: float, extra: float) -> float:
|
||||
v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
|
||||
return v
|
||||
self.last_click = click
|
||||
self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
|
||||
#self.speed.x = accelerate(self.speed.x, self.direction.x)
|
||||
#self.speed.y = accelerate(self.speed.y, self.direction.y)
|
||||
#self.direction = Vector2(
|
||||
# signum(self.direction.x),
|
||||
# signum(self.direction.y),
|
||||
#)
|
||||
self.position.x -= self.direction.x
|
||||
self.position.y -= self.direction.y
|
||||
|
||||
def update(self):
|
||||
pass
|
||||
|
||||
def is_win_condition(self):
|
||||
return (
|
||||
abs(self.position.x - self.goal_position.x) < 50 and
|
||||
abs(self.position.y - self.goal_position.y) < 50
|
||||
)
|
||||
|
||||
def display(self):
|
||||
def draw_hitmark(x : int, y : int) -> None:
|
||||
size = 10
|
||||
draw_line(x - size, y - size, x + size, y + size, GREEN)
|
||||
draw_line(x + size, y - size, x - size, y + size, GREEN)
|
||||
draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
|
||||
# Blocks
|
||||
draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
|
||||
draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
|
||||
|
||||
# Hud
|
||||
draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
|
||||
draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
|
||||
|
||||
# Player action
|
||||
draw_hitmark(int(self.last_click.x), int(self.last_click.y))
|
||||
|
||||
# Win
|
||||
if self.is_win_condition(): draw_text(f"Victory!", 200, 200, 40, DARKGRAY)
|
30
Python/ai/block_game_gym/game.py
Normal file
30
Python/ai/block_game_gym/game.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from pyray import *
|
||||
from block2d import Block2d
|
||||
|
||||
screen_width = 800
screen_height = 600


def main():
    """Open a raylib window and run the interactive block game until closed."""
    init_window(screen_width, screen_height, "Physics Simulation")
    set_target_fps(60)

    # Block starts at screen center; the goal sits at one third of each axis.
    block = Block2d(
        Vector2(screen_width / 2, screen_height / 2),
        Vector2(screen_width / 3, screen_height / 3),
    )

    while not window_should_close():
        # Route left clicks to the block, then advance and draw one frame.
        if is_mouse_button_pressed(MOUSE_LEFT_BUTTON):
            block.control(get_mouse_position())

        block.update()

        begin_drawing()
        clear_background(RAYWHITE)
        block.display()
        end_drawing()

    close_window()


if __name__ == "__main__":
    main()
|
252
Python/ai/block_game_gym/main.py
Normal file
252
Python/ai/block_game_gym/main.py
Normal file
@@ -0,0 +1,252 @@
|
||||
from os import getcwd, chdir
|
||||
from time import sleep
|
||||
from random import randint
|
||||
from block2d import Block2d
|
||||
from pyray import *
|
||||
|
||||
import numpy as np
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
from gymnasium.utils.env_checker import check_env
|
||||
from stable_baselines3 import *
|
||||
|
||||
# Window dimensions shared by the raylib window and the observation bounds.
screen_width = 800
screen_height = 600


class BlockGameEnvironment(gym.Env):
    """Gymnasium environment wrapping the click-to-move Block2d game.

    The agent emits a 3-component action in [-1, 1] (a currently-ignored
    "should move" slot plus a normalized click position); the observation
    is [block_x, block_y, goal_x, goal_y, 1, 1].

    NOTE(review): __init__ opens the raylib window, which is global state,
    so only one live instance is possible (see the check_env comment
    below the class definition).
    """

    metadata = {
        "render_modes": ["human"],
        "render_fps": 20,
    }

    def __init__(self, render_mode=None):
        #super().__init__()
        # Raylib
        init_window(screen_width, screen_height, "Physics Simulation")
        self.frame_counter = 0  # counts rendered frames; drives screenshot capture
        self.content = Block2d()

        # Action space
        # with normalization
        # action = [should_move, click_x, click_y], each in [-1, 1];
        # step() rescales the click back to pixel coordinates.
        self.action_space = spaces.Box(
            low=np.array([-1, -1, -1]),
            high=np.array([1, 1, 1]),
            dtype=np.float32
        )
        # without normalization
        #self.action_space = spaces.Box(
        #    low=np.array([0, 0, 0]),
        #    high=np.array([screen_width, screen_width, screen_height]),
        #    dtype=np.float32
        #)

        # Observation space
        # [block_x, block_y, goal_x, goal_y, 1, 1] — the trailing ones are
        # placeholders left over from the physics variant's speed components.
        self.observation_space = spaces.Box(
            low=np.array([0] * 6),
            high=np.array([
                screen_width,
                screen_height,
                screen_width,
                screen_height,
                #self.content.max_speed,
                #self.content.max_speed,
                1,
                1,
            ]),
            dtype=np.float64
        )

    def reset(self, seed=None, options=None):
        """Start a new episode with random block and goal positions.

        Returns (observation, info) per the Gymnasium API.
        NOTE(review): positions come from random.randint, not the seeded
        gym RNG, so `seed` does not make episodes reproducible.
        """
        super().reset(seed=seed)

        # Reconstruct the game
        del self.content

        self.box_start = (randint(0, screen_width), randint(0, screen_height))
        self.goal_position = (randint(0, screen_width), randint(0, screen_height))
        self.content = Block2d(
            Vector2(*self.box_start),
            Vector2(*self.goal_position),
        )

        obs = np.array([
            self.content.position.x,
            self.content.position.y,
            self.content.goal_position.x,
            self.content.goal_position.y,
            #self.content.speed.x,
            #self.content.speed.y,
            1,
            1,
        ])

        return obs, {}

    def step(self, action):
        """Apply one action, advance the game, and score the result.

        Returns (obs, reward, terminated, truncated, info).
        """
        def calc_reward(is_done : bool) -> float:
            # We have many strategies to reward the model,
            # one worse than the other.
            # Only the last strategy is actually invoked; the rest are kept
            # for reference.  NOTE(review): the unused strategies read
            # self.content.speed, which the active (physics-free) Block2d
            # never defines — they would raise AttributeError if selected.
            def keep_moving_large_proximity_big_earner():
                if is_done: return 100
                diff = (
                    abs(self.content.goal_position.x - action[0]),
                    abs(self.content.goal_position.y - action[1]),
                )
                if diff[0] < 100 and diff[1] < 100:
                    return 1 / (diff[0] + diff[1])
                if self.content.speed.x == 0 and self.content.speed.y == 0:
                    return -0.001
                return -0.0001
            def precise_proximity_punisher():
                r = 0
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                diffs = (abs(diffs[0]), abs(diffs[1]))
                speed_sum = self.content.speed.x + self.content.speed.y
                r -= (diffs[0] + diffs[1]) / 1000
                r -= speed_sum / 1200
                return r
            def take_it_slow_large_proximity_big_earner():
                if is_done: return 100
                elif (
                    self.content.speed.x < 100 and
                    self.content.speed.y < 100 and
                    abs(self.content.position.x - self.content.goal_position.x) < 50 and
                    abs(self.content.position.y - self.content.goal_position.y) < 50
                ): return 1
                else: return -0.0001
            def just_move_near_it_im_begging_you():
                r = 0
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
                r = dist / 10_000
                return -r
            def just_move_near_it_im_begging_you_normalized():
                # Reward in [-1, 0]: negated distance to goal scaled by the
                # screen diagonal, with a flat +10 bonus on success.
                if is_done: return 10
                r = 0
                max_dist = np.sqrt(screen_width**2 + screen_height**2)
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
                r = dist / max_dist
                return -r
            return just_move_near_it_im_begging_you_normalized()

        # normalization
        # Map the normalized click from [-1, 1] back to pixel coordinates.
        #should_move = action[0] > 0.8
        click = Vector2(action[1], action[2])
        click.x = ((click.x + 1) * screen_width) / 2
        click.y = ((click.y + 1) * screen_height) / 2
        should_move = True  # action[0] is currently ignored; always move
        #click = Vector2(action[1], action[2])

        if should_move:
            self.content.control(click)

        self.content.update()

        obs = np.array([
            self.content.position.x,
            self.content.position.y,
            self.content.goal_position.x,
            self.content.goal_position.y,
            #self.content.speed.x,
            #self.content.speed.y,
            1,
            1,
        ])

        is_done = self.content.is_win_condition()
        reward = calc_reward(is_done)

        # printf debugging
        #print(f"obs: {obs}")
        #print(f"act: {action}")
        #print(f"rew: {reward}")

        return obs, reward, is_done, False, {}

    def render(self, mode="human"):
        """Draw one frame; save the first 400 frames as numbered PNGs.

        The PNGs feed the Makefile's `gif` target; afterwards we just
        sleep briefly so the window stays watchable.
        """
        begin_drawing()
        clear_background(RAYWHITE)
        self.content.display()
        end_drawing()

        if self.frame_counter < 400: take_screenshot(f"frame_{self.frame_counter:05d}.png")
        else: sleep(0.01)

        self.frame_counter += 1

    def close(self):
        """Release the raylib window."""
        close_window()
|
||||
|
||||
|
||||
|
||||
# Env init
# Register the environment under a gym id so gym.make can construct it,
# then build the single global instance used by training and playback.
gym.envs.registration.register(
    id="BlockGameEnvironment-v0",
    entry_point=__name__+":BlockGameEnvironment",
)
env = gym.make("BlockGameEnvironment-v0", render_mode="human")
# very useful check, however it clones the environment,
# which is bad in our case because raylib uses global data
#check_env(env.unwrapped)
|
||||
|
||||
|
||||
|
||||
# Model init
|
||||
def get_mymodel(name : str, do_train : bool):
|
||||
model_name = "custom_model_-_sorta_works"
|
||||
model = None
|
||||
match name:
|
||||
case "myPPO":
|
||||
model = PPO(
|
||||
"MlpPolicy",
|
||||
env,
|
||||
learning_rate=0.001,
|
||||
ent_coef=0.1,
|
||||
batch_size=2, # NOTE: this is why we dont marge the two cases
|
||||
verbose=1,
|
||||
tensorboard_log="logs/",
|
||||
)
|
||||
case "myA2C":
|
||||
model = A2C( # really, really fast to train
|
||||
"MlpPolicy",
|
||||
env,
|
||||
verbose=1,
|
||||
tensorboard_log="logs/",
|
||||
)
|
||||
if do_train:
|
||||
for i in range(5000): model.learn(100)
|
||||
model.save(model_name)
|
||||
else:
|
||||
match name:
|
||||
case "myPPO":
|
||||
model = PPO.load(model_name, env=env)
|
||||
case "myA2C":
|
||||
model = A2C.load(model_name, env=env)
|
||||
return model
|
||||
|
||||
# Build the model: pick the algorithm by name, and choose whether to
# train from scratch (True) or load previously saved weights (False).
model = get_mymodel(
    # "myPPO",
    "myA2C",
    # True,
    False,
)
|
||||
|
||||
# Show what the model learned
# Run episodes forever: reset, then let the policy act for at most
# 1000 steps or until the win condition ends the episode.
while True:
    obs, _ = env.reset()
    for _ in range(1000):
        action, _ = model.predict(obs)
        obs, _, done, _, _ = env.step(action)
        print(action)
        env.render()
        if done: break
|
6
Python/ai/block_game_gym/requirements.txt
Normal file
6
Python/ai/block_game_gym/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
matplotlib
|
||||
numpy
|
||||
gymnasium
|
||||
raylib
|
||||
stable_baselines3
|
||||
tensorboard
|
Loading…
x
Reference in New Issue
Block a user