From 755040ef0ebd1265847634a740579847c4216991 Mon Sep 17 00:00:00 2001
From: anon
Date: Mon, 3 Feb 2025 23:25:07 +0100
Subject: [PATCH] +block2d/gym/stable_baseline thing

---
 Python/ai/block_game_gym/Makefile         |   6 +
 Python/ai/block_game_gym/block2d.py       | 132 ++++++++++
 Python/ai/block_game_gym/game.py          |  30 +++
 Python/ai/block_game_gym/main.py          | 252 ++++++++++++++++++++++
 Python/ai/block_game_gym/requirements.txt |   6 +
 5 files changed, 426 insertions(+)
 create mode 100644 Python/ai/block_game_gym/Makefile
 create mode 100644 Python/ai/block_game_gym/block2d.py
 create mode 100644 Python/ai/block_game_gym/game.py
 create mode 100644 Python/ai/block_game_gym/main.py
 create mode 100644 Python/ai/block_game_gym/requirements.txt

diff --git a/Python/ai/block_game_gym/Makefile b/Python/ai/block_game_gym/Makefile
new file mode 100644
index 0000000..5a2e339
--- /dev/null
+++ b/Python/ai/block_game_gym/Makefile
@@ -0,0 +1,6 @@
+clean:
+	-rm -r logs/PPO_* logs/A2C_*
+	-rm frame_*.png
+
+gif:
+	convert -delay 10 -loop 0 *.png output.gif
diff --git a/Python/ai/block_game_gym/block2d.py b/Python/ai/block_game_gym/block2d.py
new file mode 100644
index 0000000..656c44d
--- /dev/null
+++ b/Python/ai/block_game_gym/block2d.py
@@ -0,0 +1,132 @@
+from pyray import *
+
+#class Block2d:
+#    def __init__(
+#        self,
+#        starting_position=Vector2(100, 100),
+#        goal_position=Vector2(0, 0)
+#    ):
+#        self.position = starting_position
+#        self.goal_position = goal_position
+#        self.direction = Vector2(0, 0)
+#        self.max_speed = 1200
+#        self.speed = Vector2(0, 0)
+#        self.last_update = 0 #= get_time()
+#        self.last_click = Vector2(-10, -10)
+#
+#    def control(self, click: Vector2):
+#        def signum(x): return int(x > 0) - int(x < 0)
+#        def accelerate(v: float, extra: float) -> float:
+#            v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
+#            return v
+#        self.last_click = click
+#        self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
+#        self.speed.x = accelerate(self.speed.x, self.direction.x)
+#        self.speed.y = accelerate(self.speed.y, self.direction.y)
+#        self.direction = Vector2(
+#            signum(self.direction.x),
+#            signum(self.direction.y),
+#        )
+#
+#    def update(self):
+#        def decelerate(v : float) -> float:
+#            if v - 1.1 > 0: v -= 1.1
+#            else: v = 0
+#            return v
+#        update_time = self.last_update + 0.01 #get_time()
+#        delta_time = update_time - self.last_update
+#        self.position.x -= self.speed.x * delta_time * self.direction.x
+#        self.position.y -= self.speed.y * delta_time * self.direction.y
+#
+#        self.speed.x = decelerate(self.speed.x)
+#        self.speed.y = decelerate(self.speed.y)
+#
+#        self.last_update = update_time
+#
+#    def is_win_condition(self):
+#        return (
+#            self.speed.x < 0 and
+#            self.speed.y < 0 and
+#            abs(self.position.x - self.goal_position.x) < 50 and
+#            abs(self.position.y - self.goal_position.y) < 50
+#        )
+#
+#
+#    def display(self):
+#        def draw_hitmark(x : int, y : int) -> None:
+#            size = 10
+#            draw_line(x - size, y - size, x + size, y + size, GREEN)
+#            draw_line(x + size, y - size, x - size, y + size, GREEN)
+#            draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
+#        # Blocks
+#        draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
+#        draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
+#
+#        # Hud
+#        draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
+#        draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
+#        draw_text(f"Speed X: {self.speed.x:.2f}", 10, 70, 20, DARKGRAY)
+#        draw_text(f"Speed Y: {self.speed.y:.2f}", 10, 100, 20, DARKGRAY)
+#
+#        # Player action
+#        draw_hitmark(int(self.last_click.x), int(self.last_click.y))
+#
+#        # Win
+#        if self.is_win_condition(): draw_text(f"Victory!", 200, 200, 40, DARKGRAY)
+
+# NOTE:
+# same as above with no physics
+class Block2d:
+    def __init__(
+        self,
+        starting_position=Vector2(100, 100),
+        goal_position=Vector2(0, 0)
+    ):
+        self.position = starting_position
+        self.goal_position = goal_position
+        self.last_click = Vector2(-10, -10)
+
+    def control(self, click: Vector2):
+        #def signum(x): return int(x > 0) - int(x < 0)  # physics-era helpers, unused here
+        #def accelerate(v: float, extra: float) -> float:  # (this class has no speed/max_speed)
+        #    v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
+        #    return v
+        self.last_click = click
+        self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
+        #self.speed.x = accelerate(self.speed.x, self.direction.x)
+        #self.speed.y = accelerate(self.speed.y, self.direction.y)
+        #self.direction = Vector2(
+        #    signum(self.direction.x),
+        #    signum(self.direction.y),
+        #)
+        self.position.x -= self.direction.x
+        self.position.y -= self.direction.y
+
+    def update(self):
+        pass
+
+    def is_win_condition(self):
+        return (
+            abs(self.position.x - self.goal_position.x) < 50 and
+            abs(self.position.y - self.goal_position.y) < 50
+        )
+
+    def display(self):
+        def draw_hitmark(x : int, y : int) -> None:
+            size = 10
+            draw_line(x - size, y - size, x + size, y + size, GREEN)
+            draw_line(x + size, y - size, x - size, y + size, GREEN)
+            draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
+        # Blocks
+        draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
+        draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
+
+        # Hud
+        draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
+        draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
+
+        # Player action
+        draw_hitmark(int(self.last_click.x), int(self.last_click.y))
+
+        # Win
+        if self.is_win_condition(): draw_text("Victory!", 200, 200, 40, DARKGRAY)
diff --git a/Python/ai/block_game_gym/game.py b/Python/ai/block_game_gym/game.py
new file mode 100644
index 0000000..8c464b5
--- /dev/null
+++ b/Python/ai/block_game_gym/game.py
@@ -0,0 +1,30 @@
+from pyray import *
+from block2d import Block2d
+
+screen_width = 800
+screen_height = 600
+
+def main():
+    init_window(screen_width, screen_height, "Physics Simulation")
+    set_target_fps(60)
+
+    content = Block2d(
+        Vector2(screen_width / 2, screen_height / 2),
+        Vector2(screen_width / 3, screen_height / 3),
+    )
+
+    while not window_should_close():
+        if is_mouse_button_pressed(MOUSE_LEFT_BUTTON):
+            content.control(get_mouse_position())
+
+        content.update()
+
+        begin_drawing()
+        clear_background(RAYWHITE)
+        content.display()
+        end_drawing()
+
+    close_window()
+
+if __name__ == "__main__":
+    main()
diff --git a/Python/ai/block_game_gym/main.py b/Python/ai/block_game_gym/main.py
new file mode 100644
index 0000000..ee421ab
--- /dev/null
+++ b/Python/ai/block_game_gym/main.py
@@ -0,0 +1,252 @@
+from os import getcwd, chdir
+from time import sleep
+from random import randint
+from block2d import Block2d
+from pyray import *
+
+import numpy as np
+import gymnasium as gym
+from gymnasium import spaces
+from gymnasium.utils.env_checker import check_env
+from stable_baselines3 import PPO, A2C
+
+screen_width = 800
+screen_height = 600
+
+class BlockGameEnvironment(gym.Env):
+    metadata = {
+        "render_modes": ["human"],
+        "render_fps": 20,
+    }
+
+    def __init__(self, render_mode=None):
+        #super().__init__()
+        # Raylib
+        init_window(screen_width, screen_height, "Physics Simulation")
+        self.frame_counter = 0
+        self.content = Block2d()
+
+        # Action space
+        # with normalization
+        self.action_space = spaces.Box(
+            low=np.array([-1, -1, -1]),
+            high=np.array([1, 1, 1]),
+            dtype=np.float32
+        )
+        # without normalization
+        #self.action_space = spaces.Box(
+        #    low=np.array([0, 0, 0]),
+        #    high=np.array([screen_width, screen_width, screen_height]),
+        #    dtype=np.float32
+        #)
+
+        # Observation space
+        self.observation_space = spaces.Box(
+            low=np.array([0] * 6),
+            high=np.array([
+                screen_width,
+                screen_height,
+                screen_width,
+                screen_height,
+                #self.content.max_speed,
+                #self.content.max_speed,
+                1,
+                1,
+            ]),
+            dtype=np.float64
+        )
+
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+
+        # Reconstruct the game
+        del self.content
+
+        self.box_start = (randint(0, screen_width), randint(0, screen_height))
+        self.goal_position = (randint(0, screen_width), randint(0, screen_height))
+        self.content = Block2d(
+            Vector2(*self.box_start),
+            Vector2(*self.goal_position),
+        )
+
+        obs = np.array([
+            self.content.position.x,
+            self.content.position.y,
+            self.content.goal_position.x,
+            self.content.goal_position.y,
+            #self.content.speed.x,
+            #self.content.speed.y,
+            1,
+            1,
+        ])
+
+        return obs, {}
+
+    def step(self, action):
+        def calc_reward(is_done : bool) -> float:
+            # We have several strategies for rewarding the model,
+            # each worse than the last (the speed-based ones assume the physics Block2d).
+            def keep_moving_large_proximity_big_earner():
+                if is_done: return 100
+                diff = (
+                    abs(self.content.goal_position.x - action[0]),
+                    abs(self.content.goal_position.y - action[1]),
+                )
+                if diff[0] < 100 and diff[1] < 100:
+                    return 1 / (diff[0] + diff[1])
+                if self.content.speed.x == 0 and self.content.speed.y == 0:
+                    return -0.001
+                return -0.0001
+            def precise_proximity_punisher():
+                r = 0
+                diffs = (
+                    self.content.goal_position.x - self.content.position.x,
+                    self.content.goal_position.y - self.content.position.y
+                )
+                diffs = (abs(diffs[0]), abs(diffs[1]))
+                speed_sum = self.content.speed.x + self.content.speed.y
+                r -= (diffs[0] + diffs[1]) / 1000
+                r -= speed_sum / 1200
+                return r
+            def take_it_slow_large_proximity_big_earner():
+                if is_done: return 100
+                elif (
+                    self.content.speed.x < 100 and
+                    self.content.speed.y < 100 and
+                    abs(self.content.position.x - self.content.goal_position.x) < 50 and
+                    abs(self.content.position.y - self.content.goal_position.y) < 50
+                ): return 1
+                else: return -0.0001
+            def just_move_near_it_im_begging_you():
+                r = 0
+                diffs = (
+                    self.content.goal_position.x - self.content.position.x,
+                    self.content.goal_position.y - self.content.position.y
+                )
+                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
+                r = dist / 10_000
+                return -r
+            def just_move_near_it_im_begging_you_normalized():
+                if is_done: return 10
+                r = 0
+                max_dist = np.sqrt(screen_width**2 + screen_height**2)
+                diffs = (
+                    self.content.goal_position.x - self.content.position.x,
+                    self.content.goal_position.y - self.content.position.y
+                )
+                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
+                r = dist / max_dist
+                return -r
+            return just_move_near_it_im_begging_you_normalized()
+
+        # normalization: map actions from [-1, 1] back to pixel coordinates
+        #should_move = action[0] > 0.8
+        click = Vector2(action[1], action[2])
+        click.x = ((click.x + 1) * screen_width) / 2
+        click.y = ((click.y + 1) * screen_height) / 2
+        should_move = True
+        #click = Vector2(action[1], action[2])
+
+        if should_move:
+            self.content.control(click)
+
+        self.content.update()
+
+        obs = np.array([
+            self.content.position.x,
+            self.content.position.y,
+            self.content.goal_position.x,
+            self.content.goal_position.y,
+            #self.content.speed.x,
+            #self.content.speed.y,
+            1,
+            1,
+        ])
+
+        is_done = self.content.is_win_condition()
+        reward = calc_reward(is_done)
+
+        # printf debugging
+        #print(f"obs: {obs}")
+        #print(f"act: {action}")
+        #print(f"rew: {reward}")
+
+        return obs, reward, is_done, False, {}
+
+    def render(self, mode="human"):
+        begin_drawing()
+        clear_background(RAYWHITE)
+        self.content.display()
+        end_drawing()
+
+        if self.frame_counter < 400: take_screenshot(f"frame_{self.frame_counter:05d}.png")
+        else: sleep(0.01)
+
+        self.frame_counter += 1
+
+    def close(self):
+        close_window()
+
+
+
+# Env init
+gym.envs.registration.register(
+    id="BlockGameEnvironment-v0",
+    entry_point=__name__+":BlockGameEnvironment",
+)
+env = gym.make("BlockGameEnvironment-v0", render_mode="human")
+# very useful check; however, it clones the environment,
+# which is bad in our case because raylib keeps its window state in globals
+#check_env(env.unwrapped)
+
+
+
+# Model init
+def get_mymodel(name : str, do_train : bool):
+    model_name = "custom_model_-_sorta_works"
+    model = None
+    match name:
+        case "myPPO":
+            model = PPO(
+                "MlpPolicy",
+                env,
+                learning_rate=0.001,
+                ent_coef=0.1,
+                batch_size=2, # NOTE: this is why we don't merge the two cases
+                verbose=1,
+                tensorboard_log="logs/",
+            )
+        case "myA2C":
+            model = A2C( # really, really fast to train
+                "MlpPolicy",
+                env,
+                verbose=1,
+                tensorboard_log="logs/",
+            )
+    if do_train:
+        for i in range(5000): model.learn(100)
+        model.save(model_name)
+    else:
+        match name:
+            case "myPPO":
+                model = PPO.load(model_name, env=env)
+            case "myA2C":
+                model = A2C.load(model_name, env=env)
+    return model
+
+model = get_mymodel(
+#    "myPPO",
+    "myA2C",
+#    True,
+    False,
+)
+
+# Show what the model learned
+while True:
+    obs, _ = env.reset()
+    for _ in range(1000):
+        action, _ = model.predict(obs)
+        obs, _, done, _, _ = env.step(action)
+        print(action)
+        env.render()
+        if done: break
diff --git a/Python/ai/block_game_gym/requirements.txt b/Python/ai/block_game_gym/requirements.txt
new file mode 100644
index 0000000..c69addf
--- /dev/null
+++ b/Python/ai/block_game_gym/requirements.txt
@@ -0,0 +1,6 @@
+matplotlib
+numpy
+gymnasium
+raylib
+stable_baselines3
+tensorboard
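Possible follow-up, sketched outside the patch: instead of eyeballing the manual render loop at the bottom of main.py, the trained policy could be scored numerically with stable_baselines3's evaluate_policy. The TimeLimit wrapper and the 1000-step cap are assumptions (BlockGameEnvironment never truncates an episode on its own, so an uncapped rollout with a weak policy may never terminate); `model` and `env` are the objects main.py already builds.

# Hedged sketch: quantitative check of the policy trained above.
# Assumes `model` and `env` from main.py are in scope; max_episode_steps=1000 is
# an arbitrary cap, not something the patch defines.
from gymnasium.wrappers import TimeLimit
from stable_baselines3.common.evaluation import evaluate_policy

eval_env = TimeLimit(env, max_episode_steps=1000)  # end episodes that never reach the goal
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,  # greedy actions instead of sampling from the policy
)
print(f"mean reward over 10 episodes: {mean_reward:.3f} +/- {std_reward:.3f}")

Since the wrapper reuses the existing env instance, no second raylib window is created; evaluate_policy may warn that the env is not wrapped in a Monitor, which is fine for a rough check.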