+block2d/gym/stable_baseline thing
This commit is contained in:
parent
e8b847bd2d
commit
755040ef0e
6
Python/ai/block_game_gym/Makefile
Normal file
6
Python/ai/block_game_gym/Makefile
Normal file
@@ -0,0 +1,6 @@
# Neither target produces a file of its own name, so declare them phony
# (otherwise a stray file called "clean" or "gif" would silence the target).
.PHONY: clean gif

# Remove training logs and captured frames; leading '-' ignores missing files.
clean:
	-rm -r logs/PPO_* logs/A2C_*
	-rm frame_*.png

# Stitch the captured frames into an animated GIF (requires ImageMagick).
gif:
	convert -delay 10 -loop 0 *.png output.gif
132
Python/ai/block_game_gym/block2d.py
Normal file
132
Python/ai/block_game_gym/block2d.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from pyray import *
|
||||
|
||||
#class Block2d:
|
||||
# def __init__(
|
||||
# self,
|
||||
# starting_position=Vector2(100, 100),
|
||||
# goal_position=Vector2(0, 0)
|
||||
# ):
|
||||
# self.position = starting_position
|
||||
# self.goal_position = goal_position
|
||||
# self.direction = Vector2(0, 0)
|
||||
# self.max_speed = 1200
|
||||
# self.speed = Vector2(0, 0)
|
||||
# self.last_update = 0 #= get_time()
|
||||
# self.last_click = Vector2(-10, -10)
|
||||
#
|
||||
# def control(self, click: Vector2):
|
||||
# def signum(x): return int(x > 0) - int(x < 0)
|
||||
# def accelerate(v: float, extra: float) -> float:
|
||||
# v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
|
||||
# return v
|
||||
# self.last_click = click
|
||||
# self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
|
||||
# self.speed.x = accelerate(self.speed.x, self.direction.x)
|
||||
# self.speed.y = accelerate(self.speed.y, self.direction.y)
|
||||
# self.direction = Vector2(
|
||||
# signum(self.direction.x),
|
||||
# signum(self.direction.y),
|
||||
# )
|
||||
#
|
||||
# def update(self):
|
||||
# def decelerate(v : float) -> float:
|
||||
# if v - 1.1 > 0: v -= 1.1
|
||||
# else: v = 0
|
||||
# return v
|
||||
# update_time = self.last_update + 0.01 #get_time()
|
||||
# delta_time = update_time - self.last_update
|
||||
# self.position.x -= self.speed.x * delta_time * self.direction.x
|
||||
# self.position.y -= self.speed.y * delta_time * self.direction.y
|
||||
#
|
||||
# self.speed.x = decelerate(self.speed.x)
|
||||
# self.speed.y = decelerate(self.speed.y)
|
||||
#
|
||||
# self.last_update = update_time
|
||||
#
|
||||
# def is_win_condition(self):
|
||||
# return (
|
||||
# self.speed.x < 0 and
|
||||
# self.speed.y < 0 and
|
||||
# abs(self.position.x - self.goal_position.x) < 50 and
|
||||
# abs(self.position.y - self.goal_position.y) < 50
|
||||
# )
|
||||
#
|
||||
#
|
||||
# def display(self):
|
||||
# def draw_hitmark(x : int, y : int) -> None:
|
||||
# size = 10
|
||||
# draw_line(x - size, y - size, x + size, y + size, GREEN)
|
||||
# draw_line(x + size, y - size, x - size, y + size, GREEN)
|
||||
# draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
|
||||
# # Blocks
|
||||
# draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
|
||||
# draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
|
||||
#
|
||||
# # Hud
|
||||
# draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
|
||||
# draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
|
||||
# draw_text(f"Speed X: {self.speed.x:.2f}", 10, 70, 20, DARKGRAY)
|
||||
# draw_text(f"Speed Y: {self.speed.y:.2f}", 10, 100, 20, DARKGRAY)
|
||||
#
|
||||
# # Player action
|
||||
# draw_hitmark(int(self.last_click.x), int(self.last_click.y))
|
||||
#
|
||||
# # Win
|
||||
# if self.is_win_condition(): draw_text(f"Victory!", 200, 200, 40, DARKGRAY)
|
||||
|
||||
# NOTE:
|
||||
# same as above with no physics
|
||||
class Block2d:
|
||||
def __init__(
|
||||
self,
|
||||
starting_position=Vector2(100, 100),
|
||||
goal_position=Vector2(0, 0)
|
||||
):
|
||||
self.position = starting_position
|
||||
self.goal_position = goal_position
|
||||
self.last_click = Vector2(-10, -10)
|
||||
|
||||
def control(self, click: Vector2):
|
||||
def signum(x): return int(x > 0) - int(x < 0)
|
||||
def accelerate(v: float, extra: float) -> float:
|
||||
v = abs(v + extra) if abs(v + extra) < self.max_speed else self.max_speed
|
||||
return v
|
||||
self.last_click = click
|
||||
self.direction = Vector2(self.position.x - click.x, self.position.y - click.y)
|
||||
#self.speed.x = accelerate(self.speed.x, self.direction.x)
|
||||
#self.speed.y = accelerate(self.speed.y, self.direction.y)
|
||||
#self.direction = Vector2(
|
||||
# signum(self.direction.x),
|
||||
# signum(self.direction.y),
|
||||
#)
|
||||
self.position.x -= self.direction.x
|
||||
self.position.y -= self.direction.y
|
||||
|
||||
def update(self):
|
||||
pass
|
||||
|
||||
def is_win_condition(self):
|
||||
return (
|
||||
abs(self.position.x - self.goal_position.x) < 50 and
|
||||
abs(self.position.y - self.goal_position.y) < 50
|
||||
)
|
||||
|
||||
def display(self):
|
||||
def draw_hitmark(x : int, y : int) -> None:
|
||||
size = 10
|
||||
draw_line(x - size, y - size, x + size, y + size, GREEN)
|
||||
draw_line(x + size, y - size, x - size, y + size, GREEN)
|
||||
draw_rectangle_lines(x - size, y - size, size*2, size*2, GREEN)
|
||||
# Blocks
|
||||
draw_rectangle(int(self.goal_position.x), int(self.goal_position.y), 30, 30, BLUE)
|
||||
draw_rectangle(int(self.position.x), int(self.position.y), 30, 30, RED)
|
||||
|
||||
# Hud
|
||||
draw_text(f"X: {self.position.x:.2f}", 10, 10, 20, DARKGRAY)
|
||||
draw_text(f"Y: {self.position.y:.2f}", 10, 40, 20, DARKGRAY)
|
||||
|
||||
# Player action
|
||||
draw_hitmark(int(self.last_click.x), int(self.last_click.y))
|
||||
|
||||
# Win
|
||||
if self.is_win_condition(): draw_text(f"Victory!", 200, 200, 40, DARKGRAY)
|
30
Python/ai/block_game_gym/game.py
Normal file
30
Python/ai/block_game_gym/game.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from pyray import *
|
||||
from block2d import Block2d
|
||||
|
||||
screen_width = 800
screen_height = 600


def main():
    """Open a raylib window and run the interactive block game until closed."""
    init_window(screen_width, screen_height, "Physics Simulation")
    set_target_fps(60)

    # Block starts at screen center; the goal sits at one third of each axis.
    block = Block2d(
        Vector2(screen_width / 2, screen_height / 2),
        Vector2(screen_width / 3, screen_height / 3),
    )

    while not window_should_close():
        # Route left clicks to the block, then advance and draw one frame.
        if is_mouse_button_pressed(MOUSE_LEFT_BUTTON):
            block.control(get_mouse_position())

        block.update()

        begin_drawing()
        clear_background(RAYWHITE)
        block.display()
        end_drawing()

    close_window()


if __name__ == "__main__":
    main()
|
252
Python/ai/block_game_gym/main.py
Normal file
252
Python/ai/block_game_gym/main.py
Normal file
@@ -0,0 +1,252 @@
|
||||
from os import getcwd, chdir
|
||||
from time import sleep
|
||||
from random import randint
|
||||
from block2d import Block2d
|
||||
from pyray import *
|
||||
|
||||
import numpy as np
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
from gymnasium.utils.env_checker import check_env
|
||||
from stable_baselines3 import *
|
||||
|
||||
# Window dimensions shared by the raylib window and the observation bounds.
screen_width = 800
screen_height = 600


class BlockGameEnvironment(gym.Env):
    """Gymnasium environment wrapping the click-to-move Block2d game.

    The agent emits a 3-component action in [-1, 1] (a currently-ignored
    "should move" slot plus a normalized click position); the observation
    is [block_x, block_y, goal_x, goal_y, 1, 1].

    NOTE(review): __init__ opens the raylib window, which is global state,
    so only one live instance is possible (see the check_env comment
    below the class definition).
    """

    metadata = {
        "render_modes": ["human"],
        "render_fps": 20,
    }

    def __init__(self, render_mode=None):
        #super().__init__()
        # Raylib
        init_window(screen_width, screen_height, "Physics Simulation")
        self.frame_counter = 0  # counts rendered frames; drives screenshot capture
        self.content = Block2d()

        # Action space
        # with normalization
        # action = [should_move, click_x, click_y], each in [-1, 1];
        # step() rescales the click back to pixel coordinates.
        self.action_space = spaces.Box(
            low=np.array([-1, -1, -1]),
            high=np.array([1, 1, 1]),
            dtype=np.float32
        )
        # without normalization
        #self.action_space = spaces.Box(
        #    low=np.array([0, 0, 0]),
        #    high=np.array([screen_width, screen_width, screen_height]),
        #    dtype=np.float32
        #)

        # Observation space
        # [block_x, block_y, goal_x, goal_y, 1, 1] — the trailing ones are
        # placeholders left over from the physics variant's speed components.
        self.observation_space = spaces.Box(
            low=np.array([0] * 6),
            high=np.array([
                screen_width,
                screen_height,
                screen_width,
                screen_height,
                #self.content.max_speed,
                #self.content.max_speed,
                1,
                1,
            ]),
            dtype=np.float64
        )

    def reset(self, seed=None, options=None):
        """Start a new episode with random block and goal positions.

        Returns (observation, info) per the Gymnasium API.
        NOTE(review): positions come from random.randint, not the seeded
        gym RNG, so `seed` does not make episodes reproducible.
        """
        super().reset(seed=seed)

        # Reconstruct the game
        del self.content

        self.box_start = (randint(0, screen_width), randint(0, screen_height))
        self.goal_position = (randint(0, screen_width), randint(0, screen_height))
        self.content = Block2d(
            Vector2(*self.box_start),
            Vector2(*self.goal_position),
        )

        obs = np.array([
            self.content.position.x,
            self.content.position.y,
            self.content.goal_position.x,
            self.content.goal_position.y,
            #self.content.speed.x,
            #self.content.speed.y,
            1,
            1,
        ])

        return obs, {}

    def step(self, action):
        """Apply one action, advance the game, and score the result.

        Returns (obs, reward, terminated, truncated, info).
        """
        def calc_reward(is_done : bool) -> float:
            # We have many strategies to reward the model,
            # one worse than the other.
            # Only the last strategy is actually invoked; the rest are kept
            # for reference.  NOTE(review): the unused strategies read
            # self.content.speed, which the active (physics-free) Block2d
            # never defines — they would raise AttributeError if selected.
            def keep_moving_large_proximity_big_earner():
                if is_done: return 100
                diff = (
                    abs(self.content.goal_position.x - action[0]),
                    abs(self.content.goal_position.y - action[1]),
                )
                if diff[0] < 100 and diff[1] < 100:
                    return 1 / (diff[0] + diff[1])
                if self.content.speed.x == 0 and self.content.speed.y == 0:
                    return -0.001
                return -0.0001
            def precise_proximity_punisher():
                r = 0
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                diffs = (abs(diffs[0]), abs(diffs[1]))
                speed_sum = self.content.speed.x + self.content.speed.y
                r -= (diffs[0] + diffs[1]) / 1000
                r -= speed_sum / 1200
                return r
            def take_it_slow_large_proximity_big_earner():
                if is_done: return 100
                elif (
                    self.content.speed.x < 100 and
                    self.content.speed.y < 100 and
                    abs(self.content.position.x - self.content.goal_position.x) < 50 and
                    abs(self.content.position.y - self.content.goal_position.y) < 50
                ): return 1
                else: return -0.0001
            def just_move_near_it_im_begging_you():
                r = 0
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
                r = dist / 10_000
                return -r
            def just_move_near_it_im_begging_you_normalized():
                # Reward in [-1, 0]: negated distance to goal scaled by the
                # screen diagonal, with a flat +10 bonus on success.
                if is_done: return 10
                r = 0
                max_dist = np.sqrt(screen_width**2 + screen_height**2)
                diffs = (
                    self.content.goal_position.x - self.content.position.x,
                    self.content.goal_position.y - self.content.position.y
                )
                dist = np.sqrt(diffs[0]**2 + diffs[1]**2)
                r = dist / max_dist
                return -r
            return just_move_near_it_im_begging_you_normalized()

        # normalization
        # Map the normalized click from [-1, 1] back to pixel coordinates.
        #should_move = action[0] > 0.8
        click = Vector2(action[1], action[2])
        click.x = ((click.x + 1) * screen_width) / 2
        click.y = ((click.y + 1) * screen_height) / 2
        should_move = True  # action[0] is currently ignored; always move
        #click = Vector2(action[1], action[2])

        if should_move:
            self.content.control(click)

        self.content.update()

        obs = np.array([
            self.content.position.x,
            self.content.position.y,
            self.content.goal_position.x,
            self.content.goal_position.y,
            #self.content.speed.x,
            #self.content.speed.y,
            1,
            1,
        ])

        is_done = self.content.is_win_condition()
        reward = calc_reward(is_done)

        # printf debugging
        #print(f"obs: {obs}")
        #print(f"act: {action}")
        #print(f"rew: {reward}")

        return obs, reward, is_done, False, {}

    def render(self, mode="human"):
        """Draw one frame; save the first 400 frames as numbered PNGs.

        The PNGs feed the Makefile's `gif` target; afterwards we just
        sleep briefly so the window stays watchable.
        """
        begin_drawing()
        clear_background(RAYWHITE)
        self.content.display()
        end_drawing()

        if self.frame_counter < 400: take_screenshot(f"frame_{self.frame_counter:05d}.png")
        else: sleep(0.01)

        self.frame_counter += 1

    def close(self):
        """Release the raylib window."""
        close_window()
|
||||
|
||||
|
||||
|
||||
# Env init
# Register the environment under a gym id so gym.make can construct it,
# then build the single global instance used by training and playback.
gym.envs.registration.register(
    id="BlockGameEnvironment-v0",
    entry_point=__name__+":BlockGameEnvironment",
)
env = gym.make("BlockGameEnvironment-v0", render_mode="human")
# very useful check, however it clones the environment,
# which is bad in our case because raylib uses global data
#check_env(env.unwrapped)
|
||||
|
||||
|
||||
|
||||
# Model init
|
||||
def get_mymodel(name : str, do_train : bool):
|
||||
model_name = "custom_model_-_sorta_works"
|
||||
model = None
|
||||
match name:
|
||||
case "myPPO":
|
||||
model = PPO(
|
||||
"MlpPolicy",
|
||||
env,
|
||||
learning_rate=0.001,
|
||||
ent_coef=0.1,
|
||||
batch_size=2, # NOTE: this is why we dont marge the two cases
|
||||
verbose=1,
|
||||
tensorboard_log="logs/",
|
||||
)
|
||||
case "myA2C":
|
||||
model = A2C( # really, really fast to train
|
||||
"MlpPolicy",
|
||||
env,
|
||||
verbose=1,
|
||||
tensorboard_log="logs/",
|
||||
)
|
||||
if do_train:
|
||||
for i in range(5000): model.learn(100)
|
||||
model.save(model_name)
|
||||
else:
|
||||
match name:
|
||||
case "myPPO":
|
||||
model = PPO.load(model_name, env=env)
|
||||
case "myA2C":
|
||||
model = A2C.load(model_name, env=env)
|
||||
return model
|
||||
|
||||
# Build the model: pick the algorithm by name, and choose whether to
# train from scratch (True) or load previously saved weights (False).
model = get_mymodel(
    # "myPPO",
    "myA2C",
    # True,
    False,
)
|
||||
|
||||
# Show what the model learned
# Run episodes forever: reset, then let the policy act for at most
# 1000 steps or until the win condition ends the episode.
while True:
    obs, _ = env.reset()
    for _ in range(1000):
        action, _ = model.predict(obs)
        obs, _, done, _, _ = env.step(action)
        print(action)
        env.render()
        if done: break
|
6
Python/ai/block_game_gym/requirements.txt
Normal file
6
Python/ai/block_game_gym/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
matplotlib
|
||||
numpy
|
||||
gymnasium
|
||||
raylib
|
||||
stable_baselines3
|
||||
tensorboard
|
Loading…
x
Reference in New Issue
Block a user