Module pyboy.openai_gym

Expand source code
#
# License: See LICENSE.md file
# GitHub: https://github.com/Baekalfen/PyBoy
#

import numpy as np

from .botsupport.constants import TILES
from .utils import WindowEvent

# gym is treated as an optional dependency: when the import fails, a stub base
# class is defined and `enabled` records that gym is unavailable.
try:
    from gym import Env
    from gym.spaces import Discrete, MultiDiscrete, Box
    enabled = True
except ImportError:

    # Placeholder base class so that `class PyBoyGymEnv(Env)` can still be defined.
    class Env:
        pass

    # NOTE: Discrete, MultiDiscrete and Box are left undefined in this branch.
    enabled = False


class PyBoyGymEnv(Env):
    """ A gym environment built from a `pyboy.PyBoy`

    This class requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins.
    Additional kwargs are passed to the start_game method of the game_wrapper.

    Args:
        pyboy (`pyboy.PyBoy`): The emulator instance the environment is built on.

        observation_type (str): Define what the agent will be able to see:
        * `"raw"`: Gives the raw pixels color
        * `"tiles"`:  Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
        * `"compressed"`: Like `"tiles"` but with slightly simplified id's (i.e. each type of enemy has a unique id).
        * `"minimal"`: Like `"compressed"` but gives a minimal representation (recommended; i.e. all enemies have the same id).

        action_type (str): Define how the agent will interact with button inputs
        * `"press"`: The agent will only press inputs for 1 frame and then release them.
        * `"toggle"`: The agent will toggle inputs: the first time it presses, the second time it releases.
        * `"all"`: The agent has access to all inputs, press and release are separated.

        simultaneous_actions (bool): Allow injecting multiple inputs at once. This dramatically increases the action_space: \\(n \\rightarrow 2^n\\). Not implemented yet: passing True raises NotImplementedError.

    Attributes:
        game_wrapper (`pyboy.plugins.base_plugin.PyBoyGameWrapper`): The game_wrapper of the PyBoy game instance over which the environment is built.
        action_space (Gym space): The action space of the environment.
        observation_space (Gym space): The observation space of the environment (depends on observation_type).
        actions (list): The list of input IDs of allowed input for the agent (depends on action_type).

    Raises:
        TypeError: If `pyboy` is not a PyBoy object.
        ValueError: If no game_wrapper is available or `action_type` is invalid.
        AttributeError: If the game_wrapper lacks the tile mapping needed by the chosen observation_type.
        NotImplementedError: If `simultaneous_actions` is True or `observation_type` is invalid.

    """
    def __init__(self, pyboy, observation_type="tiles", action_type="toggle", simultaneous_actions=False, **kwargs):
        # Build pyboy game
        self.pyboy = pyboy
        # The check compares the textual type name instead of using isinstance
        # (presumably to avoid importing the PyBoy class here -- confirm).
        if str(type(pyboy)) != "<class 'pyboy.pyboy.PyBoy'>":
            raise TypeError("pyboy must be a Pyboy object")

        # Build game_wrapper
        self.game_wrapper = pyboy.game_wrapper()
        if self.game_wrapper is None:
            raise ValueError(
                "You need to build a game_wrapper to use this function. Otherwise there is no way to build a reward function automaticaly."
            )
        # The reward returned by step() is the fitness difference since the previous step.
        self.last_fitness = self.game_wrapper.fitness

        # Building the action_space
        self._DO_NOTHING = WindowEvent.PASS
        self._buttons = [
            WindowEvent.PRESS_ARROW_UP, WindowEvent.PRESS_ARROW_DOWN, WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_LEFT, WindowEvent.PRESS_BUTTON_A, WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_SELECT, WindowEvent.PRESS_BUTTON_START
        ]
        # Toggle bookkeeping: whether each press event is currently considered held.
        self._button_is_pressed = {button: False for button in self._buttons}

        self._buttons_release = [
            WindowEvent.RELEASE_ARROW_UP, WindowEvent.RELEASE_ARROW_DOWN, WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.RELEASE_ARROW_LEFT, WindowEvent.RELEASE_BUTTON_A, WindowEvent.RELEASE_BUTTON_B,
            WindowEvent.RELEASE_BUTTON_SELECT, WindowEvent.RELEASE_BUTTON_START
        ]
        # Maps every press event to its matching release event (same ordering in both lists).
        self._release_button = dict(zip(self._buttons, self._buttons_release))

        self.actions = [self._DO_NOTHING] + self._buttons
        if action_type == "all":
            self.actions += self._buttons_release
        elif action_type not in ["press", "toggle"]:
            raise ValueError(f"action_type {action_type} is invalid")
        self.action_type = action_type

        if simultaneous_actions:
            raise NotImplementedError("Not implemented yet, raise an issue on GitHub if needed")
        else:
            self.action_space = Discrete(len(self.actions))

        # Building the observation_space
        if observation_type == "raw":
            screen = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray())
            self.observation_space = Box(low=0, high=255, shape=screen.shape, dtype=np.uint8)
        elif observation_type in ["tiles", "compressed", "minimal"]:
            # Number of distinct ids a cell can take; the reduced mappings use max id + 1.
            size_ids = TILES
            if observation_type == "compressed":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_compressed) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_compressed attibute to the game_wrapper to use the compressed observation_type"
                    ) from err
            elif observation_type == "minimal":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_minimal) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_minimal attibute to the game_wrapper to use the minimal observation_type"
                    ) from err
            nvec = size_ids * np.ones(self.game_wrapper.shape)
            self.observation_space = MultiDiscrete(nvec)
        else:
            raise NotImplementedError(f"observation_type {observation_type} is invalid")
        self.observation_type = observation_type

        # The game is started lazily on the first reset(), using the extra kwargs.
        self._started = False
        self._kwargs = kwargs

    def _get_observation(self):
        """ Build the current observation according to `observation_type`

        Returns the screen as a uint8 array for `"raw"`, otherwise the result of the
        game_wrapper's `_game_area_np` for the selected tile representation.
        """
        if self.observation_type == "raw":
            observation = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray(), dtype=np.uint8)
        elif self.observation_type in ["tiles", "compressed", "minimal"]:
            observation = self.game_wrapper._game_area_np(self.observation_type)
        else:
            raise NotImplementedError(f"observation_type {self.observation_type} is invalid")
        return observation

    def step(self, action_id):
        """ Send the selected input to the emulator and advance it by one tick

        Args:
            action_id (int): Index into `actions` of the input to apply.

        Returns:
            tuple: `(observation, reward, done, info)` where reward is the change in the
            game_wrapper's fitness since the previous step and info is an empty dict.
        """
        info = {}

        action = self.actions[action_id]
        if action == self._DO_NOTHING:
            pyboy_done = self.pyboy.tick()
        else:
            if self.action_type == "toggle":
                # A second selection of a held button is turned into its release event.
                if self._button_is_pressed[action]:
                    self._button_is_pressed[action] = False
                    action = self._release_button[action]
                else:
                    self._button_is_pressed[action] = True

            self.pyboy.send_input(action)
            pyboy_done = self.pyboy.tick()

            if self.action_type == "press":
                # Send the matching release right after the tick so the press is not held.
                self.pyboy.send_input(self._release_button[action])

        new_fitness = self.game_wrapper.fitness
        reward = new_fitness - self.last_fitness
        self.last_fitness = new_fitness

        observation = self._get_observation()
        done = pyboy_done or self.game_wrapper.game_over()

        return observation, reward, done, info

    def reset(self):
        """ Reset (or start) the gym environment through the game_wrapper

        Returns:
            The first observation of the new episode.
        """
        if not self._started:
            self.game_wrapper.start_game(**self._kwargs)
            self._started = True
        else:
            self.game_wrapper.reset_game()
        self.last_fitness = self.game_wrapper.fitness
        # Clear the toggle bookkeeping read by step(); this must be the private
        # `_button_is_pressed` dict, otherwise stale "held" flags leak into the next episode.
        self._button_is_pressed = {button: False for button in self._buttons}
        return self._get_observation()

    def render(self):
        """ Do nothing; this environment does not implement rendering here """
        pass

    def close(self):
        """ Stop the underlying PyBoy instance, passing `save=False` """
        self.pyboy.stop(save=False)

Classes

class PyBoyGymEnv (pyboy, observation_type='tiles', action_type='toggle', simultaneous_actions=False, **kwargs)

A gym environment built from a PyBoy

This function requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins. Additional kwargs are passed to the start_game method of the game_wrapper.

Args

observation_type : str
Define what the agent will be able to see:
  • "raw": Gives the raw pixels color
  • "tiles": Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
  • "compressed": Like "tiles" but with slightly simplified id's (i.e. each type of enemy has a unique id).
  • "minimal": Like "compressed" but gives a minimal representation (recommended; i.e. all enemies have the same id).
action_type : str
Define how the agent will interact with button inputs
  • "press": The agent will only press inputs for 1 frame and then release them.
  • "toggle": The agent will toggle inputs: the first time it presses, the second time it releases.
  • "all": The agent has access to all inputs; press and release are separate actions.
simultaneous_actions : bool
Allow injecting multiple inputs at once. This dramatically increases the size of the action_space: \(n \rightarrow 2^n\). Not implemented yet: passing True raises NotImplementedError.

Attributes

game_wrapper (PyBoyGameWrapper): The game_wrapper of the PyBoy game instance over which the environment is built.
action_space : Gym space
The action space of the environment.
observation_space : Gym space
The observation space of the environment (depends on observation_type).
actions : list
The list of input IDs of allowed inputs for the agent (depends on action_type).
Expand source code
class PyBoyGymEnv(Env):
    """ A gym environment built from a `pyboy.PyBoy`

    This class requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins.
    Additional kwargs are passed to the start_game method of the game_wrapper.

    Args:
        pyboy (`pyboy.PyBoy`): The emulator instance the environment is built on.

        observation_type (str): Define what the agent will be able to see:
        * `"raw"`: Gives the raw pixels color
        * `"tiles"`:  Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
        * `"compressed"`: Like `"tiles"` but with slightly simplified id's (i.e. each type of enemy has a unique id).
        * `"minimal"`: Like `"compressed"` but gives a minimal representation (recommended; i.e. all enemies have the same id).

        action_type (str): Define how the agent will interact with button inputs
        * `"press"`: The agent will only press inputs for 1 frame and then release them.
        * `"toggle"`: The agent will toggle inputs: the first time it presses, the second time it releases.
        * `"all"`: The agent has access to all inputs, press and release are separated.

        simultaneous_actions (bool): Allow injecting multiple inputs at once. This dramatically increases the action_space: \\(n \\rightarrow 2^n\\). Not implemented yet: passing True raises NotImplementedError.

    Attributes:
        game_wrapper (`pyboy.plugins.base_plugin.PyBoyGameWrapper`): The game_wrapper of the PyBoy game instance over which the environment is built.
        action_space (Gym space): The action space of the environment.
        observation_space (Gym space): The observation space of the environment (depends on observation_type).
        actions (list): The list of input IDs of allowed input for the agent (depends on action_type).

    Raises:
        TypeError: If `pyboy` is not a PyBoy object.
        ValueError: If no game_wrapper is available or `action_type` is invalid.
        AttributeError: If the game_wrapper lacks the tile mapping needed by the chosen observation_type.
        NotImplementedError: If `simultaneous_actions` is True or `observation_type` is invalid.

    """
    def __init__(self, pyboy, observation_type="tiles", action_type="toggle", simultaneous_actions=False, **kwargs):
        # Build pyboy game
        self.pyboy = pyboy
        # The check compares the textual type name instead of using isinstance
        # (presumably to avoid importing the PyBoy class here -- confirm).
        if str(type(pyboy)) != "<class 'pyboy.pyboy.PyBoy'>":
            raise TypeError("pyboy must be a Pyboy object")

        # Build game_wrapper
        self.game_wrapper = pyboy.game_wrapper()
        if self.game_wrapper is None:
            raise ValueError(
                "You need to build a game_wrapper to use this function. Otherwise there is no way to build a reward function automaticaly."
            )
        # The reward returned by step() is the fitness difference since the previous step.
        self.last_fitness = self.game_wrapper.fitness

        # Building the action_space
        self._DO_NOTHING = WindowEvent.PASS
        self._buttons = [
            WindowEvent.PRESS_ARROW_UP, WindowEvent.PRESS_ARROW_DOWN, WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_LEFT, WindowEvent.PRESS_BUTTON_A, WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_SELECT, WindowEvent.PRESS_BUTTON_START
        ]
        # Toggle bookkeeping: whether each press event is currently considered held.
        self._button_is_pressed = {button: False for button in self._buttons}

        self._buttons_release = [
            WindowEvent.RELEASE_ARROW_UP, WindowEvent.RELEASE_ARROW_DOWN, WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.RELEASE_ARROW_LEFT, WindowEvent.RELEASE_BUTTON_A, WindowEvent.RELEASE_BUTTON_B,
            WindowEvent.RELEASE_BUTTON_SELECT, WindowEvent.RELEASE_BUTTON_START
        ]
        # Maps every press event to its matching release event (same ordering in both lists).
        self._release_button = dict(zip(self._buttons, self._buttons_release))

        self.actions = [self._DO_NOTHING] + self._buttons
        if action_type == "all":
            self.actions += self._buttons_release
        elif action_type not in ["press", "toggle"]:
            raise ValueError(f"action_type {action_type} is invalid")
        self.action_type = action_type

        if simultaneous_actions:
            raise NotImplementedError("Not implemented yet, raise an issue on GitHub if needed")
        else:
            self.action_space = Discrete(len(self.actions))

        # Building the observation_space
        if observation_type == "raw":
            screen = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray())
            self.observation_space = Box(low=0, high=255, shape=screen.shape, dtype=np.uint8)
        elif observation_type in ["tiles", "compressed", "minimal"]:
            # Number of distinct ids a cell can take; the reduced mappings use max id + 1.
            size_ids = TILES
            if observation_type == "compressed":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_compressed) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_compressed attibute to the game_wrapper to use the compressed observation_type"
                    ) from err
            elif observation_type == "minimal":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_minimal) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_minimal attibute to the game_wrapper to use the minimal observation_type"
                    ) from err
            nvec = size_ids * np.ones(self.game_wrapper.shape)
            self.observation_space = MultiDiscrete(nvec)
        else:
            raise NotImplementedError(f"observation_type {observation_type} is invalid")
        self.observation_type = observation_type

        # The game is started lazily on the first reset(), using the extra kwargs.
        self._started = False
        self._kwargs = kwargs

    def _get_observation(self):
        """ Build the current observation according to `observation_type`

        Returns the screen as a uint8 array for `"raw"`, otherwise the result of the
        game_wrapper's `_game_area_np` for the selected tile representation.
        """
        if self.observation_type == "raw":
            observation = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray(), dtype=np.uint8)
        elif self.observation_type in ["tiles", "compressed", "minimal"]:
            observation = self.game_wrapper._game_area_np(self.observation_type)
        else:
            raise NotImplementedError(f"observation_type {self.observation_type} is invalid")
        return observation

    def step(self, action_id):
        """ Send the selected input to the emulator and advance it by one tick

        Args:
            action_id (int): Index into `actions` of the input to apply.

        Returns:
            tuple: `(observation, reward, done, info)` where reward is the change in the
            game_wrapper's fitness since the previous step and info is an empty dict.
        """
        info = {}

        action = self.actions[action_id]
        if action == self._DO_NOTHING:
            pyboy_done = self.pyboy.tick()
        else:
            if self.action_type == "toggle":
                # A second selection of a held button is turned into its release event.
                if self._button_is_pressed[action]:
                    self._button_is_pressed[action] = False
                    action = self._release_button[action]
                else:
                    self._button_is_pressed[action] = True

            self.pyboy.send_input(action)
            pyboy_done = self.pyboy.tick()

            if self.action_type == "press":
                # Send the matching release right after the tick so the press is not held.
                self.pyboy.send_input(self._release_button[action])

        new_fitness = self.game_wrapper.fitness
        reward = new_fitness - self.last_fitness
        self.last_fitness = new_fitness

        observation = self._get_observation()
        done = pyboy_done or self.game_wrapper.game_over()

        return observation, reward, done, info

    def reset(self):
        """ Reset (or start) the gym environment through the game_wrapper

        Returns:
            The first observation of the new episode.
        """
        if not self._started:
            self.game_wrapper.start_game(**self._kwargs)
            self._started = True
        else:
            self.game_wrapper.reset_game()
        self.last_fitness = self.game_wrapper.fitness
        # Clear the toggle bookkeeping read by step(); this must be the private
        # `_button_is_pressed` dict, otherwise stale "held" flags leak into the next episode.
        self._button_is_pressed = {button: False for button in self._buttons}
        return self._get_observation()

    def render(self):
        """ Do nothing; this environment does not implement rendering here """
        pass

    def close(self):
        """ Stop the underlying PyBoy instance, passing `save=False` """
        self.pyboy.stop(save=False)

Ancestors

  • gym.core.Env
  • typing.Generic

Methods

def step(self, action_id)

Run one timestep of the environment's dynamics.

When end of episode is reached, you are responsible for calling :meth:reset to reset this environment's state. Accepts an action and returns either a tuple (observation, reward, terminated, truncated, info).

Args

action : ActType
an action provided by the agent

Returns

observation (object): this will be an element of the environment's :attr:observation_space.
This may, for instance, be a numpy array containing the positions and velocities of certain objects.
reward (float): The amount of reward returned as a result of taking the action.
terminated (bool): whether a terminal state (as defined under the MDP of the task) is reached.
In this case further step() calls could return undefined results.
truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied.
Typically a timelimit, but could also be used to indicate agent physically going out of bounds.
Can be used to end the episode prematurely before a terminal state is reached.
info (dictionary): info contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
This might, for instance, contain
metrics that describe the agent's performance state, variables that are hidden from observations, or individual reward terms that are combined to produce the total reward. It also can contain information that distinguishes truncation and termination, however this is deprecated in favour of returning two booleans, and will be removed in a future version.
(deprecated)
done (bool): A boolean value for if the episode has ended, in which case further :meth:step calls will return undefined results.
A done signal may be emitted for different reasons
Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state.
Expand source code
def step(self, action_id):
    """ Apply the input selected by `action_id`, tick the emulator once and report the outcome

    Returns a `(observation, reward, done, info)` tuple; reward is the change in the
    game_wrapper's fitness since the previous step and info is an empty dict.
    """
    event = self.actions[action_id]

    if event == self._DO_NOTHING:
        # Nothing to inject: just advance the emulator.
        pyboy_done = self.pyboy.tick()
    else:
        mode = self.action_type

        if mode == "toggle":
            # Flip the held flag; a button that was held gets its release event instead.
            was_held = self._button_is_pressed[event]
            self._button_is_pressed[event] = not was_held
            if was_held:
                event = self._release_button[event]

        self.pyboy.send_input(event)
        pyboy_done = self.pyboy.tick()

        if mode == "press":
            # Send the matching release right after the tick.
            self.pyboy.send_input(self._release_button[event])

    # Reward is the fitness gained since the last call.
    current_fitness = self.game_wrapper.fitness
    reward = current_fitness - self.last_fitness
    self.last_fitness = current_fitness

    observation = self._get_observation()
    done = pyboy_done or self.game_wrapper.game_over()

    return observation, reward, done, {}
def reset(self)

Reset (or start) the gym environment through the game_wrapper

Expand source code
def reset(self):
    """ Reset (or start) the gym environment through the game_wrapper

    The first call starts the game with the kwargs given at construction; later calls
    reset it. Returns the first observation of the new episode.
    """
    if not self._started:
        self.game_wrapper.start_game(**self._kwargs)
        self._started = True
    else:
        self.game_wrapper.reset_game()
    self.last_fitness = self.game_wrapper.fitness
    # Clear the toggle bookkeeping used by step(); this must be the private
    # `_button_is_pressed` dict, otherwise stale "held" flags leak into the next episode.
    self._button_is_pressed = {button: False for button in self._buttons}
    return self._get_observation()
def render(self)

Compute the render frames as specified by render_mode attribute during initialization of the environment.

The set of supported modes varies per environment. (And some third-party environments may not support rendering at all.) By convention, if render_mode is:

  • None (default): no render is computed.
  • human: render return None. The environment is continuously rendered in the current display or terminal. Usually for human consumption.
  • rgb_array: return a single frame representing the current state of the environment. A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image.
  • rgb_array_list: return a list of frames representing the states of the environment since the last reset. Each frame is a numpy.ndarray with shape (x, y, 3), as with rgb_array.
  • ansi: Return a strings (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).

Note

Make sure that your class's metadata 'render_modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method.

Expand source code
def render(self):
    """ Do nothing and return None; no rendering is implemented in this method """
    return None
def close(self)

Override close in your subclass to perform any necessary cleanup.

Environments will automatically :meth:close() themselves when garbage collected or when the program exits.

Expand source code
def close(self):
    """ Stop the underlying PyBoy instance, passing `save=False` """
    emulator = self.pyboy
    emulator.stop(save=False)