Module pyboy.openai_gym

Expand source code
#
# License: See LICENSE.md file
# GitHub: https://github.com/Baekalfen/PyBoy
#

import numpy as np

from .botsupport.constants import TILES
from .utils import WindowEvent

# gym is an optional dependency: `enabled` records whether it could be imported.
try:
    from gym import Env
    from gym.spaces import Discrete, MultiDiscrete, Box
    enabled = True
except ImportError:

    # Placeholder base class so that `class PyBoyGymEnv(Env)` can still be defined without gym.
    # NOTE: Discrete, MultiDiscrete and Box remain undefined in this branch.
    class Env:
        pass

    enabled = False


class PyBoyGymEnv(Env):
    """ A gym environment built from a `pyboy.PyBoy`

    This class requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins.
    Additional kwargs are passed to the start_game method of the game_wrapper.

    Args:
        observation_type (str): Define what the agent will be able to see:
        * `"raw"`: Gives the raw pixels color
        * `"tiles"`:  Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
        * `"compressed"`: Like `"tiles"` but with slightly simplified id's (i.e. each type of enemy has a unique id).
        * `"minimal"`: Like `"compressed"` but gives a minimal representation (recommended; i.e. all enemies have the same id).

        action_type (str): Define how the agent will interact with button inputs
        * `"press"`: The agent only presses an input for 1 frame and then releases it.
        * `"toggle"`: The agent toggles inputs: the first time it presses the button, the second time it releases it.
        * `"all"`: The agent has access to all inputs; press and release are separate actions.

        simultaneous_actions (bool): Allow injecting multiple inputs at once. This dramatically increases the action_space: \\(n \\rightarrow 2^n\\). Currently raises `NotImplementedError` when set.

    Attributes:
        game_wrapper (`pyboy.plugins.base_plugin.PyBoyGameWrapper`): The game_wrapper of the PyBoy game instance over which the environment is built.
        action_space (Gym space): The action space of the environment.
        observation_space (Gym space): The observation space of the environment (depends on observation_type).
        actions (list): The list of input IDs of allowed inputs for the agent (depends on action_type).

    """
    def __init__(self, pyboy, observation_type="tiles", action_type="toggle", simultaneous_actions=False, **kwargs):
        # Build pyboy game
        self.pyboy = pyboy
        # The type is compared by name, so no import of the PyBoy class is needed here.
        if str(type(pyboy)) != "<class 'pyboy.pyboy.PyBoy'>":
            raise TypeError("pyboy must be a Pyboy object")

        # Build game_wrapper
        self.game_wrapper = pyboy.game_wrapper()
        if self.game_wrapper is None:
            raise ValueError(
                "You need to build a game_wrapper to use this function. Otherwise there is no way to build a reward function automaticaly."
            )
        # The reward of a step is the change in fitness since the previous step.
        self.last_fitness = self.game_wrapper.fitness

        # Building the action_space
        self._DO_NOTHING = WindowEvent.PASS
        self._buttons = [
            WindowEvent.PRESS_ARROW_UP, WindowEvent.PRESS_ARROW_DOWN, WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_LEFT, WindowEvent.PRESS_BUTTON_A, WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_SELECT, WindowEvent.PRESS_BUTTON_START
        ]
        # Per-button state used by the "toggle" action type.
        self._button_is_pressed = {button: False for button in self._buttons}

        self._buttons_release = [
            WindowEvent.RELEASE_ARROW_UP, WindowEvent.RELEASE_ARROW_DOWN, WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.RELEASE_ARROW_LEFT, WindowEvent.RELEASE_BUTTON_A, WindowEvent.RELEASE_BUTTON_B,
            WindowEvent.RELEASE_BUTTON_SELECT, WindowEvent.RELEASE_BUTTON_START
        ]
        # Maps every press event to its matching release event (both lists share the same order).
        self._release_button = {button: r_button for button, r_button in zip(self._buttons, self._buttons_release)}

        self.actions = [self._DO_NOTHING] + self._buttons
        if action_type == "all":
            self.actions += self._buttons_release
        elif action_type not in ["press", "toggle"]:
            raise ValueError(f"action_type {action_type} is invalid")
        self.action_type = action_type

        if simultaneous_actions:
            raise NotImplementedError("Not implemented yet, raise an issue on GitHub if needed")
        else:
            self.action_space = Discrete(len(self.actions))

        # Building the observation_space
        if observation_type == "raw":
            screen = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray())
            self.observation_space = Box(low=0, high=255, shape=screen.shape, dtype=np.uint8)
        elif observation_type in ["tiles", "compressed", "minimal"]:
            size_ids = TILES
            if observation_type == "compressed":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_compressed) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_compressed attibute to the game_wrapper to use the compressed observation_type"
                    ) from err
            elif observation_type == "minimal":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_minimal) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_minimal attibute to the game_wrapper to use the minimal observation_type"
                    ) from err
            # One discrete dimension per cell of the game area, each with `size_ids` possible ids.
            nvec = size_ids * np.ones(self.game_wrapper.shape)
            self.observation_space = MultiDiscrete(nvec)
        else:
            raise NotImplementedError(f"observation_type {observation_type} is invalid")
        self.observation_type = observation_type

        # The game is only started on the first call to reset().
        self._started = False
        self._kwargs = kwargs

    def _get_observation(self):
        """ Return the current observation in the format selected by `observation_type` """
        if self.observation_type == "raw":
            observation = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray(), dtype=np.uint8)
        elif self.observation_type in ["tiles", "compressed", "minimal"]:
            observation = self.game_wrapper._game_area_np(self.observation_type)
        else:
            raise NotImplementedError(f"observation_type {self.observation_type} is invalid")
        return observation

    def step(self, action_id):
        """ Apply one action, advance the emulator by one tick and report the outcome

        Args:
            action_id (int): Index into `actions` of the input to send.

        Returns:
            tuple: `(observation, reward, done, info)` where `reward` is the change of the game_wrapper
            fitness since the previous step, `done` is true when `pyboy.tick()` returns a true value or
            the game_wrapper reports game over, and `info` is an empty dict.
        """
        info = {}

        action = self.actions[action_id]
        if action == self._DO_NOTHING:
            pyboy_done = self.pyboy.tick()
        else:
            if self.action_type == "toggle":
                # A toggled button alternates between sending its press and its release event.
                if self._button_is_pressed[action]:
                    self._button_is_pressed[action] = False
                    action = self._release_button[action]
                else:
                    self._button_is_pressed[action] = True

            self.pyboy.send_input(action)
            pyboy_done = self.pyboy.tick()

            if self.action_type == "press":
                # The matching release is sent right after the tick.
                self.pyboy.send_input(self._release_button[action])

        new_fitness = self.game_wrapper.fitness
        reward = new_fitness - self.last_fitness
        self.last_fitness = new_fitness

        observation = self._get_observation()
        done = pyboy_done or self.game_wrapper.game_over()

        return observation, reward, done, info

    def reset(self):
        """ Reset (or start) the gym environment through the game_wrapper

        The first call starts the game with the kwargs given to the constructor; later calls reset it.

        Returns:
            The observation after the (re)start.
        """
        if not self._started:
            self.game_wrapper.start_game(**self._kwargs)
            self._started = True
        else:
            self.game_wrapper.reset_game()
        self.last_fitness = self.game_wrapper.fitness
        # Clear the toggle state that step() reads; otherwise a button held at the end of the
        # previous episode would be released instead of pressed by the next toggle.
        self._button_is_pressed = {button: False for button in self._buttons}
        return self._get_observation()

    def render(self, mode="human"):
        """ No-op; `mode` is accepted for compatibility with the Gym `render(mode=...)` call and ignored """
        pass

    def close(self):
        """ Stop the emulator without saving """
        self.pyboy.stop(save=False)

Classes

class PyBoyGymEnv (pyboy, observation_type='tiles', action_type='toggle', simultaneous_actions=False, **kwargs)

A gym environment built from a PyBoy

This function requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins. Additional kwargs are passed to the start_game method of the game_wrapper.

Args

observation_type : str
Define what the agent will be able to see:
  • "raw": Gives the raw pixels color
  • "tiles": Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
  • "compressed": Like "tiles" but with slightly simplified id's (i.e. each type of enemy has a unique id).
  • "minimal": Like "compressed" but gives a minimal representation (recommended; i.e. all enemies have the same id).
action_type : str
Define how the agent will interact with button inputs
  • "press": The agent only presses an input for 1 frame and then releases it.
  • "toggle": The agent toggles inputs: the first time it presses the button, the second time it releases it.
  • "all": The agent has access to all inputs; press and release are separate actions.
simultaneous_actions : bool
Allow injecting multiple inputs at once. This dramatically increases the action_space: \(n \rightarrow 2^n\)

Attributes

game_wrapper : PyBoyGameWrapper
The game_wrapper of the PyBoy game instance over which the environment is built.
action_space : Gym space
The action space of the environment.
observation_space : Gym space
The observation space of the environment (depends on observation_type).
actions : list
The list of input IDs of allowed inputs for the agent (depends on action_type).
Expand source code
class PyBoyGymEnv(Env):
    """ A gym environment built from a `pyboy.PyBoy`

    This class requires PyBoy to implement a Game Wrapper for the loaded ROM. You can find the supported games in pyboy.plugins.
    Additional kwargs are passed to the start_game method of the game_wrapper.

    Args:
        observation_type (str): Define what the agent will be able to see:
        * `"raw"`: Gives the raw pixels color
        * `"tiles"`:  Gives the id of the sprites and tiles in 8x8 pixel zones of the game_area.
        * `"compressed"`: Like `"tiles"` but with slightly simplified id's (i.e. each type of enemy has a unique id).
        * `"minimal"`: Like `"compressed"` but gives a minimal representation (recommended; i.e. all enemies have the same id).

        action_type (str): Define how the agent will interact with button inputs
        * `"press"`: The agent only presses an input for 1 frame and then releases it.
        * `"toggle"`: The agent toggles inputs: the first time it presses the button, the second time it releases it.
        * `"all"`: The agent has access to all inputs; press and release are separate actions.

        simultaneous_actions (bool): Allow injecting multiple inputs at once. This dramatically increases the action_space: \\(n \\rightarrow 2^n\\). Currently raises `NotImplementedError` when set.

    Attributes:
        game_wrapper (`pyboy.plugins.base_plugin.PyBoyGameWrapper`): The game_wrapper of the PyBoy game instance over which the environment is built.
        action_space (Gym space): The action space of the environment.
        observation_space (Gym space): The observation space of the environment (depends on observation_type).
        actions (list): The list of input IDs of allowed inputs for the agent (depends on action_type).

    """
    def __init__(self, pyboy, observation_type="tiles", action_type="toggle", simultaneous_actions=False, **kwargs):
        # Build pyboy game
        self.pyboy = pyboy
        # The type is compared by name, so no import of the PyBoy class is needed here.
        if str(type(pyboy)) != "<class 'pyboy.pyboy.PyBoy'>":
            raise TypeError("pyboy must be a Pyboy object")

        # Build game_wrapper
        self.game_wrapper = pyboy.game_wrapper()
        if self.game_wrapper is None:
            raise ValueError(
                "You need to build a game_wrapper to use this function. Otherwise there is no way to build a reward function automaticaly."
            )
        # The reward of a step is the change in fitness since the previous step.
        self.last_fitness = self.game_wrapper.fitness

        # Building the action_space
        self._DO_NOTHING = WindowEvent.PASS
        self._buttons = [
            WindowEvent.PRESS_ARROW_UP, WindowEvent.PRESS_ARROW_DOWN, WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_LEFT, WindowEvent.PRESS_BUTTON_A, WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_SELECT, WindowEvent.PRESS_BUTTON_START
        ]
        # Per-button state used by the "toggle" action type.
        self._button_is_pressed = {button: False for button in self._buttons}

        self._buttons_release = [
            WindowEvent.RELEASE_ARROW_UP, WindowEvent.RELEASE_ARROW_DOWN, WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.RELEASE_ARROW_LEFT, WindowEvent.RELEASE_BUTTON_A, WindowEvent.RELEASE_BUTTON_B,
            WindowEvent.RELEASE_BUTTON_SELECT, WindowEvent.RELEASE_BUTTON_START
        ]
        # Maps every press event to its matching release event (both lists share the same order).
        self._release_button = {button: r_button for button, r_button in zip(self._buttons, self._buttons_release)}

        self.actions = [self._DO_NOTHING] + self._buttons
        if action_type == "all":
            self.actions += self._buttons_release
        elif action_type not in ["press", "toggle"]:
            raise ValueError(f"action_type {action_type} is invalid")
        self.action_type = action_type

        if simultaneous_actions:
            raise NotImplementedError("Not implemented yet, raise an issue on GitHub if needed")
        else:
            self.action_space = Discrete(len(self.actions))

        # Building the observation_space
        if observation_type == "raw":
            screen = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray())
            self.observation_space = Box(low=0, high=255, shape=screen.shape, dtype=np.uint8)
        elif observation_type in ["tiles", "compressed", "minimal"]:
            size_ids = TILES
            if observation_type == "compressed":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_compressed) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_compressed attibute to the game_wrapper to use the compressed observation_type"
                    ) from err
            elif observation_type == "minimal":
                try:
                    size_ids = np.max(self.game_wrapper.tiles_minimal) + 1
                except AttributeError as err:
                    raise AttributeError(
                        "You need to add the tiles_minimal attibute to the game_wrapper to use the minimal observation_type"
                    ) from err
            # One discrete dimension per cell of the game area, each with `size_ids` possible ids.
            nvec = size_ids * np.ones(self.game_wrapper.shape)
            self.observation_space = MultiDiscrete(nvec)
        else:
            raise NotImplementedError(f"observation_type {observation_type} is invalid")
        self.observation_type = observation_type

        # The game is only started on the first call to reset().
        self._started = False
        self._kwargs = kwargs

    def _get_observation(self):
        """ Return the current observation in the format selected by `observation_type` """
        if self.observation_type == "raw":
            observation = np.asarray(self.pyboy.botsupport_manager().screen().screen_ndarray(), dtype=np.uint8)
        elif self.observation_type in ["tiles", "compressed", "minimal"]:
            observation = self.game_wrapper._game_area_np(self.observation_type)
        else:
            raise NotImplementedError(f"observation_type {self.observation_type} is invalid")
        return observation

    def step(self, action_id):
        """ Apply one action, advance the emulator by one tick and report the outcome

        Args:
            action_id (int): Index into `actions` of the input to send.

        Returns:
            tuple: `(observation, reward, done, info)` where `reward` is the change of the game_wrapper
            fitness since the previous step, `done` is true when `pyboy.tick()` returns a true value or
            the game_wrapper reports game over, and `info` is an empty dict.
        """
        info = {}

        action = self.actions[action_id]
        if action == self._DO_NOTHING:
            pyboy_done = self.pyboy.tick()
        else:
            if self.action_type == "toggle":
                # A toggled button alternates between sending its press and its release event.
                if self._button_is_pressed[action]:
                    self._button_is_pressed[action] = False
                    action = self._release_button[action]
                else:
                    self._button_is_pressed[action] = True

            self.pyboy.send_input(action)
            pyboy_done = self.pyboy.tick()

            if self.action_type == "press":
                # The matching release is sent right after the tick.
                self.pyboy.send_input(self._release_button[action])

        new_fitness = self.game_wrapper.fitness
        reward = new_fitness - self.last_fitness
        self.last_fitness = new_fitness

        observation = self._get_observation()
        done = pyboy_done or self.game_wrapper.game_over()

        return observation, reward, done, info

    def reset(self):
        """ Reset (or start) the gym environment through the game_wrapper

        The first call starts the game with the kwargs given to the constructor; later calls reset it.

        Returns:
            The observation after the (re)start.
        """
        if not self._started:
            self.game_wrapper.start_game(**self._kwargs)
            self._started = True
        else:
            self.game_wrapper.reset_game()
        self.last_fitness = self.game_wrapper.fitness
        # Clear the toggle state that step() reads; otherwise a button held at the end of the
        # previous episode would be released instead of pressed by the next toggle.
        self._button_is_pressed = {button: False for button in self._buttons}
        return self._get_observation()

    def render(self, mode="human"):
        """ No-op; `mode` is accepted for compatibility with the Gym `render(mode=...)` call and ignored """
        pass

    def close(self):
        """ Stop the emulator without saving """
        self.pyboy.stop(save=False)

Ancestors

  • gym.core.Env

Methods

def step(self, action_id)

Run one timestep of the environment's dynamics. When end of episode is reached, you are responsible for calling reset() to reset this environment's state.

Accepts an action and returns a tuple (observation, reward, done, info).

Args

action : object
an action provided by the agent

Returns

observation : object
agent's observation of the current environment
reward : float
amount of reward returned after previous action
done : bool
whether the episode has ended, in which case further step() calls will return undefined results
info : dict
contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
Expand source code
def step(self, action_id):
    """ Send the selected input, advance the emulator one tick and score the result.

    Returns the tuple (observation, reward, done, info); reward is the fitness change
    since the previous step and info is always an empty dict.
    """
    event = self.actions[action_id]
    sends_input = not (event == self._DO_NOTHING)

    if sends_input and self.action_type == "toggle":
        # Flip the stored state; a button that was held gets its release event instead.
        was_held = self._button_is_pressed[event]
        self._button_is_pressed[event] = not was_held
        if was_held:
            event = self._release_button[event]

    if sends_input:
        self.pyboy.send_input(event)
    emulator_done = self.pyboy.tick()
    if sends_input and self.action_type == "press":
        # In "press" mode the matching release follows right after the tick.
        self.pyboy.send_input(self._release_button[event])

    # Reward is the fitness delta relative to the previous step.
    current_fitness = self.game_wrapper.fitness
    previous_fitness, self.last_fitness = self.last_fitness, current_fitness
    reward = current_fitness - previous_fitness

    observation = self._get_observation()
    done = emulator_done or self.game_wrapper.game_over()
    return observation, reward, done, {}
def reset(self)

Reset (or start) the gym environment through the game_wrapper

Expand source code
def reset(self):
    """ Reset (or start) the gym environment through the game_wrapper

    The first call starts the game with the stored kwargs; later calls reset it.

    Returns:
        The observation after the (re)start.
    """
    if not self._started:
        self.game_wrapper.start_game(**self._kwargs)
        self._started = True
    else:
        self.game_wrapper.reset_game()
    self.last_fitness = self.game_wrapper.fitness
    # Clear the toggle state under the same name step() reads (`_button_is_pressed`); otherwise a
    # button held at the end of the previous episode would be released instead of pressed.
    self._button_is_pressed = {button: False for button in self._buttons}
    return self._get_observation()
def render(self)

Renders the environment.

The set of supported modes varies per environment. (And some environments do not support rendering at all.) By convention, if mode is:

  • human: render to the current display or terminal and return nothing. Usually for human consumption.
  • rgb_array: Return an numpy.ndarray with shape (x, y, 3), representing RGB values for an x-by-y pixel image, suitable for turning into a video.
  • ansi: Return a string (str) or StringIO.StringIO containing a terminal-style text representation. The text can include newlines and ANSI escape sequences (e.g. for colors).

Note

Make sure that your class's metadata 'render.modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method.

Args

mode : str
the mode to render with

Example:

class MyEnv(Env): metadata = {'render.modes': ['human', 'rgb_array']}

def render(self, mode='human'):
    if mode == 'rgb_array':
        return np.array(...) # return RGB frame suitable for video
    elif mode == 'human':
        ... # pop up a window and render
    else:
        super(MyEnv, self).render(mode=mode) # just raise an exception
Expand source code
def render(self, mode="human"):
    """ No-op; `mode` is accepted for compatibility with the Gym `render(mode=...)` call and ignored """
    pass
def close(self)

Override close in your subclass to perform any necessary cleanup.

Environments will automatically close() themselves when garbage collected or when the program exits.

Expand source code
def close(self):
    """ Shut the emulator down, asking it not to save. """
    emulator = self.pyboy
    emulator.stop(save=False)