flatland.envs.rewards module#

class flatland.envs.rewards.BaseDefaultRewards(cancellation_factor: float = 1, cancellation_time_buffer: float = 0, intermediate_not_served_penalty: float = 1, intermediate_late_arrival_penalty_factor: float = 0.2, intermediate_early_departure_penalty_factor: float = 0.5, collision_factor: float = 0.0)[source]#

Bases: Rewards[Dict[str, float]]

Reward Function.

This scoring function is designed to capture key operational metrics such as punctuality, efficiency in responding to disruptions, and safety.

Punctuality and schedule adherence are rewarded based on the difference between actual and target arrival and departure times at each stop, with additional penalties for intermediate stops that are not served and for journeys that are never started.

Safety measures are implemented as penalties for collisions which are directly proportional to the train’s speed at impact, ensuring that high-speed operations are managed with extra caution.

Parameters#

cancellation_factor : float

Cancellation factor \(\phi \geq 0\). Defaults to 1.

cancellation_time_buffer : float

Cancellation time buffer \(\pi \geq 0\). Defaults to 0.

intermediate_not_served_penalty : float

Intermediate stop not served penalty \(\mu \geq 0\). Applied if an intermediate stop is skipped entirely or only passed through without stopping. Defaults to 1.

intermediate_late_arrival_penalty_factor : float

Intermediate late arrival penalty factor \(\alpha \geq 0\). Defaults to 0.2.

intermediate_early_departure_penalty_factor : float

Intermediate early departure penalty factor \(\delta \geq 0\). Defaults to 0.5.

collision_factor : float

Collision penalty factor \(\kappa \geq 0\). Defaults to 0.0.
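For illustration, a minimal sketch of instantiating the reward function with custom penalty weights; the values are arbitrary, and how the instance is wired into the environment (e.g. a RailEnv rewards argument) depends on the Flatland version:

    from flatland.envs.rewards import BaseDefaultRewards

    # Arbitrary illustrative weights: skipped stops are penalized more
    # heavily, and collisions are penalized (proportional to impact speed).
    rewards = BaseDefaultRewards(
        cancellation_factor=1.0,                          # phi
        cancellation_time_buffer=5,                       # pi
        intermediate_not_served_penalty=2.0,              # mu
        intermediate_late_arrival_penalty_factor=0.2,     # alpha
        intermediate_early_departure_penalty_factor=0.1,  # delta
        collision_factor=0.5,                             # kappa
    )

step_reward and end_of_episode_reward then return a Dict[str, float] of per-penalty contributions, presumably keyed by the DefaultPenalties names below, which cumulate merges across steps.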

cumulate(*rewards: float) → Dict[str, float][source]#

Cumulate multiple rewards into one.

Parameters#

rewards

Returns#

Cumulative rewards

empty() → Dict[str, float][source]#

Return the neutral initial value for cumulation.

end_of_episode_reward(agent: EnvAgent, distance_map: DistanceMap, elapsed_steps: int) → Dict[str, float][source]#

Handles the end-of-episode reward for a particular agent.

Parameters#

agent : EnvAgent
distance_map : DistanceMap
elapsed_steps : int

normalize(*rewards: float, num_agents: int, max_episode_steps: int) → float[source]#

Return normalized cumulated rewards. Can be None for some reward implementations.

Parameters#

rewards : List[RewardType]
num_agents : int
max_episode_steps : int

Returns#

step_reward(agent: EnvAgent, agent_transition_data: AgentTransitionData, distance_map: DistanceMap, elapsed_steps: int) → Dict[str, float][source]#

Handles the end-of-step reward for a particular agent.

Parameters#

agent : EnvAgent
agent_transition_data : AgentTransitionData
distance_map : DistanceMap
elapsed_steps : int

class flatland.envs.rewards.BasicMultiObjectiveRewards(**kwargs)[source]#

Bases: DefaultRewards, Rewards[Tuple[float, float, float]]

Basic MORL (Multi-Objective Reinforcement Learning) rewards with 3 components:
  • default score

  • energy efficiency: negative square of (speed / max_speed)

  • smoothness: negative square of speed differences

For illustration purposes.
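A minimal sketch of how a user might scalarize the 3-tuple for a single-objective learner; the weights and the scalarize helper are hypothetical, not part of the API:

    from flatland.envs.rewards import BasicMultiObjectiveRewards

    mo = BasicMultiObjectiveRewards()

    def scalarize(reward_tuple, weights=(1.0, 0.1, 0.1)):
        # Weighted sum over (default score, energy efficiency, smoothness);
        # the weights are illustrative trade-offs.
        return sum(w * r for w, r in zip(weights, reward_tuple))

    # cumulate() merges per-step tuples into one (assumed component-wise):
    total = mo.cumulate((-1.0, -0.04, -0.01), (-0.5, -0.09, 0.0))
    print(scalarize(total))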

cumulate(*rewards: Tuple[float, float, float]) → Tuple[float, float, float][source]#

Cumulate multiple rewards into one.

Parameters#

rewards

Returns#

Cumulative rewards

empty() → Tuple[float, float, float][source]#

Return the neutral initial value for cumulation.

end_of_episode_reward(agent: EnvAgent, distance_map: DistanceMap, elapsed_steps: int) → Tuple[float, float, float][source]#

Handles the end-of-episode reward for a particular agent.

Parameters#

agent : EnvAgent
distance_map : DistanceMap
elapsed_steps : int

normalize(*rewards: float, num_agents: int, max_episode_steps: int) → float[source]#

Return normalized cumulated rewards. Can be None for some reward implementations.

Parameters#

rewards : List[RewardType]
num_agents : int
max_episode_steps : int

Returns#

step_reward(agent: EnvAgent, agent_transition_data: AgentTransitionData, distance_map: DistanceMap, elapsed_steps: int) → Tuple[float, float, float][source]#

Handles the end-of-step reward for a particular agent.

Parameters#

agent : EnvAgent
agent_transition_data : AgentTransitionData
distance_map : DistanceMap
elapsed_steps : int

class flatland.envs.rewards.DefaultPenalties(value: Any)[source]#

Bases: Enum

CANCELLATION = 'CANCELLATION'#

COLLISION = 'COLLISION'#

INTERMEDIATE_EARLY_DEPARTURE = 'INTERMEDIATE_EARLY_DEPARTURE'#

INTERMEDIATE_LATE_ARRIVAL = 'INTERMEDIATE_LATE_ARRIVAL'#

INTERMEDIATE_NOT_SERVED = 'INTERMEDIATE_NOT_SERVED'#

TARGET_LATE_ARRIVAL = 'TARGET_LATE_ARRIVAL'#

TARGET_NOT_REACHED = 'TARGET_NOT_REACHED'#
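The enum members name the fine-grained penalty types of the default reward function. A small sketch, under the unverified assumption that the Dict[str, float] produced by BaseDefaultRewards is keyed by these names:

    from flatland.envs.rewards import DefaultPenalties

    # Assumption: fine-grained rewards are keyed by DefaultPenalties values;
    # verify against the source before relying on this.
    fine_grained = {p.value: 0.0 for p in DefaultPenalties}
    fine_grained[DefaultPenalties.INTERMEDIATE_NOT_SERVED.value] = -1.0

    for p in DefaultPenalties:
        print(f"{p.value}: {fine_grained[p.value]}")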
class flatland.envs.rewards.DefaultRewards(cancellation_factor: float = 1, cancellation_time_buffer: float = 0, intermediate_not_served_penalty: float = 1, intermediate_late_arrival_penalty_factor: float = 0.2, intermediate_early_departure_penalty_factor: float = 0.5, collision_factor: float = 0.0)[source]#

Bases: Rewards[float]

Aggregates the fine-grained per-penalty rewards (see BaseDefaultRewards) into a single float.

property cancellation_factor#
property cancellation_time_buffer#
property collision_factor#
cumulate(*rewards: float) → float[source]#

Cumulate multiple rewards into one.

Parameters#

rewards

Returns#

Cumulative rewards

empty() → float[source]#

Return the neutral initial value for cumulation.

end_of_episode_reward(agent: EnvAgent, distance_map: DistanceMap, elapsed_steps: int) → float[source]#

Handles the end-of-episode reward for a particular agent.

Parameters#

agent : EnvAgent
distance_map : DistanceMap
elapsed_steps : int

property intermediate_early_departure_penalty_factor#
property intermediate_late_arrival_penalty_factor#
property intermediate_not_served_penalty#
normalize(*rewards: float, num_agents: int, max_episode_steps: int) → float[source]#

Return normalized cumulated rewards. Can be None for some reward implementations.

Parameters#

rewards : List[RewardType]
num_agents : int
max_episode_steps : int

Returns#

step_reward(agent: EnvAgent, agent_transition_data: AgentTransitionData, distance_map: DistanceMap, elapsed_steps: int) → float[source]#

Handles the end-of-step reward for a particular agent.

Parameters#

agent : EnvAgent
agent_transition_data : AgentTransitionData
distance_map : DistanceMap
elapsed_steps : int
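A sketch of the intended call pattern with made-up per-agent totals; the exact normalization formula is defined by the implementation, so the printed score is illustrative only:

    from flatland.envs.rewards import DefaultRewards

    rewards = DefaultRewards()

    # Hypothetical per-agent episode totals (single floats for this class).
    episode_totals = [-42.0, -17.5, -3.0]

    cumulated = rewards.cumulate(*episode_totals)
    normalized = rewards.normalize(*episode_totals, num_agents=3, max_episode_steps=500)
    print(cumulated, normalized)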

class flatland.envs.rewards.PunctualityRewards[source]#

Bases: Rewards[Tuple[int, int]]

Punctuality: n_stops_on_time / n_stops. An agent is deemed not punctual at a stop if it arrives too late, departs too early, or does not serve the stop at all. If an agent is punctual at a stop, n_stops_on_time is increased by 1.

The implementation returns the tuple (n_stops_on_time, n_stops).
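A small sketch of turning the cumulated tuple into a punctuality ratio; the per-agent values are made up, and cumulate is assumed to add the tuples element-wise:

    from flatland.envs.rewards import PunctualityRewards

    punctuality = PunctualityRewards()

    # Made-up per-agent tuples of (n_stops_on_time, n_stops).
    per_agent = [(3, 4), (2, 2), (0, 3)]

    on_time, total = punctuality.cumulate(*per_agent)
    print(f"{on_time}/{total} stops on time ({on_time / total:.0%})")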

cumulate(*rewards: Tuple[int, int]) → Tuple[int, int][source]#

Cumulate multiple rewards into one.

Parameters#

rewards

Returns#

Cumulative rewards

empty() → Tuple[int, int][source]#

Return the neutral initial value for cumulation.

end_of_episode_reward(agent: EnvAgent, distance_map: DistanceMap, elapsed_steps: int) → Tuple[int, int][source]#

Handles the end-of-episode reward for a particular agent.

Parameters#

agent : EnvAgent
distance_map : DistanceMap
elapsed_steps : int

step_reward(agent: EnvAgent, agent_transition_data: AgentTransitionData, distance_map: DistanceMap, elapsed_steps: int) → Tuple[int, int][source]#

Handles the end-of-step reward for a particular agent.

Parameters#

agent : EnvAgent
agent_transition_data : AgentTransitionData
distance_map : DistanceMap
elapsed_steps : int

class flatland.envs.rewards.Rewards[source]#

Bases: Generic[RewardType]

Reward Function Interface.

cumulate(*rewards: RewardType) → RewardType[source]#

Cumulate multiple rewards into one.

Parameters#

rewards

Returns#

Cumulative rewards

empty() → RewardType[source]#

Return the neutral initial value for cumulation.

end_of_episode_reward(agent: EnvAgent, distance_map: DistanceMap, elapsed_steps: int) → RewardType[source]#

Handles the end-of-episode reward for a particular agent.

Parameters#

agent : EnvAgent
distance_map : DistanceMap
elapsed_steps : int

normalize(*rewards: RewardType, num_agents: int, max_episode_steps: int) → float | None[source]#

Return normalized cumulated rewards. Can be None for some reward implementations.

Parameters#

rewards : List[RewardType]
num_agents : int
max_episode_steps : int

Returns#

step_reward(agent: EnvAgent, agent_transition_data: AgentTransitionData, distance_map: DistanceMap, elapsed_steps: int) → RewardType[source]#

Handles the end-of-step reward for a particular agent.

Parameters#

agent : EnvAgent
agent_transition_data : AgentTransitionData
distance_map : DistanceMap
elapsed_steps : int
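To implement a custom reward function, subclass Rewards and implement these methods. A minimal hypothetical sketch (StepCountRewards is not part of Flatland) that simply counts elapsed steps:

    from typing import Optional

    from flatland.envs.rewards import Rewards

    class StepCountRewards(Rewards[int]):
        # Hypothetical reward function: counts the steps each agent
        # spends in the episode; purely illustrative.

        def empty(self) -> int:
            return 0  # neutral element for cumulation

        def cumulate(self, *rewards: int) -> int:
            return sum(rewards)

        def step_reward(self, agent, agent_transition_data, distance_map, elapsed_steps) -> int:
            return 1  # one unit per elapsed step

        def end_of_episode_reward(self, agent, distance_map, elapsed_steps) -> int:
            return 0  # nothing extra at episode end

        def normalize(self, *rewards: int, num_agents: int, max_episode_steps: int) -> Optional[float]:
            # Fraction of the total step budget consumed, averaged over agents.
            return self.cumulate(*rewards) / (num_agents * max_episode_steps)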

flatland.envs.rewards.defaultdict_list()[source]#
flatland.envs.rewards.defaultdict_set()[source]#
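These two helpers are undocumented; by their names they presumably return a defaultdict(list) and a defaultdict(set), respectively (module-level factories of this kind are commonly used to keep the resulting objects picklable). A usage sketch under that assumption:

    from flatland.envs.rewards import defaultdict_list, defaultdict_set

    # Assumption: equivalent to defaultdict(list) / defaultdict(set);
    # verify against the source before relying on this.
    scores = defaultdict_list()
    scores["agent_0"].append(-1.0)

    visited = defaultdict_set()
    visited["agent_0"].add((3, 4))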