
Commit f7794d0
Authored Apr 17, 2023

Merge pull request #17 from rufo123/experiment_minimap
Added Dynamic Reward/State Configuration

2 parents efb5425 + d75228d, commit f7794d0

22 files changed: +596 -120 lines
 

‎action_translator_enum.py

+1

@@ -46,6 +46,7 @@ def take_action(self, par_controls: Controls,
         Returns:
         - The integer value of the action taken.
         """
+        print(par_sleep_time)
         executed_correctly: bool = False
         action = self.value
         par_controls.release_all_keys()

‎agents/ppo.py

+9 -10

@@ -17,9 +17,7 @@
 from torch.multiprocessing import Process, Pipe

 import graph.make_graph
-from car_states.enabled_game_api_values import EnabledGameApiValues
-from envs.strategy.reward.a_reward_strategy import ARewardStrategy
-from envs.strategy.state_calc.a_state_calc_strategy import AStateCalculationStrategy
+from configuration.i_configuration import IConfiguration
 from game_inputs import GameInputs
 from utils.print_utils.printer import Printer
 from utils.stats import MovingAverageScore, write_to_file, append_to_file
@@ -82,7 +80,7 @@ def scalar_to_support(par_x, par_support_size):
 # pylint: disable=too-many-statements
 # noinspection DuplicatedCode
 def worker(connection, env_param, env_func, count_of_iterations, count_of_envs,
-           count_of_steps, gamma, gae_lambda) -> None:
+           count_of_steps, gamma, gae_lambda, start_iteration_number) -> None:
     """
     worker function for Proximal Policy Optimization (PPO) agent training.

@@ -100,7 +98,9 @@ def worker(connection, env_param, env_func, count_of_iterations, count_of_envs,
         None.
     """
     envs = [env_func(*env_param) for _ in range(count_of_envs)]
-    observations = torch.stack([torch.from_numpy(env.reset()) for env in envs])
+    observations = torch.stack([torch.from_numpy(
+        env.reset(start_iteration_number)
+    ) for env in envs])
     game_score = np.zeros(count_of_envs)
     steps_taken_storage = np.zeros(count_of_steps)

@@ -141,7 +141,7 @@ def worker(connection, env_param, env_func, count_of_iterations, count_of_envs,
                 game_score[idx] = 0
                 steps_taken_storage[idx] = steps_took_to_complete
                 steps_taken_list.append(steps_taken_storage[idx])
-                observation = envs[idx].reset()
+                observation = envs[idx].reset(start_iteration_number + iteration)
                 # observations[idx] = observation.clone().detach()
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore")
@@ -219,9 +219,7 @@ def __init__(self, model, optimizer, gamma=0.997, epsilon=0.1,
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    def train(self,
-             env_param: tuple[GameInputs, ARewardStrategy,
-                              AStateCalculationStrategy, EnabledGameApiValues
-                              ],
+             env_param: tuple[GameInputs, IConfiguration],
              env_func, count_of_actions,
              count_of_iterations=10000, count_of_processes=2,
              count_of_envs=16, count_of_steps=128, count_of_epochs=4,
@@ -267,7 +265,8 @@ def train(self,
            parr_connection, child_connection = Pipe()
            process = Process(target=worker, args=(
                child_connection, env_param, env_func, count_of_iterations,
-               count_of_envs, count_of_steps, self.gamma, self.gae_lambda))
+               count_of_envs, count_of_steps, self.gamma, self.gae_lambda,
+               self.start_iteration_value))
            connections.append(parr_connection)
            processes.append(process)
            process.start()

‎car_states/car_state_in_environment.py

+1 -1

@@ -81,7 +81,7 @@ def assign_values(self, par_lap_progress_difference: float = -1,
                 car.
             par_revolutions_per_minute (float): The number of revolutions of the car's engine per
                 minute.
-            par_wrong_way_indicator (int): A binary indicator (0 or 1) that shows if the car is
+            par_wrong_way_indicator (float): A binary indicator (0 or 1) that shows if the car is
                 going the wrong way.
             par_mini_map (numpy.ndarray): A 2D array that represents the mini-map of the track.
             par_car_state (CarState): An instance of the parent class `CarState` to copy values

‎configuration/experiments/fifth_experiment_removed_progress_reward.py

+20 -2

@@ -44,7 +44,7 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Return a reward strategy object for the fifth experiment.

@@ -55,7 +55,7 @@ def return_reward_strategy(self) -> ARewardStrategy:
             RewardStrategyEnum.THIRD_REWARD_STRATEGY
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Return a state calculation strategy object for the fifth experiment.

@@ -94,3 +94,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_partial_terminal_lap_smaller_reward"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 3
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/experiments/first_experiment_small_state.py

+20 -2

@@ -44,7 +44,7 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Return a reward strategy object for the first experiment.

@@ -55,7 +55,7 @@ def return_reward_strategy(self) -> ARewardStrategy:
             RewardStrategyEnum.FIRST_REWARD_STRATEGY
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Return a state calculation strategy object for the first experiment.

@@ -94,3 +94,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_first_small_state"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 3
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/experiments/fourth_experiment_lap_terminal_scaled_reward.py

+20 -2

@@ -44,7 +44,7 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Returns an instance of `ARewardStrategy` that is used to calculate the rewards in the
         reinforcement learning algorithm for the fourth experiment.
@@ -56,7 +56,7 @@ def return_reward_strategy(self) -> ARewardStrategy:
             RewardStrategyEnum.SECOND_REWARD_STRATEGY
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Returns an instance of `AStateCalculationStrategy` that is used to calculate the state
         representation for the reinforcement learning algorithm for the fourth experiment.
@@ -96,3 +96,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_partial_terminal_lap"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 3
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/experiments/second_experiment_bigger_state.py

+20 -2

@@ -45,7 +45,7 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Returns an instance of `ARewardStrategy` that is used to calculate the rewards in the
         reinforcement learning algorithm for the second experiment.
@@ -57,7 +57,7 @@ def return_reward_strategy(self) -> ARewardStrategy:
             RewardStrategyEnum.FIRST_REWARD_STRATEGY
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Returns an instance of `AStateCalculationStrategy` that is used to calculate the state
         representation for the reinforcement learning algorithm for the second experiment.
@@ -97,3 +97,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_second_bigger_state"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 3
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/experiments/sixth_experiment_minimap.py

+30 -6

@@ -48,28 +48,34 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Returns an instance of `ARewardStrategy` that is used to calculate the rewards in the
         reinforcement learning algorithm for the sixth experiment.

         Returns:
             ARewardStrategy: An instance of `ARewardStrategy`.
         """
-        return self.a_configuration_factory.create_reward_strategy(
-            RewardStrategyEnum.FIFTH_REWARD_STRATEGY
+        return self.a_configuration_factory.create_dynamic_reward_strategy(
+            [1680, 5110, 10770],
+            [RewardStrategyEnum.FOURTH_MINIMAP_10_PERCENT,
+             RewardStrategyEnum.FOURTH_MINIMAP_20_PERCENT,
+             RewardStrategyEnum.FOURTH_MINIMAP_40_PERCENT],
+            par_iteration_number
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Returns an instance of `AStateCalculationStrategy` that is used to calculate the state
         representation for the reinforcement learning algorithm for the sixth experiment.

         Returns:
             AStateCalculationStrategy: An instance of `AStateCalculationStrategy`.
         """
-        return self.a_configuration_factory.create_state_calc_strategy(
-            StateStrategyEnum.MINIMAP_STATE_STRATEGY
+        return self.a_configuration_factory.create_dynamic_state_calc_strategy(
+            [1680, 5110],
+            [StateStrategyEnum.MINIMAP_STATE_STRATEGY,
+             StateStrategyEnum.MINIMAP_STATE_NORMALIZED_STRATEGY]
         )

     def return_enabled_game_api_values(self) -> EnabledGameApiValues:
@@ -100,3 +106,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_mini_map"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 6
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/experiments/third_experiment_bigger_state_normalized.py

+20 -2

@@ -44,7 +44,7 @@ def return_model(self) -> AShortRaceFactory:
             )
         )

-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Returns an instance of `ARewardStrategy` that is used to calculate the rewards in the
         reinforcement learning algorithm for the third experiment.
@@ -56,7 +56,7 @@ def return_reward_strategy(self) -> ARewardStrategy:
             RewardStrategyEnum.FIRST_REWARD_STRATEGY
         )

-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Returns an instance of `AStateCalculationStrategy` that is used to calculate the state
         representation for the reinforcement learning algorithm for the third experiment.
@@ -96,3 +96,21 @@ def return_dimensional_input(self) -> tuple:

     def return_name(self) -> str:
         return "experiment_third_bigger_state_normalized"
+
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment - 3.
+        """
+        return 3
+
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment - 3.
+
+        Returns:
+            int: The maximum speed for the visualized experiment - 3.
+        """
+        return 3

‎configuration/factory/configuration_factory.py

+72

@@ -36,6 +36,42 @@ def create_reward_strategy(self, par_reward_strategy: RewardStrategyEnum) -> ARe
         """
         return par_reward_strategy.return_strategy()

+    def create_dynamic_reward_strategy(self,
+                                       par_border_iteration_numbers: list[int],
+                                       par_reward_strategies: list[RewardStrategyEnum],
+                                       par_iteration_number: int = 0):
+        """
+        Creates a dynamic reward strategy for the reinforcement learning agent based on the
+        position of the iteration number in the par_border_iteration_numbers list.
+
+        Args:
+            - par_border_iteration_numbers (list[int]): A list of border iteration numbers.
+            - par_reward_strategies (list[RewardStrategyEnum]): A list of reward strategies.
+
+        Returns:
+            - ARewardStrategy: An object that implements the dynamic reward strategy.
+        """
+        # Check if the lengths of the input lists match
+        if len(par_border_iteration_numbers) != len(par_reward_strategies):
+            raise ValueError(
+                "Length of border iteration numbers list does"
+                " not match length of reward strategies list.")
+
+        # Get the current iteration number
+        current_iteration = par_iteration_number
+
+        # Iterate through the border iteration numbers list and compare with the current
+        # iteration number
+        for border_iteration, reward_strategy in zip(par_border_iteration_numbers,
+                                                     par_reward_strategies):
+            if current_iteration < border_iteration:
+                return reward_strategy.return_strategy()
+
+        # If the current iteration number is greater than all border iteration numbers,
+        # return the last reward strategy
+        return par_reward_strategies[-1].return_strategy()
+
     def create_state_calc_strategy(
         self,
         par_state_calc_strategy: StateStrategyEnum
@@ -49,6 +85,42 @@ def create_state_calc_strategy(
         """
         return par_state_calc_strategy.return_strategy()

+    def create_dynamic_state_calc_strategy(self,
+                                           par_border_iteration_numbers: list[int],
+                                           par_state_calc_strategies: list[StateStrategyEnum],
+                                           par_iteration_number: int = 0):
+        """
+        Creates a dynamic state calc strategy for the reinforcement learning agent based on the
+        position of the iteration number in the par_border_iteration_numbers list.
+
+        Args:
+            - par_border_iteration_numbers (list[int]): A list of border iteration numbers.
+            - par_state_calc_strategies (list[StateStrategyEnum]): A list of state calc strategies.
+
+        Returns:
+            - AStateCalculationStrategy: An object that implements the dynamic state calc strategy.
+        """
+        # Check if the lengths of the input lists match
+        if len(par_border_iteration_numbers) != len(par_state_calc_strategies):
+            raise ValueError(
+                "Length of border iteration numbers list does"
+                " not match length of state calc strategies list.")
+
+        # Get the current iteration number
+        current_iteration = par_iteration_number
+
+        # Iterate through the border iteration numbers list and compare with the current
+        # iteration number
+        for border_iteration, state_calc_strategy in zip(par_border_iteration_numbers,
+                                                         par_state_calc_strategies):
+            if current_iteration < border_iteration:
+                return state_calc_strategy.return_strategy()
+
+        # If the current iteration number is greater than all border iteration numbers,
+        # return the last state calc strategy
+        return par_state_calc_strategies[-1].return_strategy()
+
     def create_dimensional_input(self, par_input_dim: Union[tuple, int] = 4) -> tuple:
         """
         Creates a dimensional input tuple.
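
Note: the two factory methods above reduce to a threshold lookup over border iteration numbers. A minimal standalone sketch of that lookup (illustrative names and plain strings in place of the repository's enum members, not code from this commit):

# Sketch of the border-based selection used by the dynamic factory methods above.
def pick_by_iteration(borders: list[int], choices: list[str], iteration: int = 0) -> str:
    """Return the first choice whose border is still ahead of `iteration`;
    once every border has been passed, fall back to the last choice."""
    if len(borders) != len(choices):
        raise ValueError("borders and choices must have the same length")
    for border, choice in zip(borders, choices):
        if iteration < border:
            return choice
    return choices[-1]


# With the sixth experiment's borders [1680, 5110, 10770]:
print(pick_by_iteration([1680, 5110, 10770], ["10%", "20%", "40%"], 0))      # 10%
print(pick_by_iteration([1680, 5110, 10770], ["10%", "20%", "40%"], 3000))   # 20%
print(pick_by_iteration([1680, 5110, 10770], ["10%", "20%", "40%"], 12000))  # 40%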

‎configuration/i_configuration.py

+24 -2

@@ -25,21 +25,25 @@ def return_model(self) -> AShortRaceFactory:
         """

     @abstractmethod
-    def return_reward_strategy(self) -> ARewardStrategy:
+    def return_reward_strategy(self, par_iteration_number=0) -> ARewardStrategy:
         """
         Return an instance of ARewardStrategy.

         Returns:
             ARewardStrategy: An instance of ARewardStrategy.
+            par_iteration_number: An optional value to select dynamic reward strategies based on
+            the current iteration number
         """

     @abstractmethod
-    def return_state_calc_strategy(self) -> AStateCalculationStrategy:
+    def return_state_calc_strategy(self, par_iteration_number=0) -> AStateCalculationStrategy:
         """
         Return an instance of AStateCalculationStrategy.

         Returns:
             AStateCalculationStrategy: An instance of AStateCalculationStrategy.
+            par_iteration_number: An optional value to select dynamic state calc strategies based on
+            the current iteration number
         """

     @abstractmethod
@@ -68,3 +72,21 @@ def return_dimensional_input(self) -> tuple:
         Returns:
             tuple: a dimensional input for the experiment.
         """
+
+    @abstractmethod
+    def return_max_speed_non_visualised(self) -> int:
+        """
+        Return the maximum speed for the non-visualized experiment.
+
+        Returns:
+            int: The maximum speed for the non-visualized experiment.
+        """
+
+    @abstractmethod
+    def return_max_speed_visualised(self) -> int:
+        """
+        Return the maximum speed for the visualized experiment.
+
+        Returns:
+            int: The maximum speed for the visualized experiment.
+        """

‎envs/short_race_env.py

+29 -17

@@ -11,6 +11,7 @@
 from car_states.car_state import CarState
 from car_states.car_state_in_environment import CarStateInEnvironment
 from car_states.enabled_game_api_values import EnabledGameApiValues
+from configuration.i_configuration import IConfiguration
 from envs.strategy.reward.a_reward_strategy import ARewardStrategy
 from envs.strategy.state_calc.a_state_calc_strategy import AStateCalculationStrategy
 from game_inputs import GameInputs
@@ -38,6 +39,7 @@ class Env:
     a_state_calculation_strategy: AStateCalculationStrategy
     a_enabled_game_api_values: EnabledGameApiValues
     a_state_matrix: np.ndarray
+    a_configuration: IConfiguration

     a_edited_car_state: CarStateInEnvironment

@@ -49,9 +51,7 @@ class Env:
     }

     def __init__(self, par_game_inputs: GameInputs,
-                 par_reward_strategy: ARewardStrategy,
-                 par_state_calc_strategy: AStateCalculationStrategy,
-                 par_enabled_game_api_values: EnabledGameApiValues
+                 par_configuration: IConfiguration
                  ):
         """
         Initializes an instance of the Env class.
@@ -60,14 +60,17 @@ def __init__(self, par_game_inputs: GameInputs,
             par_game_inputs: A GameInputs object representing the input to the game.
         """
         super().__init__()
+        print("KEK")
         self.a_game_speed: int = 1
         self.env = None
         self.action_counter = 0
         self.controls = Controls()
         self.game_steps_per_episode: int = self.default_settings['game_steps_per_episode']
-        self.a_reward_strategy = par_reward_strategy
-        self.a_state_calculation_strategy = par_state_calc_strategy
-        self.a_enabled_game_api_values = par_enabled_game_api_values
+        self.a_reward_strategy = par_configuration.return_reward_strategy()
+        self.a_state_calculation_strategy = par_configuration.return_state_calc_strategy()
+        self.a_enabled_game_api_values = par_configuration.return_enabled_game_api_values()
+
+        self.a_configuration = par_configuration

         self.a_game_inputs: GameInputs = par_game_inputs
         self.a_lap_percent_curr = 0.00
@@ -77,6 +80,13 @@ def __init__(self, par_game_inputs: GameInputs,

         self.a_state_matrix = np.zeros((5, 5), dtype=float) - 1

+        par_game_inputs.agent_settings_to_game.put((
+            self.default_settings['visualize'],
+            self.default_settings['realtime']
+        ))
+
+        print("Values")
+
     def make_state(self):
         """
         Generates the state tuple to be used in the next step of the environment.
@@ -106,13 +116,19 @@ def make_state(self):

         return state, tmp_reward, terminal

-    def reset(self):
+    def reset(self, iteration_number: int):
         """
         Resets the environment to its initial state.

         Returns:
             A numpy array representing the initial state of the environment.
         """
+        Printer.print_error(str(iteration_number))
+        self.a_reward_strategy = \
+            self.a_configuration.return_reward_strategy(iteration_number)
+        self.a_state_calculation_strategy = \
+            self.a_configuration.return_state_calc_strategy(iteration_number)
+
         self.a_lap_percent_curr = 0.00
         self.controls.release_all_keys()
         self.controls.reset_directional_controls()
@@ -122,7 +138,7 @@ def reset(self):
         tmp_queue_game_inputs: multiprocessing.Queue = \
             self.a_game_inputs.game_initialization_inputs.get()
         # noinspection PyUnresolvedReferences
-        self.a_game_speed = tmp_queue_game_inputs[1]
+        self.a_game_speed = tmp_queue_game_inputs[1] if not self.default_settings['realtime'] else 1
         self.a_game_inputs.game_initialization_inputs.put(tmp_queue_game_inputs)

         self.controls.a_is_executing_critical_action = True
@@ -184,6 +200,8 @@ def step(self, action):
         while not tmp_car_state_from_game.has_non_default_values():
             tmp_car_state_from_game: CarState = self.a_game_inputs.agent_inputs_state.get()

+        Printer.print_info("SPEED " + str(tmp_car_state_from_game.speed_mph), "ENV")
+
         tmp_lap_progress_diff: float = \
             self.get_lap_progress_dif(tmp_car_state_from_game.lap_progress)

@@ -244,21 +262,15 @@ def close(self):


 def create_env(par_game_inputs: GameInputs,
-               par_reward_strategy: ARewardStrategy,
-               par_state_calc_strategy: AStateCalculationStrategy,
-               par_enabled_game_api_values: EnabledGameApiValues) -> Env:
+               par_configuration: IConfiguration) -> Env:
     """
     Creates a game environment for playing the game.

     :param par_game_inputs: The game inputs for the environment.
-    :param par_reward_strategy: The reward strategy.
-    :param par_state_calc_strategy: The state calculation strategy.
-    :param par_enabled_game_api_values: The enabled game api values.
+    :param par_configuration: The specified configuration.
     :return: An instance of the game environment.
     """
     return Env(
         par_game_inputs,
-        par_reward_strategy,
-        par_state_calc_strategy,
-        par_enabled_game_api_values
+        par_configuration
     )
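
Because Env.reset(...) now takes the global iteration number, the reward and state strategies are re-resolved at the start of every episode. A toy sketch of that flow (illustrative class, not the project's Env):

# Toy environment that refreshes its strategies from a configuration object on reset.
class ToyEnv:
    def __init__(self, configuration):
        self.configuration = configuration
        self.reward_strategy = configuration.return_reward_strategy()
        self.state_strategy = configuration.return_state_calc_strategy()

    def reset(self, iteration_number: int):
        # The strategy pair may change once iteration_number crosses a configured border.
        self.reward_strategy = self.configuration.return_reward_strategy(iteration_number)
        self.state_strategy = self.configuration.return_state_calc_strategy(iteration_number)
        return self._initial_observation()

    def _initial_observation(self):
        return [0.0, 0.0, 0.0]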

‎envs/strategy/reward/fouth_minimap/__init__.py

Whitespace-only changes.

‎envs/strategy/reward/fourth_reward_strategy.py ‎envs/strategy/reward/fouth_minimap/fourth_minimap_10_percent_strateg.py

+1 -1

@@ -15,7 +15,7 @@

 # pylint: disable=too-few-public-methods
 # pylint: disable=R0801
-class FourthRewardStrategy(ARewardStrategy):
+class FourthMinimap10PercentRewardStrategy(ARewardStrategy):
     """
     This class is an implementation of the ARewardStrategy abstract class.
     This implementation gives:

‎envs/strategy/reward/fifth_reward_strategy.py ‎envs/strategy/reward/fouth_minimap/fourth_minimap_20_percent_strategy.py

+3 -6

@@ -15,19 +15,16 @@

 # pylint: disable=too-few-public-methods
 # pylint: disable=R0801
-class FifthRewardStrategy(ARewardStrategy):
+class FourthMinimap20PercentRewardStrategy(ARewardStrategy):
     """
     This class is an implementation of the ARewardStrategy abstract class.
     This implementation gives:
     - positive reward for:
-        Offset (Distance From Road Centre): <0, 1> and <-1, 0>
-
-        Completing The Race (Partially - 10%)
+        Completing The Race (Partially - 20%)

     - negative reward for:
-        Offset (Distance From Road Centre): (-10, -1> and (-inf, -10>
-
         Not Completing The Race in specified count_of_steps
+
     """

     # noinspection DuplicatedCode
envs/strategy/reward/fouth_minimap/fourth_minimap_40_percent_strategy.py

+102

@@ -0,0 +1,102 @@
+"""
+Module: fourth_minimap_40_percent_strategy
+
+This module contains the FourthMinimap40PercentRewardStrategy class which is an implementation of
+the ARewardStrategy abstract class.
+
+Classes:
+    FourthMinimap40PercentRewardStrategy
+
+"""
+from car_states.car_state_in_environment import CarStateInEnvironment
+from envs.strategy.reward.a_reward_strategy import ARewardStrategy
+from utils.print_utils.printer import Printer
+
+
+# pylint: disable=too-few-public-methods
+# pylint: disable=R0801
+class FourthMinimap40PercentRewardStrategy(ARewardStrategy):
+    """
+    This class is an implementation of the ARewardStrategy abstract class.
+    This implementation gives:
+    - positive reward for:
+        Completing The Race (Partially - 40%)
+
+    - negative reward for:
+        Not Completing The Race in specified count_of_steps
+
+    """
+
+    # noinspection DuplicatedCode
+    def evaluate_reward(self, par_env_inputs: CarStateInEnvironment,
+                        par_game_steps_per_episode: int,
+                        par_env_steps_counter: int,
+                        par_terminal: bool) -> tuple[float, bool]:
+        """
+        This method calculates the reward of the current step for the ShortRaceEnv environment.
+
+        Args:
+            par_env_inputs (CarStateInEnvironment): Object containing car state represented by
+                the environment
+            par_game_steps_per_episode (int): Count of Configured Game Steps per Env Episode
+            par_env_steps_counter: (int) Count of passed game Steps in Env
+            par_terminal (bool): If the environment has reached a terminal state.
+
+        Returns:
+            Tuple[float, bool]: The reward value and if the episode is finished.
+        """
+        reward: float = 0
+        terminal: bool = par_terminal
+
+        # How far am I from the ideal racing line?
+
+        # Fiat Punto Top Speed - 179  # temporarily removed for now
+
+        # 0 - 50 - Negative Reward ((-1) - 0)
+        # if -1 >= tmp_speed < 50:
+        #     reward += (((50 - tmp_speed) / 50) / 255) * -1
+        # 50 - 100 - Positive Reward ( 0 - 1)
+        # elif 50 <= tmp_speed <= 100:
+        #     reward += (((tmp_speed - 50) / 50) / 255)
+        # 100 - 179 - Reward 1 - (-1)
+        # else:
+        #     reward += (((179 - tmp_speed) / 39.5) - 1) / 255
+
+        tmp_normalization_value: int = par_game_steps_per_episode
+
+        reward += self.__lap_progress_reward(par_env_inputs.lap_progress_difference,
+                                             tmp_normalization_value)
+
+        if par_env_steps_counter >= par_game_steps_per_episode or par_env_inputs.lap_progress >= 40:
+            terminal = True
+            if par_env_steps_counter >= par_game_steps_per_episode:
+                Printer.print_info("Exceeded Step Limit", "FOURTH_REWARD_STRATEGY", )
+                reward += ((par_env_inputs.lap_progress / 20) - 1)
+            if par_env_inputs.lap_progress >= 40:
+                reward += 1
+                Printer.print_success("Lap Complete", "FOURTH_REWARD_STRATEGY")
+            Printer.print_info("TERMINAL STATE ACHIEVED", "FOURTH_REWARD_STRATEGY")
+        return reward, terminal
+
+    def __lap_progress_reward(self, par_lap_progress_diff: float,
+                              par_normalization_value: int) -> float:
+        """
+        Calculates the lap progress reward based on the difference in lap progress
+        between the current and previous time step.
+
+        :param par_lap_progress_diff: A float representing the difference in lap
+            progress between the current and previous time step. The value should
+            be between -1 and 1, where negative values represent falling behind
+            and positive values represent making progress.
+        :param par_normalization_value: An integer representing the normalization
+            value to use in the reward calculation. This value should be greater
+            than zero to avoid division by zero errors.
+
+        :return: A float representing the lap progress reward. The value will be
+            positive if the agent is making progress and negative if the agent
+            is falling behind. The magnitude of the reward will be proportional
+            to the magnitude of the lap progress difference, divided by the
+            normalization value.
+        """
+        Printer.print_basic("Progress: " + str(par_lap_progress_diff), "FOURTH_REWARD_STRATEGY")
+        return par_lap_progress_diff / par_normalization_value

‎envs/strategy/reward/reward_strategy_enum.py

+12 -6

@@ -6,9 +6,13 @@
 from enum import Enum

 from envs.strategy.reward.a_reward_strategy import ARewardStrategy
-from envs.strategy.reward.fifth_reward_strategy import FifthRewardStrategy
 from envs.strategy.reward.first_reward_strategy import FirstRewardStrategy
-from envs.strategy.reward.fourth_reward_strategy import FourthRewardStrategy
+from envs.strategy.reward.fouth_minimap.fourth_minimap_10_percent_strateg import \
+    FourthMinimap10PercentRewardStrategy
+from envs.strategy.reward.fouth_minimap.fourth_minimap_20_percent_strategy import \
+    FourthMinimap20PercentRewardStrategy
+from envs.strategy.reward.fouth_minimap.fourth_minimap_40_percent_strategy import \
+    FourthMinimap40PercentRewardStrategy
 from envs.strategy.reward.second_reward_strategy import SecondRewardStrategy
 from envs.strategy.reward.third_reward_strategy import ThirdRewardStrategy

@@ -20,8 +24,9 @@ class RewardStrategyEnum(Enum):
     FIRST_REWARD_STRATEGY = 0
     SECOND_REWARD_STRATEGY = 1
     THIRD_REWARD_STRATEGY = 2
-    FOURTH_REWARD_STRATEGY = 3
-    FIFTH_REWARD_STRATEGY = 4
+    FOURTH_MINIMAP_10_PERCENT = 3
+    FOURTH_MINIMAP_20_PERCENT = 4
+    FOURTH_MINIMAP_40_PERCENT = 5

     def return_strategy(self) -> ARewardStrategy:
         """
@@ -42,8 +47,9 @@ def return_strategy(self) -> ARewardStrategy:
             self.FIRST_REWARD_STRATEGY: FirstRewardStrategy(),
             self.SECOND_REWARD_STRATEGY: SecondRewardStrategy(),
             self.THIRD_REWARD_STRATEGY: ThirdRewardStrategy(),
-            self.FOURTH_REWARD_STRATEGY: FourthRewardStrategy(),
-            self.FIFTH_REWARD_STRATEGY: FifthRewardStrategy(),
+            self.FOURTH_MINIMAP_10_PERCENT: FourthMinimap10PercentRewardStrategy(),
+            self.FOURTH_MINIMAP_20_PERCENT: FourthMinimap20PercentRewardStrategy(),
+            self.FOURTH_MINIMAP_40_PERCENT: FourthMinimap40PercentRewardStrategy(),
         }

         try:
envs/strategy/state_calc/minimap_state_normalized_strategy.py

+103

@@ -0,0 +1,103 @@
+"""
+Module for implementing a state calculation strategy that stores a 5x5 matrix of past states
+and actions and returns it as a normalized flattened Torch Tensor.
+
+This module defines the `MinimapStateNormalizedStrategy` class, which inherits from the abstract
+base class `AStateCalculationStrategy`.
+This class overrides the `calculate_state` method to store the last
+five observations and actions in a matrix, which is then flattened and returned as a Torch Tensor.
+"""
+from typing import Union
+
+import numpy
+import torch
+from numpy import ndarray
+
+from car_states.car_state_in_environment import CarStateInEnvironment
+from envs.strategy.state_calc.a_state_calc_strategy import AStateCalculationStrategy
+
+
+# pylint: disable=R0801
+# noinspection DuplicatedCode
+class MinimapStateNormalizedStrategy(AStateCalculationStrategy):
+    """
+    A state calculation strategy that stores the last five observations and actions in a 5x5 matrix
+    and returns it as a normalized flattened Torch Tensor.
+
+    This class inherits from the abstract base class `AStateCalculationStrategy`. It overrides the
+    `calculate_state` method to store the last five observations and actions in a 5x5 matrix. The
+    matrix is then flattened and returned as a Torch Tensor.
+
+    Attributes:
+        a_car_state_in_environment (CarStateInEnvironment): An object representing the car state
+            in the environment
+    """
+
+    a_car_state_in_environment: CarStateInEnvironment
+
+    def __init__(self):
+        self.a_car_state_in_environment = CarStateInEnvironment()
+
+    def calculate_state(self, par_car_state: CarStateInEnvironment,
+                        par_action_taken: Union[int, None]) -> torch.Tensor:
+        """
+        Calculate the state tensor for a given car state and action taken.
+
+        Args:
+            par_car_state (CarStateInEnvironment): The car state in the environment.
+            par_action_taken (Union[int, None]): The action taken by the car, or None if no action
+                has been taken.
+
+        Returns:
+            torch.Tensor: A 3D tensor of shape (4, 48, 48) representing the current state of the
+            car.
+            The four layers of the tensor represent the mini-map, lap progress, car speed,
+            and wrong-way indicator, respectively.
+            The values in the mini-map layer are resized to 48x48 pixels and the lap progress and
+            car speed layers are rounded to a specified number of digits.
+            The wrong-way indicator layer contains binary values indicating whether the car is going
+            the wrong way on the track or not.
+        """
+        normalized_state_values: CarStateInEnvironment = self.normalize_state_values(par_car_state)
+
+        mini_map_resized_2d = normalized_state_values.mini_map
+        lap_progress_2d = numpy.full((48, 48), normalized_state_values.lap_progress)
+        car_speed_2d = numpy.full((48, 48), normalized_state_values.speed_mph)
+        wrong_way_2d = numpy.full((48, 48), normalized_state_values.wrong_way_indicator)
+
+        new_state = numpy.stack((mini_map_resized_2d, lap_progress_2d, car_speed_2d, wrong_way_2d))
+
+        # returns 3D matrix of size 4x48x48
+        return torch.from_numpy(new_state)
+
+    def normalize_state_values(self, par_car_state_not_normalized: CarStateInEnvironment) \
+            -> CarStateInEnvironment:
+        """
+        Normalizes the input state values and returns a tuple of normalized values.
+        Args:
+            par_car_state_not_normalized (CarState): A CarState object of un-normalized state
+                values, including the car speed, distance offset, lap progress, and direction
+                offset.
+        Returns:
+            CarState: A CarState object of normalized state values, including the
+                normalized car speed, normalized distance offset, normalized lap progress,
+                and normalized direction offset.
+        """
+
+        self.a_car_state_in_environment.reset_car_state()
+
+        tmp_normalized_minimap: ndarray = \
+            par_car_state_not_normalized.mini_map.astype('float32') / 255.0
+
+        tmp_car_top_speed: float = 111
+        tmp_normalized_speed = par_car_state_not_normalized.speed_mph / tmp_car_top_speed
+
+        tmp_normalized_lap_progress: float = par_car_state_not_normalized.lap_progress / 100
+
+        self.a_car_state_in_environment.reset_car_state()
+        self.a_car_state_in_environment.assign_values(
+            par_speed_mph=tmp_normalized_speed,
+            par_lap_progress=tmp_normalized_lap_progress,
+            par_wrong_way_indicator=par_car_state_not_normalized.wrong_way_indicator,
+            par_mini_map=tmp_normalized_minimap
+        )
+        return self.a_car_state_in_environment
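
For intuition about the state produced by MinimapStateNormalizedStrategy, the following standalone sketch (made-up input values, not code from this commit) builds the same kind of 4x48x48 tensor: a normalized mini-map plus constant planes for lap progress, speed and the wrong-way flag:

import numpy as np
import torch

# Made-up readings standing in for a real CarStateInEnvironment.
mini_map = np.random.randint(0, 256, (48, 48)).astype('float32') / 255.0  # pixels -> [0, 1]
lap_progress, speed_mph, wrong_way = 12.5, 55.0, 0.0
top_speed = 111.0  # same constant the strategy above uses for normalization

state = np.stack((
    mini_map,                                   # layer 0: normalized mini-map
    np.full((48, 48), lap_progress / 100.0),    # layer 1: lap progress in [0, 1]
    np.full((48, 48), speed_mph / top_speed),   # layer 2: normalized speed
    np.full((48, 48), wrong_way),               # layer 3: wrong-way indicator
))

print(torch.from_numpy(state).shape)  # torch.Size([4, 48, 48])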

‎envs/strategy/state_calc/state_strategy_enum.py

+5 -1

@@ -9,6 +9,8 @@
 from envs.strategy.state_calc.basic_state_strategy import BasicStateStrategy
 from envs.strategy.state_calc.bigger_state_normalized_strategy import BiggerStateNormalizedStrategy
 from envs.strategy.state_calc.bigger_state_strategy import BiggerStateStrategy
+from envs.strategy.state_calc.minimap_state_normalized_strategy import \
+    MinimapStateNormalizedStrategy
 from envs.strategy.state_calc.minimap_state_strategy import MinimapStateStrategy


@@ -20,6 +22,7 @@ class StateStrategyEnum(Enum):
     BIGGER_STATE_STRATEGY = 1
     BIGGER_STATE_STRATEGY_NORMALIZED = 2
     MINIMAP_STATE_STRATEGY = 3
+    MINIMAP_STATE_NORMALIZED_STRATEGY = 4

     def return_strategy(self) -> AStateCalculationStrategy:
         """
@@ -41,7 +44,8 @@ def return_strategy(self) -> AStateCalculationStrategy:
             self.BASIC_STATE_STRATEGY: BasicStateStrategy(),
             self.BIGGER_STATE_STRATEGY: BiggerStateStrategy(),
             self.BIGGER_STATE_STRATEGY_NORMALIZED: BiggerStateNormalizedStrategy(),
-            self.MINIMAP_STATE_STRATEGY: MinimapStateStrategy()
+            self.MINIMAP_STATE_STRATEGY: MinimapStateStrategy(),
+            self.MINIMAP_STATE_NORMALIZED_STRATEGY: MinimapStateNormalizedStrategy()
         }

         try:

‎game_api/game.py

+79 -43

@@ -78,51 +78,39 @@ class Game:
     a_gps: GPS

     a_speedometer: Speedometer
-
     a_lap_progress: LapProgress
-
     a_lap_time: LapTime
-
     a_revolutions_per_minute: RevolutionsPerMinute
-
     a_wrong_way: WrongWay

     a_is_recording: bool

     a_user23 = ctypes.windll.user32
-
     a_list_bitmap: []
-
     a_controls: Controls
-
     a_font_settings: FontSettings
-
     a_speed: int
-
     a_car_distance_offset: float
-
     a_car_direction_offset: int
-
     a_race_initialised: bool
-
     a_cycles_passed: int
-
     a_cuda_device: None

     a_gps_img_rcg_strategy: AGpsImageRecognitionStrategy
-
     a_gps_strategy_enum: GPSStrategyEnum
-
     a_cheat_engine: CheatEngine
-
     a_image_manipulation: ImageManipulation

     a_dictionary_menus: dict[str, str]
-
     a_enabled_game_api_values: EnabledGameApiValues
-
     a_car_state: CarState

+    a_speed_with_visualiser: int
+    a_speed_without_visualiser: int
+
+    a_hwnd: None
+    a_visualise: bool
+
     def __init__(self) -> None:
         self.a_image_manipulation = ImageManipulation()
         self.a_image_manipulation.load_comparable_images()
@@ -132,7 +120,7 @@ def __init__(self) -> None:
         self.a_cycles_passed = 0
         self.a_cheat_engine = CheatEngine()
         self.a_list_bitmap = []
-        self.a_speed = 3
+        self.a_speed = 6
         self.a_controls = Controls()
         self.a_font_settings = FontSettings(
             par_font=cv2.FONT_HERSHEY_SIMPLEX,
@@ -146,6 +134,7 @@ def __init__(self) -> None:
             'standing_menu': 'standings_menu',
             'attention_restart': 'attention_restart'
         }
+        self.a_visualise = True

         cuda.printCudaDeviceInfo(0)

@@ -177,7 +166,10 @@ def init_game_memory_objects(self) -> None:
         self.a_wrong_way.construct()

     def initialize_game(self, par_game_inputs: GameInputs,
-                        par_enabled_game_api_values: EnabledGameApiValues) -> None:
+                        par_enabled_game_api_values: EnabledGameApiValues,
+                        par_max_speed_with_visualiser: int,
+                        par_max_speed_without_visualiser: int
+                        ) -> None:
         """
         Initializes the game by starting the game, waiting for it to start, creating and
         initializing required game objects,and setting the game speed and cheat engine.
@@ -187,12 +179,17 @@ class containing the enabled game api values.
             game inputs.
             par_enabled_game_api_values (EnabledGameApiValues): an instance of EnabledGameApiValues
             class containing the enabled game api values.
+            par_max_speed_with_visualiser (int): max speed multiplier with visualiser enabled
+            par_max_speed_without_visualiser (int): max speed multiplier without visualiser enabled
+

         Returns:
             None
         """
         self.a_game_state = GameStateStarting()
         self.a_enabled_game_api_values = par_enabled_game_api_values
+        self.a_speed_with_visualiser = par_max_speed_with_visualiser
+        self.a_speed_without_visualiser = par_max_speed_without_visualiser
         self.start_game()
         # self.start_cheat_engine()

@@ -237,19 +234,43 @@ class containing the enabled game api values.

         self.a_cycles_passed = 0

+        par_game_inputs.game_initialization_inputs.put((
+            self.a_race_initialised
+        ))
+
+        agent_settings: tuple[bool, bool] = par_game_inputs.agent_settings_to_game.get()
+
+        self.a_visualise = agent_settings[0]
+
+        # If Not Realtime set speed based on enabled visualiser
+        if not agent_settings[1]:
+            if self.a_visualise:
+                self.a_speed = self.a_speed_with_visualiser
+            else:
+                self.a_speed = self.a_speed_without_visualiser
+        # If Realtime set speed to 1
+        else:
+            self.a_speed = 1
+
         par_game_inputs.game_initialization_inputs.put((
             self.a_race_initialised,
             self.a_speed
         ))

+        self.a_cheat_engine.reconfigure_speed(self.a_speed)
+
         self.a_car_state = self.create_empty_car_state()

     # pylint: disable=too-many-locals
     # pylint: disable=too-many-branches
     # pylint: disable=too-many-statements
+    # pylint: disable=too-many-arguments
     def main_loop(self, par_game_inputs: GameInputs,
                   par_results_path: str,
-                  par_enabled_game_api_values: EnabledGameApiValues):
+                  par_enabled_game_api_values: EnabledGameApiValues,
+                  par_max_speed_with_visualiser: int,
+                  par_max_speed_without_visualiser: int
+                  ):
         """
         Main Loop That Controls All The Game Logic

@@ -259,12 +280,17 @@ class containing the enabled game api values.
             par_results_path (str): Path of the folder containing results including graph images
             par_enabled_game_api_values (EnabledGameApiValues): an instance of EnabledGameApiValues
             class containing the enabled game api values.
+            par_max_speed_with_visualiser (int): max speed multiplier with visualiser enabled
+            par_max_speed_without_visualiser (int): max speed multiplier without visualiser enabled

         Returns:
             None: This method doesn't return anything.
         """

-        self.initialize_game(par_game_inputs, par_enabled_game_api_values)
+        self.initialize_game(par_game_inputs,
+                             par_enabled_game_api_values,
+                             par_max_speed_with_visualiser,
+                             par_max_speed_without_visualiser)

         tmp_start_time = time.time()
         tmp_speed_constant = 1 / self.a_speed
@@ -276,7 +302,7 @@ class containing the enabled game api values.
             try:
                 tmp_wrong_way_value = self.a_wrong_way.return_is_wrong_way()
             # pylint: disable=broad-except
-            except Exception as exception:
+            except pymem.exception.MemoryReadError as exception:
                 Printer.print_info(f"Waiting for pointers to initialize {exception}", "GAME")
                 time.sleep(1)

@@ -287,10 +313,12 @@ class containing the enabled game api values.

             self.a_car_state.reset_car_state()

-            # Check for quit key -> !! WARNING - Without this all the windows will be BLANK GREY !!!
-            if cv2.waitKey(1) == ord('q'):
-                cv2.destroyAllWindows()
-                break
+            if self.a_visualise:
+                # Check for quit key -> !! WARNING - Without this all the windows will be
+                # BLANK GREY !!!
+                if cv2.waitKey(1) == ord('q'):
+                    cv2.destroyAllWindows()
+                    break

             # Check for record key
             if keyboard.is_pressed('r'):
@@ -340,11 +368,12 @@ class containing the enabled game api values.
             self.a_car_distance_offset = tmp_car_offset_distance
             self.a_car_direction_offset = tmp_car_offset_direction

-            if tmp_contour is not None:
+            if tmp_contour is not None and self.a_visualise:
                 cv2.drawContours(self.a_screenshot, [tmp_contour], -1, (255, 0, 255), -1)

             backup_screenshot: ndarray = self.a_screenshot
-            self.show_graph(par_image_path=par_results_path + 'scatter_plot.png')
+            if self.a_visualise:
+                self.show_graph(par_image_path=par_results_path + 'scatter_plot.png')

             tmp_frame_counter += tmp_speed_constant

@@ -359,7 +388,8 @@ class containing the enabled game api values.
                 tmp_needs_restart: bool = par_game_inputs.game_restart_inputs.get()
                 if tmp_needs_restart:
                     self.a_game_state = GameStateRestarting()
-                    self.update_state_on_screen(self.a_screenshot)
+                    if self.a_visualise:
+                        self.update_state_on_screen(self.a_screenshot)

                     par_game_inputs.game_restart_inputs.put(tmp_needs_restart)
                     self.reset_game_race(0.7 / float(self.a_speed), 0.01 / float(self.a_speed))
@@ -387,16 +417,16 @@ class containing the enabled game api values.
                 )

             par_game_inputs.agent_inputs_state.put(self.a_car_state, )
+            if self.a_visualise:
+                self.show_texts_on_image(par_image=backup_screenshot,
+                                         par_font_color=(159, 43, 104),
+                                         par_car_state=self.a_car_state
+                                         )

-            self.show_texts_on_image(par_image=backup_screenshot,
-                                     par_font_color=(159, 43, 104),
-                                     par_car_state=self.a_car_state
-                                     )
-
-            self.show_state_on_image(par_image=backup_screenshot,
-                                     par_game_state=self.a_game_state)
+                self.show_state_on_image(par_image=backup_screenshot,
+                                         par_game_state=self.a_game_state)

-            cv2.imshow('Main Vision', backup_screenshot)
+                cv2.imshow('Main Vision', backup_screenshot)

     def is_race_initialised(self) -> bool:
         """
@@ -633,9 +663,11 @@ def window_capture(self) -> Tuple[np.ndarray, int, int]:
            - int: Width of the captured image
            - int: Height of the captured image
         """
-        # Find the game window
+        # Find the game window4
         hwnd = win32gui.FindWindow(None, self.api_settings['game_title_name'])

+        # win32gui.SendMessage(hwnd, win32con.WM_ACTIVATE, win32con.WA_CLICKACTIVE, hwnd)
+
         # Get the window device context
         w_dc = win32gui.GetWindowDC(hwnd)

@@ -831,7 +863,8 @@ def is_in_correct_restart_state(self, par_screen_image: ndarray) -> RestartState

         return RestartStateEnum.UNKNOWN_STATE

-    def reset_game_race(self, par_sleep_time_delay: float, par_sleep_time_key_press: float) -> None:
+    def reset_game_race(self, par_sleep_time_delay: float,
+                        par_sleep_time_key_press: float) -> None:
         """
         Reset the game race to the initial state.

@@ -872,7 +905,8 @@ def reset_game_race(self, par_sleep_time_delay: float, par_sleep_time_key_press:
         # Press Enter - Restarts The Race
         # Then Prompt Will Appear - We Move To The OK Button
         for key_to_press in keys_to_press:
-            self.a_controls.press_and_release_key(key_to_press, par_sleep_time_key_press, True)
+            self.a_controls.press_and_release_key(key_to_press,
+                                                  par_sleep_time_key_press, True)
             time.sleep(par_sleep_time_delay)

         tmp_restart_state = self.is_in_correct_restart_state(self.window_capture()[0])
@@ -887,12 +921,14 @@ def reset_game_race(self, par_sleep_time_delay: float, par_sleep_time_key_press:
         # If the prompt with Attention (Do you Really Want to Restart) appears press Enter
         time.sleep(par_sleep_time_delay)

-        self.a_controls.press_and_release_key(self.a_controls.ENTER, par_sleep_time_key_press, True)
+        self.a_controls.press_and_release_key(self.a_controls.ENTER,
+                                              par_sleep_time_key_press, True)
         time.sleep(par_sleep_time_delay)

         time.sleep(1 * self.a_speed)

-    def init_game_race(self, par_sleep_time_delay: float, par_sleep_time_key_press: float):
+    def init_game_race(self,
+                       par_sleep_time_delay: float, par_sleep_time_key_press: float):
         """
         Initializes a race in the game by navigating through the game's menu system.

‎game_inputs.py

+14 -1

@@ -24,12 +24,15 @@ class GameInputs:
     __a_game_initialization_inputs: mp.Queue
     # Inputs Given If Restart is Needed
     __a_game_restart_inputs: mp.Queue
+    # Settings from Agent to Game
+    __a_agent_settings_to_game: mp.Queue

     def __init__(self, par_agent_inputs_state: mp.Queue, par_game_initialization_inputs: mp.Queue,
-                 par_game_restart_inputs: mp.Queue):
+                 par_game_restart_inputs: mp.Queue, par_agent_settings_to_game: mp.Queue):
         self.__a_agent_inputs_state = par_agent_inputs_state
         self.__a_game_initialization_inputs = par_game_initialization_inputs
         self.__a_game_restart_inputs = par_game_restart_inputs
+        self.__a_agent_settings_to_game = par_agent_settings_to_game

     @property
     def agent_inputs_state(self) -> mp.Queue:
@@ -60,3 +63,13 @@ def game_restart_inputs(self) -> mp.Queue:
             mp.Queue: The queue containing the inputs.
         """
         return self.__a_game_restart_inputs
+
+    @property
+    def agent_settings_to_game(self) -> mp.Queue:
+        """
+        Gets the settings given by agent to a game
+
+        Returns:
+            mp.Queue: The queue containing the settings.
+        """
+        return self.__a_agent_settings_to_game
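
With the fourth queue in place, the agent-to-game settings handshake looks roughly like this (a sketch assuming the queue carries a (visualize, realtime) tuple, as in the Env constructor and Game.initialize_game above):

import multiprocessing as mp

from game_inputs import GameInputs

if __name__ == "__main__":
    game_inputs = GameInputs(
        mp.Queue(),  # agent inputs / car state
        mp.Queue(),  # game initialization inputs
        mp.Queue(),  # game restart inputs
        mp.Queue(),  # agent settings to game (new in this commit)
    )

    # Agent side: announce whether the game should visualise and run in realtime.
    game_inputs.agent_settings_to_game.put((True, False))

    # Game side: read the flags and pick the speed multiplier accordingly.
    visualise, realtime = game_inputs.agent_settings_to_game.get()
    print(visualise, realtime)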

‎main.py

+11 -16

@@ -51,7 +51,10 @@ def game_loop_thread(par_game_inputs: GameInputs) -> None:
         tmp_game.main_loop(
             par_game_inputs=par_game_inputs,
             par_results_path=results_path,
-            par_enabled_game_api_values=selected_configuration.return_enabled_game_api_values()
+            par_enabled_game_api_values=selected_configuration.return_enabled_game_api_values(),
+            par_max_speed_with_visualiser=selected_configuration.return_max_speed_visualised(),
+            par_max_speed_without_visualiser=
+            selected_configuration.return_max_speed_non_visualised(),
         )
     except Exception as exception:
         Printer.print_error("An error occurred in Game Api", "MAIN", exception)
@@ -74,7 +77,7 @@ def agent_loop(par_game_inputs: GameInputs) -> None:
     settings = {
         'create_scatter_plot': False,
         'load_previous_model': True,
-        'previous_model_iter_number': 4420
+        'previous_model_iter_number': 8000
     }
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     Printer.print_basic(torch.version.cuda, "MAIN")
@@ -86,9 +89,7 @@ def agent_loop(par_game_inputs: GameInputs) -> None:

     env_param = (
         par_game_inputs,
-        selected_configuration.return_reward_strategy(),
-        selected_configuration.return_state_calc_strategy(),
-        selected_configuration.return_enabled_game_api_values()
+        selected_configuration
     )

     count_of_iterations = 20000
@@ -140,20 +141,12 @@ def agent_loop(par_game_inputs: GameInputs) -> None:
         pass
     results_time = ''

-    tmp_game_variables: tuple = par_game_inputs.game_initialization_inputs.get()
-
-    tmp_is_game_started: bool = tmp_game_variables[0]
-
-    par_game_inputs.game_initialization_inputs.put(tmp_game_variables)
+    tmp_is_game_started: bool = par_game_inputs.game_initialization_inputs.get()

     while not tmp_is_game_started:
         Printer.print_info("Waiting for Race to Initialise", "MAIN")

-        tmp_game_variables: tuple = par_game_inputs.game_initialization_inputs.get()
-
-        tmp_is_game_started: bool = tmp_game_variables[0]
-
-        par_game_inputs.game_initialization_inputs.put(tmp_game_variables)
+        tmp_is_game_started: bool = par_game_inputs.game_initialization_inputs.get()

         time.sleep(1)

@@ -192,11 +185,13 @@ def agent_loop(par_game_inputs: GameInputs) -> None:
     tmp_queue_env_inputs: multiprocessing.Queue = multiprocessing.Queue()
     tmp_queue_game_started_inputs: multiprocessing.Queue = multiprocessing.Queue()
     tmp_queue_restart_game_input: multiprocessing.Queue = multiprocessing.Queue()
+    tmp_queue_agent_settings_to_game: multiprocessing.Queue = multiprocessing.Queue()

     game_inputs: GameInputs = GameInputs(
         tmp_queue_env_inputs,
         tmp_queue_game_started_inputs,
-        tmp_queue_restart_game_input
+        tmp_queue_restart_game_input,
+        tmp_queue_agent_settings_to_game,
     )

     tmp_game_thread = multiprocessing.Process(target=game_loop_thread, args=(game_inputs,))
