优化PPO自适应回充与泛化特征
This commit is contained in:
@@ -158,7 +158,7 @@ class EpisodeRunner:
|
||||
result_str = "WIN"
|
||||
else:
|
||||
if fm.battery <= 0 or remaining_charge <= 0:
|
||||
- final_reward = -4.0 + 6.0 * cleaning_ratio
|
||||
+ final_reward = -fm.battery_fail_penalty() + 4.0 * cleaning_ratio
|
||||
result_str = "BATTERY_FAIL"
|
||||
elif fm.npc_danger or fm.nearest_npc_dist <= 1:
|
||||
final_reward = -3.0 + 6.0 * cleaning_ratio
|
||||
|
||||
Reference in New Issue
Block a user