Optimize PPO coverage and recharge strategy

This commit is contained in:
2026-04-26 19:25:05 +08:00
parent 220de372e0
commit 5b6133db13
4 changed files with 399 additions and 108 deletions

View File

@@ -160,7 +160,7 @@ class EpisodeRunner:
if fm.battery <= 0 or remaining_charge <= 0:
final_reward = -fm.battery_fail_penalty() + 4.0 * cleaning_ratio
result_str = "BATTERY_FAIL"
-elif fm.npc_danger or fm.nearest_npc_dist <= 1:
+elif fm.npc_danger or fm.npc_predicted_danger or fm.nearest_npc_dist <= 1:
final_reward = -3.0 + 6.0 * cleaning_ratio
result_str = "NPC_FAIL"
else: