优化PPO自适应回充与泛化特征

This commit is contained in:
2026-04-26 18:35:23 +08:00
parent 00b26af3ed
commit e99a224d86
2 changed files with 101 additions and 18 deletions

View File

@@ -158,7 +158,7 @@ class EpisodeRunner:
result_str = "WIN"
else:
if fm.battery <= 0 or remaining_charge <= 0:
-final_reward = -4.0 + 6.0 * cleaning_ratio
+final_reward = -fm.battery_fail_penalty() + 4.0 * cleaning_ratio
result_str = "BATTERY_FAIL"
elif fm.npc_danger or fm.nearest_npc_dist <= 1:
final_reward = -3.0 + 6.0 * cleaning_ratio