优化PPO充电与避障策略

扩展观测特征到157维,加入充电桩、NPC、电量安全余量、地图统计和本步清扫信息。

增加低电量回充动作过滤、NPC危险区过滤,并调整奖励和终局日志以突出充电、避障和真实清扫得分。
This commit is contained in:
2026-04-26 14:14:18 +08:00
parent eb3efa4df7
commit efbc612945
6 changed files with 441 additions and 52 deletions

View File

@@ -132,7 +132,14 @@ class EpisodeRunner:
final_reward = 0.0
if done:
fm = self.agent.preprocessor
total_score = env_obs["observation"]["env_info"]["total_score"]
env_info = env_obs["observation"]["env_info"]
extra_info = env_obs.get("extra_info", {})
total_score = env_info.get("total_score", fm.total_score)
remaining_charge = env_info.get("remaining_charge", fm.remaining_charge)
charge_count = env_info.get("charge_count", fm.charge_count)
finished_steps = env_info.get("finished_steps", step)
result_message = extra_info.get("result_message", "")
result_code = extra_info.get("result_code", "")
if truncated:
# Survived to max steps: higher cleaning ratio → more reward
@@ -141,19 +148,31 @@ class EpisodeRunner:
final_reward = 2.0 + 8.0 * cleaning_ratio
result_str = "WIN"
else:
# Battery-depleted episodes are common with short runs; keep
# cleaning progress as the dominant terminal signal.
# 短训中电量耗尽较常见,终局奖励仍以清扫比例为主。
cleaning_ratio = fm.dirt_cleaned / max(fm.total_dirt, 1)
final_reward = -1.0 + 6.0 * cleaning_ratio
result_str = "FAIL"
if fm.battery <= 0 or remaining_charge <= 0:
final_reward = -4.0 + 6.0 * cleaning_ratio
result_str = "BATTERY_FAIL"
elif fm.npc_danger or fm.nearest_npc_dist <= 1:
final_reward = -3.0 + 6.0 * cleaning_ratio
result_str = "NPC_FAIL"
else:
final_reward = -2.0 + 6.0 * cleaning_ratio
result_str = "FAIL"
self.logger.info(
f"[GAMEOVER] ep:{self.episode_cnt} steps:{step} "
f"finished_steps:{finished_steps} "
f"result:{result_str} final_bonus:{final_reward:.2f} "
f"total_reward:{total_reward:.3f} "
f"dirt_cleaned:{fm.dirt_cleaned}/{fm.total_dirt} "
f"total_score:{total_score}"
f"total_score:{total_score} "
f"remaining_charge:{remaining_charge} "
f"charge_count:{charge_count} "
f"recharge_steps:{fm.recharge_steps} "
f"nearest_charger:{fm.nearest_charger_range_dist:.1f} "
f"nearest_npc:{fm.nearest_npc_dist:.1f} "
f"result_code:{result_code} "
f"result_message:{result_message}"
)
# Build sample frame