修正PPO充电奖励防止蹲桩

This commit is contained in:
2026-04-26 15:08:43 +08:00
parent efbc612945
commit ba6cf2a797
2 changed files with 89 additions and 12 deletions

View File

@@ -140,15 +140,19 @@ class EpisodeRunner:
finished_steps = env_info.get("finished_steps", step)
result_message = extra_info.get("result_message", "")
result_code = extra_info.get("result_code", "")
cleaning_ratio = fm.dirt_cleaned / max(fm.total_dirt, 1)
score_per_step = total_score / max(finished_steps, 1)
if truncated:
# Survived to max steps: higher cleaning ratio → more reward
# 存活到最大步数:清扫比例越高奖励越多
cleaning_ratio = fm.dirt_cleaned / max(fm.total_dirt, 1)
final_reward = 2.0 + 8.0 * cleaning_ratio
result_str = "WIN"
if score_per_step < 0.25:
final_reward = -3.0 + 6.0 * cleaning_ratio
result_str = "STALL_TRUNCATED"
else:
# Survived to max steps: higher cleaning ratio → more reward
# 存活到最大步数:清扫比例越高奖励越多
final_reward = 2.0 + 8.0 * cleaning_ratio
result_str = "WIN"
else:
cleaning_ratio = fm.dirt_cleaned / max(fm.total_dirt, 1)
if fm.battery <= 0 or remaining_charge <= 0:
final_reward = -4.0 + 6.0 * cleaning_ratio
result_str = "BATTERY_FAIL"