调整PPO回充模式清扫与探索奖励

2026-04-26 16:33:44 +08:00
parent 3c3332e126
commit e0756b4846
1 changed files with 6 additions and 2 deletions
--- a/agent_ppo/feature/preprocessor.py
+++ b/agent_ppo/feature/preprocessor.py
@@ -695,7 +695,8 @@ class Preprocessor:
        # Cleaning reward / 清扫奖励
        cleaned_this_step = max(0, self.dirt_cleaned - self.last_dirt_cleaned)
        cleaned_cells = self.step_cleaned_count if self.step_cleaned_count > 0 else cleaned_this_step
-        cleaning_reward = 0.7 * cleaned_cells
+        cleaning_scale = 0.2 if self.recharge_mode else 0.7
+        cleaning_reward = cleaning_scale * cleaned_cells
        # Step penalty / 时间惩罚
        step_penalty = -0.002
@@ -734,6 +735,9 @@ class Preprocessor:
        # Encourage covering new passable cells and mildly discourage loops.
        # 鼓励探索新格子，轻微惩罚反复绕圈。
+        if self.recharge_mode:
+            exploration_reward = 0.0
+        else:
+            exploration_reward = 0.004 if self.is_new_cell else -0.0015 * min(self.current_visit_count, 6)
        # Collision/stuck signal: invalid moves waste both step and battery.