From 220de372e01ac7698b1c9a655228995a60141eba Mon Sep 17 00:00:00 2001 From: gqt <3217233537@qq.com> Date: Sun, 26 Apr 2026 18:56:42 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4PPO=E5=A5=96=E5=8A=B1?= =?UTF-8?q?=E7=AA=81=E5=87=BA=E6=9C=89=E6=95=88=E5=85=85=E7=94=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent_ppo/feature/preprocessor.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/agent_ppo/feature/preprocessor.py b/agent_ppo/feature/preprocessor.py index 7e70ee5..90d556f 100644 --- a/agent_ppo/feature/preprocessor.py +++ b/agent_ppo/feature/preprocessor.py @@ -527,8 +527,8 @@ class Preprocessor: prev_low_risk = max(0.0, self.recharge_low_battery_ratio - prev_battery_ratio) prev_low_risk /= max(self.recharge_low_battery_ratio, 1e-6) risk = max(self._recharge_risk_score(), prev_low_risk) - mode_bonus = 0.25 if self.was_recharge_mode or self.prev_low_battery else 0.0 - return float(np.clip(1.2 + 1.1 * risk + mode_bonus, 1.2, 2.6)) + mode_bonus = 0.4 if self.was_recharge_mode or self.prev_low_battery else 0.0 + return float(np.clip(2.0 + 1.8 * risk + mode_bonus, 2.0, 4.2)) def battery_fail_penalty(self): """Adaptive terminal penalty for running out of battery before max steps.""" @@ -951,13 +951,6 @@ class Preprocessor: # Step penalty / 时间惩罚 step_penalty = -0.002 - # Dense guidance: prefer moving toward visible dirt. - # 稠密引导:鼓励向视野内污渍靠近。 - approach_reward = 0.0 - if not self.recharge_mode and (self.last_nearest_dirt_dist < 200.0 or self.nearest_dirt_dist < 200.0): - dist_delta = float(np.clip(self.last_nearest_dirt_dist - self.nearest_dirt_dist, -5.0, 5.0)) - approach_reward = 0.01 * dist_delta if dist_delta > 0 else 0.006 * dist_delta - # Recharge guidance only activates when battery safety is the bottleneck. # 仅在低电量/回充模式下引导靠近充电桩,避免高电量蹲充电桩。 charge_reward = 0.0 @@ -1015,7 +1008,6 @@ class Preprocessor: return ( cleaning_reward - + approach_reward + charge_reward + exploration_reward + stuck_penalty