From e0756b4846d6c9d33e029698edaf985c5fa5c212 Mon Sep 17 00:00:00 2001 From: gqt <3217233537@qq.com> Date: Sun, 26 Apr 2026 16:33:44 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4PPO=E5=9B=9E=E5=85=85?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E6=B8=85=E6=89=AB=E4=B8=8E=E6=8E=A2=E7=B4=A2?= =?UTF-8?q?=E5=A5=96=E5=8A=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent_ppo/feature/preprocessor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/agent_ppo/feature/preprocessor.py b/agent_ppo/feature/preprocessor.py index c9d6692..c4fbf20 100644 --- a/agent_ppo/feature/preprocessor.py +++ b/agent_ppo/feature/preprocessor.py @@ -695,7 +695,8 @@ class Preprocessor: # Cleaning reward / 清扫奖励 cleaned_this_step = max(0, self.dirt_cleaned - self.last_dirt_cleaned) cleaned_cells = self.step_cleaned_count if self.step_cleaned_count > 0 else cleaned_this_step - cleaning_reward = 0.7 * cleaned_cells + cleaning_scale = 0.2 if self.recharge_mode else 0.7 + cleaning_reward = cleaning_scale * cleaned_cells # Step penalty / 时间惩罚 step_penalty = -0.002 @@ -734,7 +735,10 @@ class Preprocessor: # Encourage covering new passable cells and mildly discourage loops. # 鼓励探索新格子,轻微惩罚反复绕圈。 - exploration_reward = 0.004 if self.is_new_cell else -0.0015 * min(self.current_visit_count, 6) + if self.recharge_mode: + exploration_reward = 0.0 + else: + exploration_reward = 0.004 if self.is_new_cell else -0.0015 * min(self.current_visit_count, 6) # Collision/stuck signal: invalid moves waste both step and battery. # 撞墙/原地不动会浪费步数和电量。