From f04feb0cd9a21dfdc62669e7357baefe2a6b6c76 Mon Sep 17 00:00:00 2001
From: gqt <3217233537@qq.com>
Date: Sun, 26 Apr 2026 17:06:54 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0PPO=E5=9B=9E=E5=85=85?=
 =?UTF-8?q?=E5=AE=89=E5=85=A8=E5=8A=A8=E4=BD=9C=E7=BA=A6=E6=9D=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 agent_ppo/feature/preprocessor.py | 38 +++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/agent_ppo/feature/preprocessor.py b/agent_ppo/feature/preprocessor.py
index c4fbf20..1a59529 100644
--- a/agent_ppo/feature/preprocessor.py
+++ b/agent_ppo/feature/preprocessor.py
@@ -603,36 +603,46 @@ class Preprocessor:
         return False
 
     def _filter_recharge_actions(self, legal_action):
-        """Restrict low-battery actions to moves that approach the charger."""
+        """Restrict recharge-mode actions to safe moves toward the charger range."""
         if not self.has_charger:
             return list(legal_action)
 
         hx, hz = self.cur_pos
-        current_dist = max(abs(self.nearest_charger_dx), abs(self.nearest_charger_dz))
+        current_dist = self._min_charger_range_dist(hx, hz)
         scored = []
         for action, (dx, dz) in enumerate(self.ACTION_DIRS):
             if legal_action[action] <= 0:
                 continue
-            next_dx = self.nearest_charger_dx - dx
-            next_dz = self.nearest_charger_dz - dz
-            next_dist = max(abs(next_dx), abs(next_dz))
-            improvement = current_dist - next_dist
+            nx, nz = hx + dx, hz + dz
+            next_dist = self._min_charger_range_dist(nx, nz)
             alignment = dx * self.nearest_charger_dx + dz * self.nearest_charger_dz
-            scored.append((improvement, alignment, action))
+            scored.append((next_dist, alignment, action))
 
         if not scored:
             return list(legal_action)
 
-        best_improvement = max(item[0] for item in scored)
+        # Already inside the charger range: allow only moves that stay inside it
+        # until recharge mode exits; if no such move is legal, use the ranking below.
+        if current_dist <= 0.0:
+            stay = [0] * 8
+            for next_dist, _, action in scored:
+                if next_dist <= 0.0:
+                    stay[action] = 1
+            if any(stay):
+                return stay
+
+        best_next_dist = min(item[0] for item in scored)
+        best_alignment = max(alignment for next_dist, alignment, _ in scored if next_dist <= best_next_dist + 0.1)
+
         recharge = [0] * 8
-        if best_improvement > 0:
-            for improvement, _, action in scored:
-                if improvement >= best_improvement - 0.1:
-                    recharge[action] = 1
-        else:
+        for next_dist, alignment, action in scored:
+            if next_dist <= best_next_dist + 0.1 and alignment >= best_alignment - 0.1:
+                recharge[action] = 1
+
+        if not any(recharge):
             best_alignment = max(item[1] for item in scored)
             for _, alignment, action in scored:
-                if alignment >= best_alignment:
+                if alignment >= best_alignment - 0.1:
                     recharge[action] = 1
 
         return recharge if any(recharge) else list(legal_action)