From 5c2df1015047ce295f3e56f1ccef9eb458582ae0 Mon Sep 17 00:00:00 2001
From: gqt <3217233537@qq.com>
Date: Sun, 26 Apr 2026 17:37:17 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BD=8E=E7=94=B5=E9=87=8F?=
 =?UTF-8?q?=E5=9B=9E=E5=85=85=E5=8D=A1=E4=BD=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 agent_ppo/feature/preprocessor.py | 71 ++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/agent_ppo/feature/preprocessor.py b/agent_ppo/feature/preprocessor.py
index 396b619..cdcdd19 100644
--- a/agent_ppo/feature/preprocessor.py
+++ b/agent_ppo/feature/preprocessor.py
@@ -77,6 +77,9 @@ class Preprocessor:
         self.current_visit_count = 0
         self.is_new_cell = False
         self.last_action = -1
+        self.stuck_steps = 0
+        self.recharge_no_progress_steps = 0
+        self.fake_charger_steps = 0
 
         self.dirt_cleaned = 0
         self.last_dirt_cleaned = 0
@@ -219,6 +222,7 @@ class Preprocessor:
         self._update_charger_state(hx, hz, organs)
         self._update_npc_state(hx, hz, self.npcs)
         self._update_recharge_mode()
+        self._update_motion_health()
 
     def _update_passable(self, hx, hz):
         """Write local view into global passable map.
@@ -403,6 +407,33 @@ class Preprocessor:
         if self.recharge_mode:
             self.recharge_steps += 1
 
+    def _update_motion_health(self):
+        """Track recharge-mode stalls so action masking can recover."""
+        if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
+            if self.charge_delta <= 0:
+                self.stuck_steps += 1
+            else:
+                self.stuck_steps = 0
+        else:
+            self.stuck_steps = 0
+
+        battery_ratio = self.battery / max(self.battery_max, 1)
+        battery_increased = self.battery > self.prev_battery + 1
+        maybe_fake_charger = self.on_charger and battery_ratio < 0.9 and self.charge_delta <= 0 and not battery_increased
+        self.fake_charger_steps = self.fake_charger_steps + 1 if maybe_fake_charger else 0
+
+        no_progress = (
+            self.recharge_mode
+            and self.has_charger
+            and self.charge_delta <= 0
+            and not battery_increased
+            and self.nearest_charger_path_dist >= self.last_nearest_charger_path_dist - 0.1
+        )
+        self.recharge_no_progress_steps = self.recharge_no_progress_steps + 1 if no_progress else 0
+
+    def _need_recharge_escape(self):
+        return self.stuck_steps >= 2 or self.recharge_no_progress_steps >= 5 or self.fake_charger_steps >= 2
+
     def _charger_safety_buffer(self):
         # One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise.
         base = max(24.0, 0.16 * float(self.battery_max))
@@ -613,8 +644,10 @@ class Preprocessor:
         """
         legal = self._filter_blocked_actions(self._legal_act)
         legal = self._filter_npc_danger_actions(legal)
+        safe_legal = list(legal)
         if self.recharge_mode:
             legal = self._filter_recharge_actions(legal)
+            legal = self._filter_recharge_escape_actions(legal, safe_legal)
         elif self.on_charger and self.battery / max(self.battery_max, 1) > 0.65:
             legal = self._filter_leave_charger_actions(legal)
         return list(legal)
@@ -695,7 +728,8 @@ class Preprocessor:
 
         # When already inside the charger range, stay inside until recharge mode exits.
         # 已经在充电区域内时,回充模式退出前不要离开充电区域。
-        if current_range_dist <= 0.0:
+        confirmed_charger = self.charge_delta > 0 or self.battery > self.prev_battery + 1
+        if current_range_dist <= 0.0 and confirmed_charger:
             stay = [0] * 8
             for _, _, next_range_dist, action in scored:
                 if next_range_dist <= 0.0:
@@ -718,6 +752,39 @@ class Preprocessor:
 
         return recharge if any(recharge) else list(legal_action)
 
+    def _filter_recharge_escape_actions(self, recharge_action, safe_action):
+        """Escape repeated no-move states during low-battery recharge."""
+        if not self._need_recharge_escape():
+            return list(recharge_action)
+
+        hx, hz = self.cur_pos
+        current_dist = self._charger_move_distance(hx, hz)
+        ranked = []
+        for action, (dx, dz) in enumerate(self.ACTION_DIRS):
+            if safe_action[action] <= 0:
+                continue
+            nx, nz = hx + dx, hz + dz
+            next_dist = self._charger_move_distance(nx, nz)
+            visit_count = 0
+            if 0 <= nx < self.GRID_SIZE and 0 <= nz < self.GRID_SIZE:
+                visit_count = int(self.visit_count_map[nx, nz])
+            failed_action_penalty = 6.0 if action == self.last_action and self.stuck_steps >= 2 else 0.0
+            no_progress_penalty = 1.5 if next_dist > current_dist + 0.1 else 0.0
+            ranked.append((next_dist + 0.05 * min(visit_count, 20) + failed_action_penalty + no_progress_penalty, action))
+
+        if not ranked:
+            return list(recharge_action)
+
+        ranked.sort()
+        escape = [0] * 8
+        for _, action in ranked[: min(4, len(ranked))]:
+            escape[action] = 1
+
+        if self.stuck_steps >= 2 and sum(escape) > 1 and 0 <= self.last_action < 8:
+            escape[self.last_action] = 0
+
+        return escape if any(escape) else list(recharge_action)
+
     def _filter_leave_charger_actions(self, legal_action):
         """Prefer moves that leave charger range when battery is healthy."""
         if not self.has_charger:
@@ -826,6 +893,8 @@ class Preprocessor:
         stuck_penalty = 0.0
         if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
             stuck_penalty = -0.03
+            if self.recharge_mode:
+                stuck_penalty -= 0.02 * min(self.stuck_steps, 5)
 
         npc_penalty = 0.0
         if self.npc_danger: