修复低电量回充卡住

This commit is contained in:
2026-04-26 17:37:17 +08:00
parent f44e2483fc
commit 5c2df10150

View File

@@ -77,6 +77,9 @@ class Preprocessor:
self.current_visit_count = 0 self.current_visit_count = 0
self.is_new_cell = False self.is_new_cell = False
self.last_action = -1 self.last_action = -1
self.stuck_steps = 0
self.recharge_no_progress_steps = 0
self.fake_charger_steps = 0
self.dirt_cleaned = 0 self.dirt_cleaned = 0
self.last_dirt_cleaned = 0 self.last_dirt_cleaned = 0
@@ -219,6 +222,7 @@ class Preprocessor:
self._update_charger_state(hx, hz, organs) self._update_charger_state(hx, hz, organs)
self._update_npc_state(hx, hz, self.npcs) self._update_npc_state(hx, hz, self.npcs)
self._update_recharge_mode() self._update_recharge_mode()
self._update_motion_health()
def _update_passable(self, hx, hz): def _update_passable(self, hx, hz):
"""Write local view into global passable map. """Write local view into global passable map.
@@ -403,6 +407,33 @@ class Preprocessor:
if self.recharge_mode: if self.recharge_mode:
self.recharge_steps += 1 self.recharge_steps += 1
def _update_motion_health(self):
    """Refresh the stall counters that drive recharge-escape action masking.

    Three consecutive-step counters are updated from the current state:

    * ``stuck_steps`` - a movement action (``0 <= last_action < 8``) left
      the position unchanged and no charging was observed.
    * ``fake_charger_steps`` - ``on_charger`` is set and the battery is
      below 90% of ``battery_max``, yet no charging was observed.
    * ``recharge_no_progress_steps`` - recharge mode is active, a charger
      is known, no charging was observed, and the path distance to the
      nearest charger did not shrink by more than 0.1.

    Each counter resets to 0 on the first step its condition fails.
    """
    # One shared "really charging" signal for all three counters: either the
    # charge counter advanced or the battery rose by more than one unit.
    # Previously stuck_steps looked at charge_delta alone, so an agent that
    # was charging in place could still be counted as stuck.
    battery_increased = self.battery > self.prev_battery + 1
    charging = self.charge_delta > 0 or battery_increased

    blocked_move = (
        self.prev_pos is not None
        and self.cur_pos == self.prev_pos
        and 0 <= self.last_action < 8
    )
    if blocked_move and not charging:
        self.stuck_steps += 1
    else:
        self.stuck_steps = 0

    # max(..., 1) guards against a zero battery_max.
    battery_ratio = self.battery / max(self.battery_max, 1)
    maybe_fake_charger = self.on_charger and battery_ratio < 0.9 and not charging
    if maybe_fake_charger:
        self.fake_charger_steps += 1
    else:
        self.fake_charger_steps = 0

    # The distance comparison stays last so it is only evaluated when the
    # cheaper preconditions hold.
    no_progress = (
        self.recharge_mode
        and self.has_charger
        and not charging
        and self.nearest_charger_path_dist >= self.last_nearest_charger_path_dist - 0.1
    )
    if no_progress:
        self.recharge_no_progress_steps += 1
    else:
        self.recharge_no_progress_steps = 0
def _need_recharge_escape(self):
    """Tell whether any stall counter has reached its escape threshold.

    Returns a truthy value when ``stuck_steps`` is at least 2,
    ``recharge_no_progress_steps`` is at least 5, or
    ``fake_charger_steps`` is at least 2.
    """
    if self.stuck_steps >= 2:
        return True
    if self.recharge_no_progress_steps >= 5:
        return True
    return self.fake_charger_steps >= 2
def _charger_safety_buffer(self): def _charger_safety_buffer(self):
# One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise. # One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise.
base = max(24.0, 0.16 * float(self.battery_max)) base = max(24.0, 0.16 * float(self.battery_max))
@@ -613,8 +644,10 @@ class Preprocessor:
""" """
legal = self._filter_blocked_actions(self._legal_act) legal = self._filter_blocked_actions(self._legal_act)
legal = self._filter_npc_danger_actions(legal) legal = self._filter_npc_danger_actions(legal)
safe_legal = list(legal)
if self.recharge_mode: if self.recharge_mode:
legal = self._filter_recharge_actions(legal) legal = self._filter_recharge_actions(legal)
legal = self._filter_recharge_escape_actions(legal, safe_legal)
elif self.on_charger and self.battery / max(self.battery_max, 1) > 0.65: elif self.on_charger and self.battery / max(self.battery_max, 1) > 0.65:
legal = self._filter_leave_charger_actions(legal) legal = self._filter_leave_charger_actions(legal)
return list(legal) return list(legal)
@@ -695,7 +728,8 @@ class Preprocessor:
# When already inside the charger range, stay inside until recharge mode exits. # When already inside the charger range, stay inside until recharge mode exits.
# 已经在充电区域内时,回充模式退出前不要离开充电区域。 # 已经在充电区域内时,回充模式退出前不要离开充电区域。
if current_range_dist <= 0.0: confirmed_charger = self.charge_delta > 0 or self.battery > self.prev_battery + 1
if current_range_dist <= 0.0 and confirmed_charger:
stay = [0] * 8 stay = [0] * 8
for _, _, next_range_dist, action in scored: for _, _, next_range_dist, action in scored:
if next_range_dist <= 0.0: if next_range_dist <= 0.0:
@@ -718,6 +752,39 @@ class Preprocessor:
return recharge if any(recharge) else list(legal_action) return recharge if any(recharge) else list(legal_action)
def _filter_recharge_escape_actions(self, recharge_action, safe_action):
    """Replace the recharge mask with an escape mask when the agent is stalled.

    Args:
        recharge_action: 8-entry action mask already narrowed for recharge
            mode. Returned (as a copy) when no escape is needed or no
            escape candidate exists.
        safe_action: 8-entry action mask taken before the recharge
            narrowing; only actions enabled here may be chosen.

    Returns:
        A new 8-entry 0/1 list. When ``_need_recharge_escape()`` is true it
        enables up to four of the lowest-cost safe moves; otherwise it is a
        copy of ``recharge_action``.

    Cost of a candidate move = charger move distance of the target cell
    + 0.05 per prior visit (capped at 20 visits)
    + 6.0 if it repeats the action that just failed while stuck
    + 1.5 if it increases the charger distance by more than 0.1.
    """
    if not self._need_recharge_escape():
        return list(recharge_action)

    hx, hz = self.cur_pos
    current_dist = self._charger_move_distance(hx, hz)
    repeating_failure = self.stuck_steps >= 2

    ranked = []
    for action, (dx, dz) in enumerate(self.ACTION_DIRS):
        if safe_action[action] <= 0:
            continue
        nx, nz = hx + dx, hz + dz
        # A move off the grid cannot free the agent. Skip it before it
        # reaches the distance helper, and so it is not scored as an
        # attractive never-visited cell.
        if not (0 <= nx < self.GRID_SIZE and 0 <= nz < self.GRID_SIZE):
            continue
        next_dist = self._charger_move_distance(nx, nz)
        cost = next_dist + 0.05 * min(int(self.visit_count_map[nx, nz]), 20)
        if repeating_failure and action == self.last_action:
            cost += 6.0
        if next_dist > current_dist + 0.1:
            cost += 1.5
        ranked.append((cost, action))

    if not ranked:
        return list(recharge_action)

    # Tuples sort by cost first, then by action index for ties.
    ranked.sort()
    escape = [0] * 8
    for _, action in ranked[:4]:
        escape[action] = 1

    # While stuck, never offer the failed action again if an alternative
    # exists. At least one entry always remains set, so no fallback is needed.
    if repeating_failure and sum(escape) > 1 and 0 <= self.last_action < 8:
        escape[self.last_action] = 0
    return escape
def _filter_leave_charger_actions(self, legal_action): def _filter_leave_charger_actions(self, legal_action):
"""Prefer moves that leave charger range when battery is healthy.""" """Prefer moves that leave charger range when battery is healthy."""
if not self.has_charger: if not self.has_charger:
@@ -826,6 +893,8 @@ class Preprocessor:
stuck_penalty = 0.0 stuck_penalty = 0.0
if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8: if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
stuck_penalty = -0.03 stuck_penalty = -0.03
if self.recharge_mode:
stuck_penalty -= 0.02 * min(self.stuck_steps, 5)
npc_penalty = 0.0 npc_penalty = 0.0
if self.npc_danger: if self.npc_danger: