Avoid wall-hugging during unknown recharge routes

This commit is contained in:
2026-04-26 20:33:51 +08:00
parent 69b8a692db
commit 524ca8c070

View File

@@ -1212,9 +1212,15 @@ class Preprocessor:
score += 0.35 if visit_count == 0 else -0.05 * min(visit_count, 10)
if self.recharge_mode:
if self.charger_route_known:
score += 2.2 * float(self.charger_action_delta[action])
if self._charger_move_distance(nx, nz) < self._charger_move_distance(hx, hz):
score += 0.8
else:
score += 2.0 * float(self.frontier_action_delta[action])
score += 0.7 * max(float(self.global_dirty_action_delta[action]), 0.0)
if self._min_charger_range_dist(nx, nz) < self._min_charger_range_dist(hx, hz):
score += 0.15
else:
if self.global_dirty_path_dist < self.GRID_SIZE:
score += 1.8 * float(self.global_dirty_action_delta[action])
@@ -1336,18 +1342,17 @@ class Preprocessor:
if any(stay):
return stay
if not self.charger_route_known:
return self._filter_recharge_discovery_actions(legal_action, scored, current_range_dist)
recharge = [0] * 8
best_next_dist = min(item[0] for item in scored)
ranked = sorted(scored, key=lambda item: (item[0], -item[1]))
max_recharge_actions = 4 if self.charger_route_known else 5
dist_slack = 2.5 if self.charger_route_known else 4.0
max_recharge_actions = 4
dist_slack = 2.5
for next_dist, alignment, next_range_dist, action in ranked:
route_progress = next_dist <= current_move_dist + 0.1
range_progress = next_range_dist <= current_range_dist
direction_progress = alignment > 0
if next_dist <= best_next_dist + dist_slack and (
route_progress or (not self.charger_route_known and (range_progress or direction_progress))
):
if next_dist <= best_next_dist + dist_slack and route_progress:
recharge[action] = 1
if sum(recharge) >= max_recharge_actions:
break
@@ -1358,6 +1363,46 @@ class Preprocessor:
return recharge if any(recharge) else list(legal_action)
def _filter_recharge_discovery_actions(self, legal_action, scored, current_range_dist):
    """Pick exploration-friendly actions while no charger route is known.

    Each legal entry of ``scored`` is given a heuristic score that rewards
    frontier progress, positive global-dirty progress, a shrinking charger
    range distance and positive alignment, and that penalises revisiting
    cells and repeating the last action after at least two no-progress steps.

    Args:
        legal_action: Indexable per-action legality flags; an action is
            considered only when its flag is greater than zero.
        scored: Iterable of ``(next_dist, alignment, next_range_dist, action)``
            tuples. ``next_dist`` is not used by this method.
        current_range_dist: Charger range distance at the current position,
            compared against each candidate's ``next_range_dist``.

    Returns:
        An 8-element 0/1 list marking the selected actions, or a copy of
        ``legal_action`` when nothing could be selected.
    """
    origin_x, origin_z = self.cur_pos
    candidates = []
    for _next_dist, alignment, next_range_dist, action in scored:
        if legal_action[action] <= 0:
            continue

        step_x, step_z = self.ACTION_DIRS[action]
        cell_x = origin_x + step_x
        cell_z = origin_z + step_z
        # Cells outside the grid count as never visited.
        if 0 <= cell_x < self.GRID_SIZE and 0 <= cell_z < self.GRID_SIZE:
            visits = int(self.visit_count_map[cell_x, cell_z])
        else:
            visits = 0

        frontier_term = float(self.frontier_action_delta[action])
        dirty_term = float(self.global_dirty_action_delta[action])
        # Range improvement is clipped to [-2, 2] and normalised to [-1, 1].
        range_term = float(np.clip(current_range_dist - next_range_dist, -2.0, 2.0)) / 2.0
        aligned_bonus = 0.25 if alignment > 0 else 0.0

        repeating = action == self.last_action and self.recharge_no_progress_steps >= 2
        repeat_cost = 0.8 if repeating else 0.0
        # NOTE(review): this term is the same for every action in the loop, so
        # it shifts all scores equally and cannot change the ranking or the
        # 0.35 margin below -- confirm whether a per-action obstacle measure
        # was intended.
        wall_cost = 0.35 * float(self.local_obstacle_ratio)

        # Accumulate in the same left-to-right order as a single expression
        # so floating-point results stay bit-identical.
        total = 2.4 * frontier_term
        total = total + 0.8 * max(dirty_term, 0.0)
        total = total + 0.35 * range_term
        total = total + aligned_bonus
        total = total - 0.04 * min(visits, 12)
        total = total - repeat_cost
        total = total - wall_cost
        candidates.append((total, action))

    if not candidates:
        return list(legal_action)

    # Highest score first; ties fall back to the larger action index because
    # the (score, action) tuples are compared element-wise.
    candidates = sorted(candidates, reverse=True)
    cutoff = candidates[0][0] - 0.35

    mask = [0] * 8
    for total, action in candidates:
        # Always keep at least three actions; beyond that only near-best ones.
        if sum(mask) < 3 or total >= cutoff:
            mask[action] = 1
        # Never allow more than five actions through.
        if sum(mask) >= 5:
            break

    if any(mask):
        return mask
    return list(legal_action)
def _filter_recharge_escape_actions(self, recharge_action, safe_action):
"""Escape repeated no-move states during low-battery recharge."""
if not self._need_recharge_escape():
@@ -1474,15 +1519,25 @@ class Preprocessor:
charge_reward -= 0.25 * min(self.charge_delta, 3)
if self.has_charger and (self.recharge_mode or self.low_battery):
recharge_risk = self._recharge_risk_score()
if not self.charger_route_known:
frontier_progress = float(
np.clip(self.last_frontier_path_dist - self.frontier_path_dist, -3.0, 3.0)
)
range_delta = float(
np.clip(self.last_nearest_charger_range_dist - self.nearest_charger_range_dist, -2.0, 2.0)
)
discovery_scale = 0.035 + 0.035 * recharge_risk
range_scale = 0.015 + 0.015 * recharge_risk
charge_reward += discovery_scale * frontier_progress
if self.prev_pos is not None and self.cur_pos != self.prev_pos and self.stuck_steps == 0:
charge_reward += range_scale * range_delta
else:
dist_delta = float(
np.clip(self.last_nearest_charger_path_dist - self.nearest_charger_path_dist, -4.0, 4.0)
)
recharge_risk = self._recharge_risk_score()
approach_scale = 0.07 + 0.06 * recharge_risk
retreat_scale = 0.035 + 0.045 * recharge_risk
if not self.charger_route_known:
approach_scale += 0.02
retreat_scale += 0.01
charge_reward += approach_scale * dist_delta if dist_delta > 0 else retreat_scale * dist_delta
if self.charger_safety_margin < self.recharge_enter_margin:
safety_shortage = self.recharge_enter_margin - self.charger_safety_margin