Avoid wall-hugging during unknown recharge routes
This commit is contained in:
@@ -1212,9 +1212,15 @@ class Preprocessor:
|
||||
score += 0.35 if visit_count == 0 else -0.05 * min(visit_count, 10)
|
||||
|
||||
if self.recharge_mode:
|
||||
if self.charger_route_known:
|
||||
score += 2.2 * float(self.charger_action_delta[action])
|
||||
if self._charger_move_distance(nx, nz) < self._charger_move_distance(hx, hz):
|
||||
score += 0.8
|
||||
else:
|
||||
score += 2.0 * float(self.frontier_action_delta[action])
|
||||
score += 0.7 * max(float(self.global_dirty_action_delta[action]), 0.0)
|
||||
if self._min_charger_range_dist(nx, nz) < self._min_charger_range_dist(hx, hz):
|
||||
score += 0.15
|
||||
else:
|
||||
if self.global_dirty_path_dist < self.GRID_SIZE:
|
||||
score += 1.8 * float(self.global_dirty_action_delta[action])
|
||||
@@ -1336,18 +1342,17 @@ class Preprocessor:
|
||||
if any(stay):
|
||||
return stay
|
||||
|
||||
if not self.charger_route_known:
|
||||
return self._filter_recharge_discovery_actions(legal_action, scored, current_range_dist)
|
||||
|
||||
recharge = [0] * 8
|
||||
best_next_dist = min(item[0] for item in scored)
|
||||
ranked = sorted(scored, key=lambda item: (item[0], -item[1]))
|
||||
max_recharge_actions = 4 if self.charger_route_known else 5
|
||||
dist_slack = 2.5 if self.charger_route_known else 4.0
|
||||
max_recharge_actions = 4
|
||||
dist_slack = 2.5
|
||||
for next_dist, alignment, next_range_dist, action in ranked:
|
||||
route_progress = next_dist <= current_move_dist + 0.1
|
||||
range_progress = next_range_dist <= current_range_dist
|
||||
direction_progress = alignment > 0
|
||||
if next_dist <= best_next_dist + dist_slack and (
|
||||
route_progress or (not self.charger_route_known and (range_progress or direction_progress))
|
||||
):
|
||||
if next_dist <= best_next_dist + dist_slack and route_progress:
|
||||
recharge[action] = 1
|
||||
if sum(recharge) >= max_recharge_actions:
|
||||
break
|
||||
@@ -1358,6 +1363,46 @@ class Preprocessor:
|
||||
|
||||
return recharge if any(recharge) else list(legal_action)
|
||||
|
||||
def _filter_recharge_discovery_actions(self, legal_action, scored, current_range_dist):
    """When charger route is unknown, search for a route instead of pushing into walls.

    Each legal candidate is given a heuristic score built from its frontier
    gain, non-negative dirty gain, clipped range improvement, an alignment
    bonus, a visit-count penalty and a repeat-action penalty. The best
    scoring candidates are then turned into an action mask.

    Args:
        legal_action: Indexable by action id; entries ``<= 0`` are skipped.
        scored: Iterable of ``(next_dist, alignment, next_range_dist, action)``
            tuples. ``next_dist`` is not used by this method.
        current_range_dist: Range distance at the current position, compared
            against each candidate's ``next_range_dist``.

    Returns:
        An 8-element list of 0/1 flags. The three best candidates are always
        kept; further candidates are kept only while their score is within
        0.35 of the best, up to five in total. If no candidate is legal, a
        copy of ``legal_action`` is returned unchanged.
    """
    hx, hz = self.cur_pos
    grid_size = self.GRID_SIZE

    # Loop-invariant: depends only on the current state, not on the action.
    # NOTE(review): because this value is identical for every candidate it
    # shifts all scores equally and cannot change the ranking or the relative
    # cutoff below. Presumably a per-action obstacle measure was intended --
    # confirm against the code that computes local_obstacle_ratio.
    wall_hug_penalty = 0.35 * float(self.local_obstacle_ratio)

    ranked = []
    for _next_dist, alignment, next_range_dist, action in scored:
        if legal_action[action] <= 0:
            continue

        dx, dz = self.ACTION_DIRS[action]
        nx, nz = hx + dx, hz + dz
        # Off-grid targets are treated as never visited.
        if 0 <= nx < grid_size and 0 <= nz < grid_size:
            visit_count = int(self.visit_count_map[nx, nz])
        else:
            visit_count = 0

        frontier_gain = float(self.frontier_action_delta[action])
        dirty_gain = float(self.global_dirty_action_delta[action])
        # Range improvement, clipped to [-2, 2] and normalised to [-1, 1].
        range_gain = float(np.clip(current_range_dist - next_range_dist, -2.0, 2.0)) / 2.0
        alignment_gain = 0.25 if alignment > 0 else 0.0
        # Discourage repeating the last action once no progress has been
        # made for at least two steps.
        if action == self.last_action and self.recharge_no_progress_steps >= 2:
            repeat_penalty = 0.8
        else:
            repeat_penalty = 0.0

        score = (
            2.4 * frontier_gain
            + 0.8 * max(dirty_gain, 0.0)
            + 0.35 * range_gain
            + alignment_gain
            - 0.04 * min(visit_count, 12)
            - repeat_penalty
            - wall_hug_penalty
        )
        ranked.append((score, action))

    if not ranked:
        return list(legal_action)

    # Highest score first; equal scores are ordered by descending action id.
    ranked.sort(reverse=True)
    best_score = ranked[0][0]

    discovery = [0] * 8
    selected = 0  # number of flags set so far (replaces repeated sum())
    for score, action in ranked:
        if score >= best_score - 0.35 or selected < 3:
            if not discovery[action]:
                discovery[action] = 1
                selected += 1
        if selected >= 5:
            break
    return discovery if any(discovery) else list(legal_action)
|
||||
|
||||
def _filter_recharge_escape_actions(self, recharge_action, safe_action):
|
||||
"""Escape repeated no-move states during low-battery recharge."""
|
||||
if not self._need_recharge_escape():
|
||||
@@ -1474,15 +1519,25 @@ class Preprocessor:
|
||||
charge_reward -= 0.25 * min(self.charge_delta, 3)
|
||||
|
||||
if self.has_charger and (self.recharge_mode or self.low_battery):
|
||||
recharge_risk = self._recharge_risk_score()
|
||||
if not self.charger_route_known:
|
||||
frontier_progress = float(
|
||||
np.clip(self.last_frontier_path_dist - self.frontier_path_dist, -3.0, 3.0)
|
||||
)
|
||||
range_delta = float(
|
||||
np.clip(self.last_nearest_charger_range_dist - self.nearest_charger_range_dist, -2.0, 2.0)
|
||||
)
|
||||
discovery_scale = 0.035 + 0.035 * recharge_risk
|
||||
range_scale = 0.015 + 0.015 * recharge_risk
|
||||
charge_reward += discovery_scale * frontier_progress
|
||||
if self.prev_pos is not None and self.cur_pos != self.prev_pos and self.stuck_steps == 0:
|
||||
charge_reward += range_scale * range_delta
|
||||
else:
|
||||
dist_delta = float(
|
||||
np.clip(self.last_nearest_charger_path_dist - self.nearest_charger_path_dist, -4.0, 4.0)
|
||||
)
|
||||
recharge_risk = self._recharge_risk_score()
|
||||
approach_scale = 0.07 + 0.06 * recharge_risk
|
||||
retreat_scale = 0.035 + 0.045 * recharge_risk
|
||||
if not self.charger_route_known:
|
||||
approach_scale += 0.02
|
||||
retreat_scale += 0.01
|
||||
charge_reward += approach_scale * dist_delta if dist_delta > 0 else retreat_scale * dist_delta
|
||||
if self.charger_safety_margin < self.recharge_enter_margin:
|
||||
safety_shortage = self.recharge_enter_margin - self.charger_safety_margin
|
||||
|
||||
Reference in New Issue
Block a user