Compare commits
2 Commits
f44e2483fc
...
00b26af3ed
| Author | SHA1 | Date | |
|---|---|---|---|
| 00b26af3ed | |||
| 5c2df10150 |
@@ -98,6 +98,81 @@ def build_monitor():
|
|||||||
)
|
)
|
||||||
.end_panel()
|
.end_panel()
|
||||||
.end_group()
|
.end_group()
|
||||||
|
.add_group(
|
||||||
|
group_name="行为指标",
|
||||||
|
group_name_en="behavior",
|
||||||
|
)
|
||||||
|
.add_panel(
|
||||||
|
name="原地撞墙步数",
|
||||||
|
name_en="stuck",
|
||||||
|
type="line",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="stuck_count",
|
||||||
|
expr="avg(stuck_count{})",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="max_stuck_steps",
|
||||||
|
expr="avg(max_stuck_steps{})",
|
||||||
|
)
|
||||||
|
.end_panel()
|
||||||
|
.add_panel(
|
||||||
|
name="回充逃脱次数",
|
||||||
|
name_en="recharge_escape",
|
||||||
|
type="line",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="recharge_escape_count",
|
||||||
|
expr="avg(recharge_escape_count{})",
|
||||||
|
)
|
||||||
|
.end_panel()
|
||||||
|
.add_panel(
|
||||||
|
name="NPC危险接近",
|
||||||
|
name_en="npc_danger",
|
||||||
|
type="line",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="npc_close_steps",
|
||||||
|
expr="avg(npc_close_steps{})",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="npc_danger_steps",
|
||||||
|
expr="avg(npc_danger_steps{})",
|
||||||
|
)
|
||||||
|
.end_panel()
|
||||||
|
.add_panel(
|
||||||
|
name="失败类型",
|
||||||
|
name_en="failure_type",
|
||||||
|
type="line",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="npc_collision",
|
||||||
|
expr="avg(npc_collision{})",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="battery_fail",
|
||||||
|
expr="avg(battery_fail{})",
|
||||||
|
)
|
||||||
|
.end_panel()
|
||||||
|
.add_panel(
|
||||||
|
name="得分与充电",
|
||||||
|
name_en="score_charge",
|
||||||
|
type="line",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="total_score",
|
||||||
|
expr="avg(total_score{})",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="charge_count",
|
||||||
|
expr="avg(charge_count{})",
|
||||||
|
)
|
||||||
|
.add_metric(
|
||||||
|
metrics_name="remaining_charge",
|
||||||
|
expr="avg(remaining_charge{})",
|
||||||
|
)
|
||||||
|
.end_panel()
|
||||||
|
.end_group()
|
||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
return config_dict
|
return config_dict
|
||||||
|
|||||||
@@ -77,6 +77,12 @@ class Preprocessor:
|
|||||||
self.current_visit_count = 0
|
self.current_visit_count = 0
|
||||||
self.is_new_cell = False
|
self.is_new_cell = False
|
||||||
self.last_action = -1
|
self.last_action = -1
|
||||||
|
self.stuck_steps = 0
|
||||||
|
self.recharge_no_progress_steps = 0
|
||||||
|
self.fake_charger_steps = 0
|
||||||
|
self.stuck_count = 0
|
||||||
|
self.max_stuck_steps = 0
|
||||||
|
self.recharge_escape_count = 0
|
||||||
|
|
||||||
self.dirt_cleaned = 0
|
self.dirt_cleaned = 0
|
||||||
self.last_dirt_cleaned = 0
|
self.last_dirt_cleaned = 0
|
||||||
@@ -135,6 +141,10 @@ class Preprocessor:
|
|||||||
self.nearest_npc_dist = float(self.GRID_SIZE)
|
self.nearest_npc_dist = float(self.GRID_SIZE)
|
||||||
self.npc_danger = False
|
self.npc_danger = False
|
||||||
self.npcs = []
|
self.npcs = []
|
||||||
|
self.npc_close_steps = 0
|
||||||
|
self.npc_danger_steps = 0
|
||||||
|
self.npc_collision = 0
|
||||||
|
self.battery_fail = 0
|
||||||
|
|
||||||
self.local_dirt_ratio = 0.0
|
self.local_dirt_ratio = 0.0
|
||||||
self.local_obstacle_ratio = 0.0
|
self.local_obstacle_ratio = 0.0
|
||||||
@@ -219,6 +229,7 @@ class Preprocessor:
|
|||||||
self._update_charger_state(hx, hz, organs)
|
self._update_charger_state(hx, hz, organs)
|
||||||
self._update_npc_state(hx, hz, self.npcs)
|
self._update_npc_state(hx, hz, self.npcs)
|
||||||
self._update_recharge_mode()
|
self._update_recharge_mode()
|
||||||
|
self._update_motion_health()
|
||||||
|
|
||||||
def _update_passable(self, hx, hz):
|
def _update_passable(self, hx, hz):
|
||||||
"""Write local view into global passable map.
|
"""Write local view into global passable map.
|
||||||
@@ -403,6 +414,46 @@ class Preprocessor:
|
|||||||
if self.recharge_mode:
|
if self.recharge_mode:
|
||||||
self.recharge_steps += 1
|
self.recharge_steps += 1
|
||||||
|
|
||||||
|
def _update_motion_health(self):
    """Refresh the per-step stall, charging-progress, NPC and failure counters.

    Despite its name this method maintains several independent groups of
    state, all derived from attributes already set on ``self``:

    * ``stuck_steps`` / ``stuck_count`` / ``max_stuck_steps`` -- consecutive
      and cumulative steps where the position did not change.
    * ``fake_charger_steps`` -- consecutive steps spent on a charger cell
      without the battery actually charging.
    * ``recharge_no_progress_steps`` -- consecutive recharge-mode steps that
      neither charged nor reduced the path distance to the nearest charger.
    * ``npc_close_steps`` / ``npc_danger_steps`` -- cumulative NPC proximity
      counters.
    * ``battery_fail`` / ``npc_collision`` -- failure flags set when the
      episode terminated without being truncated.

    Returns:
        None. All results are written back onto ``self``.
    """
    # A step is "stuck" when the position is unchanged, the last action index
    # is in the 0-7 range, and the step did not charge the battery.
    stalled = (
        self.prev_pos is not None
        and self.cur_pos == self.prev_pos
        and 0 <= self.last_action < 8
        and self.charge_delta <= 0
    )
    if stalled:
        self.stuck_steps += 1
        self.stuck_count += 1
        self.max_stuck_steps = max(self.max_stuck_steps, self.stuck_steps)
    else:
        self.stuck_steps = 0

    # max(..., 1) guards against a zero battery_max.
    battery_ratio = self.battery / max(self.battery_max, 1)
    # Only a gain of more than one unit counts as a battery increase.
    battery_increased = self.battery > self.prev_battery + 1

    # On a charger cell, below 90% battery, yet nothing is charging.
    if self.on_charger and battery_ratio < 0.9 and self.charge_delta <= 0 and not battery_increased:
        self.fake_charger_steps += 1
    else:
        self.fake_charger_steps = 0

    # In recharge mode with a known charger: no charge gained and the path
    # distance has not shrunk by at least 0.1 since the previous value.
    no_progress = (
        self.recharge_mode
        and self.has_charger
        and self.charge_delta <= 0
        and not battery_increased
        and self.nearest_charger_path_dist >= self.last_nearest_charger_path_dist - 0.1
    )
    if no_progress:
        self.recharge_no_progress_steps += 1
    else:
        self.recharge_no_progress_steps = 0

    # Proximity counters are not advanced while step_no is 0.
    if self.step_no > 0:
        if self.nearest_npc_dist <= 3:
            self.npc_close_steps += 1
        if self.npc_danger:
            self.npc_danger_steps += 1

    if self.terminated and not self.truncated:
        if self.battery <= 0 or self.remaining_charge <= 0:
            self.battery_fail = 1
        # NOTE(review): this check is independent of the battery check, so a
        # single termination can set both flags -- confirm that is intended.
        if self.npc_danger or self.nearest_npc_dist <= 1:
            self.npc_collision = 1
|
||||||
|
|
||||||
|
def _need_recharge_escape(self, stuck_limit=2, no_progress_limit=5, fake_charger_limit=2):
    """Report whether any stall counter is high enough to trigger an escape.

    Args:
        stuck_limit: Minimum ``stuck_steps`` that triggers an escape.
        no_progress_limit: Minimum ``recharge_no_progress_steps`` that
            triggers an escape.
        fake_charger_limit: Minimum ``fake_charger_steps`` that triggers an
            escape.

    Returns:
        True when at least one of the three counters has reached its limit.
        Called without arguments, the limits are 2, 5 and 2.
    """
    return (
        self.stuck_steps >= stuck_limit
        or self.recharge_no_progress_steps >= no_progress_limit
        or self.fake_charger_steps >= fake_charger_limit
    )
|
||||||
|
|
||||||
def _charger_safety_buffer(self):
|
def _charger_safety_buffer(self):
|
||||||
# One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise.
|
# One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise.
|
||||||
base = max(24.0, 0.16 * float(self.battery_max))
|
base = max(24.0, 0.16 * float(self.battery_max))
|
||||||
@@ -613,8 +664,10 @@ class Preprocessor:
|
|||||||
"""
|
"""
|
||||||
legal = self._filter_blocked_actions(self._legal_act)
|
legal = self._filter_blocked_actions(self._legal_act)
|
||||||
legal = self._filter_npc_danger_actions(legal)
|
legal = self._filter_npc_danger_actions(legal)
|
||||||
|
safe_legal = list(legal)
|
||||||
if self.recharge_mode:
|
if self.recharge_mode:
|
||||||
legal = self._filter_recharge_actions(legal)
|
legal = self._filter_recharge_actions(legal)
|
||||||
|
legal = self._filter_recharge_escape_actions(legal, safe_legal)
|
||||||
elif self.on_charger and self.battery / max(self.battery_max, 1) > 0.65:
|
elif self.on_charger and self.battery / max(self.battery_max, 1) > 0.65:
|
||||||
legal = self._filter_leave_charger_actions(legal)
|
legal = self._filter_leave_charger_actions(legal)
|
||||||
return list(legal)
|
return list(legal)
|
||||||
@@ -695,7 +748,8 @@ class Preprocessor:
|
|||||||
|
|
||||||
# When already inside the charger range, stay inside until recharge mode exits.
|
# When already inside the charger range, stay inside until recharge mode exits.
|
||||||
# 已经在充电区域内时,回充模式退出前不要离开充电区域。
|
# 已经在充电区域内时,回充模式退出前不要离开充电区域。
|
||||||
if current_range_dist <= 0.0:
|
confirmed_charger = self.charge_delta > 0 or self.battery > self.prev_battery + 1
|
||||||
|
if current_range_dist <= 0.0 and confirmed_charger:
|
||||||
stay = [0] * 8
|
stay = [0] * 8
|
||||||
for _, _, next_range_dist, action in scored:
|
for _, _, next_range_dist, action in scored:
|
||||||
if next_range_dist <= 0.0:
|
if next_range_dist <= 0.0:
|
||||||
@@ -718,6 +772,40 @@ class Preprocessor:
|
|||||||
|
|
||||||
return recharge if any(recharge) else list(legal_action)
|
return recharge if any(recharge) else list(legal_action)
|
||||||
|
|
||||||
|
def _filter_recharge_escape_actions(self, recharge_action, safe_action):
    """Replace the recharge mask with an escape mask when recharging has stalled.

    When ``_need_recharge_escape()`` is false, the recharge mask is returned
    unchanged (as a copy). Otherwise every action enabled in ``safe_action``
    is scored and the four lowest-scoring actions form the new mask.

    The score of an action (lower is better) is the sum of:

    * the ``_charger_move_distance`` of the cell the action leads to,
    * ``0.05`` per recorded visit of that cell, capped at 20 visits
      (cells outside the grid count as unvisited),
    * ``6.0`` if it repeats ``last_action`` while ``stuck_steps >= 2``,
    * ``1.5`` if it increases the charger distance by more than ``0.1``.

    Args:
        recharge_action: 0/1 mask produced by the recharge filter; used as
            the fallback result.
        safe_action: 0/1 mask indexed like ``ACTION_DIRS``; only actions
            enabled here may appear in the escape mask.

    Returns:
        A new list: the 8-entry escape mask, or a copy of ``recharge_action``
        when no escape is needed or no safe action exists.

    Side effects:
        Increments ``recharge_escape_count`` each time an escape mask is
        built, i.e. once per call that escapes.
    """
    if not self._need_recharge_escape():
        return list(recharge_action)

    hx, hz = self.cur_pos
    current_dist = self._charger_move_distance(hx, hz)
    repeatedly_stuck = self.stuck_steps >= 2

    ranked = []
    for action, (dx, dz) in enumerate(self.ACTION_DIRS):
        if safe_action[action] <= 0:
            continue
        nx, nz = hx + dx, hz + dz
        next_dist = self._charger_move_distance(nx, nz)
        visit_count = 0
        if 0 <= nx < self.GRID_SIZE and 0 <= nz < self.GRID_SIZE:
            visit_count = int(self.visit_count_map[nx, nz])
        failed_action_penalty = 6.0 if action == self.last_action and repeatedly_stuck else 0.0
        away_penalty = 1.5 if next_dist > current_dist + 0.1 else 0.0
        score = next_dist + 0.05 * min(visit_count, 20) + failed_action_penalty + away_penalty
        ranked.append((score, action))

    if not ranked:
        return list(recharge_action)

    self.recharge_escape_count += 1
    # Ties on score are broken by the lower action index (tuple ordering).
    ranked.sort()
    escape = [0] * 8
    for _, action in ranked[:4]:
        escape[action] = 1

    # Drop the action that just failed, unless it is the only option left.
    if repeatedly_stuck and sum(escape) > 1 and 0 <= self.last_action < 8:
        escape[self.last_action] = 0

    # At least one entry is always set here: ranked was non-empty and the
    # removal above only runs when more than one action is enabled.
    return escape
|
||||||
|
|
||||||
def _filter_leave_charger_actions(self, legal_action):
|
def _filter_leave_charger_actions(self, legal_action):
|
||||||
"""Prefer moves that leave charger range when battery is healthy."""
|
"""Prefer moves that leave charger range when battery is healthy."""
|
||||||
if not self.has_charger:
|
if not self.has_charger:
|
||||||
@@ -826,6 +914,8 @@ class Preprocessor:
|
|||||||
stuck_penalty = 0.0
|
stuck_penalty = 0.0
|
||||||
if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
|
if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
|
||||||
stuck_penalty = -0.03
|
stuck_penalty = -0.03
|
||||||
|
if self.recharge_mode:
|
||||||
|
stuck_penalty -= 0.02 * min(self.stuck_steps, 5)
|
||||||
|
|
||||||
npc_penalty = 0.0
|
npc_penalty = 0.0
|
||||||
if self.npc_danger:
|
if self.npc_danger:
|
||||||
|
|||||||
@@ -177,6 +177,12 @@ class EpisodeRunner:
|
|||||||
f"remaining_charge:{remaining_charge} "
|
f"remaining_charge:{remaining_charge} "
|
||||||
f"charge_count:{charge_count} "
|
f"charge_count:{charge_count} "
|
||||||
f"recharge_steps:{fm.recharge_steps} "
|
f"recharge_steps:{fm.recharge_steps} "
|
||||||
|
f"stuck_count:{fm.stuck_count} "
|
||||||
|
f"max_stuck_steps:{fm.max_stuck_steps} "
|
||||||
|
f"recharge_escape_count:{fm.recharge_escape_count} "
|
||||||
|
f"npc_close_steps:{fm.npc_close_steps} "
|
||||||
|
f"npc_danger_steps:{fm.npc_danger_steps} "
|
||||||
|
f"npc_collision:{fm.npc_collision} "
|
||||||
f"nearest_charger:{fm.nearest_charger_range_dist:.1f} "
|
f"nearest_charger:{fm.nearest_charger_range_dist:.1f} "
|
||||||
f"nearest_npc:{fm.nearest_npc_dist:.1f} "
|
f"nearest_npc:{fm.nearest_npc_dist:.1f} "
|
||||||
f"result_code:{result_code} "
|
f"result_code:{result_code} "
|
||||||
@@ -215,6 +221,16 @@ class EpisodeRunner:
|
|||||||
os.getpid(): {
|
os.getpid(): {
|
||||||
"reward": total_reward + final_reward,
|
"reward": total_reward + final_reward,
|
||||||
"episode_cnt": self.episode_cnt,
|
"episode_cnt": self.episode_cnt,
|
||||||
|
"total_score": float(total_score),
|
||||||
|
"stuck_count": float(fm.stuck_count),
|
||||||
|
"max_stuck_steps": float(fm.max_stuck_steps),
|
||||||
|
"recharge_escape_count": float(fm.recharge_escape_count),
|
||||||
|
"npc_close_steps": float(fm.npc_close_steps),
|
||||||
|
"npc_danger_steps": float(fm.npc_danger_steps),
|
||||||
|
"npc_collision": float(fm.npc_collision),
|
||||||
|
"battery_fail": float(fm.battery_fail),
|
||||||
|
"charge_count": float(charge_count),
|
||||||
|
"remaining_charge": float(remaining_charge),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user