diff --git a/agent_ppo/conf/monitor_builder.py b/agent_ppo/conf/monitor_builder.py
index 049c0ea..9d8ece6 100644
--- a/agent_ppo/conf/monitor_builder.py
+++ b/agent_ppo/conf/monitor_builder.py
@@ -98,6 +98,81 @@ def build_monitor():
         )
         .end_panel()
         .end_group()
+        .add_group(
+            group_name="行为指标",
+            group_name_en="behavior",
+        )
+        .add_panel(
+            name="原地撞墙步数",
+            name_en="stuck",
+            type="line",
+        )
+        .add_metric(
+            metrics_name="stuck_count",
+            expr="avg(stuck_count{})",
+        )
+        .add_metric(
+            metrics_name="max_stuck_steps",
+            expr="avg(max_stuck_steps{})",
+        )
+        .end_panel()
+        .add_panel(
+            name="回充逃脱次数",
+            name_en="recharge_escape",
+            type="line",
+        )
+        .add_metric(
+            metrics_name="recharge_escape_count",
+            expr="avg(recharge_escape_count{})",
+        )
+        .end_panel()
+        .add_panel(
+            name="NPC危险接近",
+            name_en="npc_danger",
+            type="line",
+        )
+        .add_metric(
+            metrics_name="npc_close_steps",
+            expr="avg(npc_close_steps{})",
+        )
+        .add_metric(
+            metrics_name="npc_danger_steps",
+            expr="avg(npc_danger_steps{})",
+        )
+        .end_panel()
+        .add_panel(
+            name="失败类型",
+            name_en="failure_type",
+            type="line",
+        )
+        .add_metric(
+            metrics_name="npc_collision",
+            expr="avg(npc_collision{})",
+        )
+        .add_metric(
+            metrics_name="battery_fail",
+            expr="avg(battery_fail{})",
+        )
+        .end_panel()
+        .add_panel(
+            name="得分与充电",
+            name_en="score_charge",
+            type="line",
+        )
+        .add_metric(
+            metrics_name="total_score",
+            expr="avg(total_score{})",
+        )
+        .add_metric(
+            metrics_name="charge_count",
+            expr="avg(charge_count{})",
+        )
+        .add_metric(
+            metrics_name="remaining_charge",
+            expr="avg(remaining_charge{})",
+        )
+        .end_panel()
+        .end_group()
         .build()
     )
     return config_dict
diff --git a/agent_ppo/feature/preprocessor.py b/agent_ppo/feature/preprocessor.py
index cdcdd19..7121dde 100644
--- a/agent_ppo/feature/preprocessor.py
+++ b/agent_ppo/feature/preprocessor.py
@@ -80,6 +80,9 @@ class Preprocessor:
         self.stuck_steps = 0
         self.recharge_no_progress_steps = 0
         self.fake_charger_steps = 0
+        self.stuck_count = 0
+        self.max_stuck_steps = 0
+        self.recharge_escape_count = 0
 
         self.dirt_cleaned = 0
         self.last_dirt_cleaned = 0
@@ -138,6 +141,10 @@ class Preprocessor:
         self.nearest_npc_dist = float(self.GRID_SIZE)
         self.npc_danger = False
         self.npcs = []
+        self.npc_close_steps = 0
+        self.npc_danger_steps = 0
+        self.npc_collision = 0
+        self.battery_fail = 0
 
         self.local_dirt_ratio = 0.0
         self.local_obstacle_ratio = 0.0
@@ -412,6 +419,8 @@ class Preprocessor:
         if self.prev_pos is not None and self.cur_pos == self.prev_pos and 0 <= self.last_action < 8:
             if self.charge_delta <= 0:
                 self.stuck_steps += 1
+                self.stuck_count += 1
+                self.max_stuck_steps = max(self.max_stuck_steps, self.stuck_steps)
             else:
                 self.stuck_steps = 0
         else:
@@ -431,6 +440,21 @@ class Preprocessor:
         )
         self.recharge_no_progress_steps = self.recharge_no_progress_steps + 1 if no_progress else 0
 
+        # After step 0, tally steps with an NPC at distance <= 3 and steps flagged npc_danger.
+        if self.step_no > 0:
+            if self.nearest_npc_dist <= 3:
+                self.npc_close_steps += 1
+            if self.npc_danger:
+                self.npc_danger_steps += 1
+
+        # Failure flags are raised only when terminated is set and truncated is not.
+        # The two checks are independent, so both flags can end up set together.
+        if self.terminated and not self.truncated:
+            if self.battery <= 0 or self.remaining_charge <= 0:
+                self.battery_fail = 1
+            if self.npc_danger or self.nearest_npc_dist <= 1:
+                self.npc_collision = 1
+
     def _need_recharge_escape(self):
         return self.stuck_steps >= 2 or self.recharge_no_progress_steps >= 5 or self.fake_charger_steps >= 2
 
@@ -775,6 +799,7 @@ class Preprocessor:
         if not ranked:
             return list(recharge_action)
 
+        self.recharge_escape_count += 1
         ranked.sort()
         escape = [0] * 8
         for _, action in ranked[: min(4, len(ranked))]:
diff --git a/agent_ppo/workflow/train_workflow.py b/agent_ppo/workflow/train_workflow.py
index f24dc7b..14a73aa 100644
--- a/agent_ppo/workflow/train_workflow.py
+++ b/agent_ppo/workflow/train_workflow.py
@@ -177,6 +177,13 @@ class EpisodeRunner:
                         f"remaining_charge:{remaining_charge} "
                         f"charge_count:{charge_count} "
                         f"recharge_steps:{fm.recharge_steps} "
+                        f"stuck_count:{fm.stuck_count} "
+                        f"max_stuck_steps:{fm.max_stuck_steps} "
+                        f"recharge_escape_count:{fm.recharge_escape_count} "
+                        f"npc_close_steps:{fm.npc_close_steps} "
+                        f"npc_danger_steps:{fm.npc_danger_steps} "
+                        f"npc_collision:{fm.npc_collision} "
+                        f"battery_fail:{fm.battery_fail} "
                         f"nearest_charger:{fm.nearest_charger_range_dist:.1f} "
                         f"nearest_npc:{fm.nearest_npc_dist:.1f} "
                         f"result_code:{result_code} "
@@ -215,6 +222,17 @@ class EpisodeRunner:
                                 os.getpid(): {
                                     "reward": total_reward + final_reward,
                                     "episode_cnt": self.episode_cnt,
+                                    # Keys below match the metric names of the "behavior" monitor group.
+                                    "total_score": float(total_score),
+                                    "stuck_count": float(fm.stuck_count),
+                                    "max_stuck_steps": float(fm.max_stuck_steps),
+                                    "recharge_escape_count": float(fm.recharge_escape_count),
+                                    "npc_close_steps": float(fm.npc_close_steps),
+                                    "npc_danger_steps": float(fm.npc_danger_steps),
+                                    "npc_collision": float(fm.npc_collision),
+                                    "battery_fail": float(fm.battery_fail),
+                                    "charge_count": float(charge_count),
+                                    "remaining_charge": float(remaining_charge),
                                 }
                             }
                         )