Improve PPO diagnostics and recharge behavior

This commit is contained in:
2026-04-26 20:24:26 +08:00
parent 5b6133db13
commit 69b8a692db
6 changed files with 463 additions and 31 deletions

View File

@@ -50,6 +50,11 @@ class Config:
NORMALIZE_ADVANTAGE = True
TARGET_KL = 0.04
# Evaluation tie-break: when policy probabilities are close, use a lightweight
# heuristic to prefer the safer coverage/recharge action.
EVAL_TIE_BREAK_PROB_GAP = 0.015
EVAL_TIE_BREAK_SCORE_SCALE = 0.01
LABEL_SIZE_LIST = [ACTION_NUM]
LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

View File

@@ -125,6 +125,10 @@ def build_monitor():
metrics_name="recharge_escape_count",
expr="avg(recharge_escape_count{})",
)
.add_metric(
metrics_name="recharge_steps",
expr="avg(recharge_steps{})",
)
.end_panel()
.add_panel(
name="NPC危险接近",
@@ -172,6 +176,42 @@ def build_monitor():
expr="avg(remaining_charge{})",
)
.end_panel()
.add_panel(
name="动作掩码健康",
name_en="mask_health",
type="line",
)
.add_metric(
metrics_name="mask_final_avg",
expr="avg(mask_final_avg{})",
)
.add_metric(
metrics_name="mask_one_action_steps",
expr="avg(mask_one_action_steps{})",
)
.add_metric(
metrics_name="mask_two_or_less_action_steps",
expr="avg(mask_two_or_less_action_steps{})",
)
.add_metric(
metrics_name="mask_zero_final_steps",
expr="avg(mask_zero_final_steps{})",
)
.end_panel()
.add_panel(
name="回充动作掩码",
name_en="recharge_mask",
type="line",
)
.add_metric(
metrics_name="mask_recharge_active",
expr="avg(mask_recharge_active{})",
)
.add_metric(
metrics_name="mask_recharge_changed",
expr="avg(mask_recharge_changed{})",
)
.end_panel()
.end_group()
.build()
)