Improve PPO diagnostics and recharge behavior
This commit is contained in:
@@ -50,6 +50,11 @@ class Config:
|
||||
NORMALIZE_ADVANTAGE = True
|
||||
TARGET_KL = 0.04
|
||||
|
||||
# Evaluation tie-break: when policy probabilities are close, prefer safer
|
||||
# coverage/recharge actions with a lightweight heuristic.
|
||||
EVAL_TIE_BREAK_PROB_GAP = 0.015
|
||||
EVAL_TIE_BREAK_SCORE_SCALE = 0.01
|
||||
|
||||
LABEL_SIZE_LIST = [ACTION_NUM]
|
||||
LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()
|
||||
|
||||
|
||||
@@ -125,6 +125,10 @@ def build_monitor():
|
||||
metrics_name="recharge_escape_count",
|
||||
expr="avg(recharge_escape_count{})",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="recharge_steps",
|
||||
expr="avg(recharge_steps{})",
|
||||
)
|
||||
.end_panel()
|
||||
.add_panel(
|
||||
name="NPC危险接近",
|
||||
@@ -172,6 +176,42 @@ def build_monitor():
|
||||
expr="avg(remaining_charge{})",
|
||||
)
|
||||
.end_panel()
|
||||
.add_panel(
|
||||
name="动作掩码健康",
|
||||
name_en="mask_health",
|
||||
type="line",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_final_avg",
|
||||
expr="avg(mask_final_avg{})",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_one_action_steps",
|
||||
expr="avg(mask_one_action_steps{})",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_two_or_less_action_steps",
|
||||
expr="avg(mask_two_or_less_action_steps{})",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_zero_final_steps",
|
||||
expr="avg(mask_zero_final_steps{})",
|
||||
)
|
||||
.end_panel()
|
||||
.add_panel(
|
||||
name="回充动作掩码",
|
||||
name_en="recharge_mask",
|
||||
type="line",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_recharge_active",
|
||||
expr="avg(mask_recharge_active{})",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="mask_recharge_changed",
|
||||
expr="avg(mask_recharge_changed{})",
|
||||
)
|
||||
.end_panel()
|
||||
.end_group()
|
||||
.build()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user