Improve PPO diagnostics and recharge behavior

2026-04-26 20:24:26 +08:00
parent 5b6133db13
commit 69b8a692db
6 changed files with 463 additions and 31 deletions
--- a/agent_ppo/conf/conf.py
+++ b/agent_ppo/conf/conf.py
@@ -50,6 +50,11 @@ class Config:
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

+    # Evaluation tie-break: when policy probabilities are close, use a
+    # lightweight heuristic to prefer safer coverage/recharge actions.
+    EVAL_TIE_BREAK_PROB_GAP = 0.015
+    EVAL_TIE_BREAK_SCORE_SCALE = 0.01
+
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()