Optimize PPO short-run training

This commit is contained in:
2026-04-26 12:46:00 +08:00
parent ca6234c941
commit eb3efa4df7
5 changed files with 153 additions and 41 deletions

View File

@@ -37,10 +37,16 @@ class Config:
# Training hyperparameters from the Config class body, as rendered in a diff view.
# NOTE(review): the meanings given below are inferred from the constant names and
# the commit title ("Optimize PPO short-run training"); confirm each against the
# code that actually reads it.
GAMMA = 0.99  # presumably the reward discount factor — TODO confirm
LAMDA = 0.95  # presumably the GAE lambda (identifier is spelled LAMDA) — TODO confirm
# NOTE(review): the next two assignments look like the pre-change values of the
# diff. If this text is executed as-is, they are dead: the reassignments that
# immediately follow replace both values.
INIT_LEARNING_RATE_START = 0.0003
BETA_START = 0.001
INIT_LEARNING_RATE_START = 0.00025  # rebinds the name; 0.0003 above is discarded
BETA_START = 0.008  # rebinds the name; 0.001 above is discarded
# BETA_START / BETA_END / BETA_DECAY_STEPS presumably describe a schedule that
# moves a coefficient from 0.008 to 0.002 over 4000 steps. Which loss term BETA
# scales and the shape of the decay are not visible here — TODO confirm.
BETA_END = 0.002
BETA_DECAY_STEPS = 4000
CLIP_PARAM = 0.2  # presumably the PPO ratio clipping range — TODO confirm
VF_COEF = 0.5  # presumably the weight of the value-function loss — TODO confirm
PPO_EPOCHS = 3  # presumably optimization passes per collected batch — TODO confirm
MINI_BATCH_SIZE = 256  # presumably samples per gradient step — TODO confirm
NORMALIZE_ADVANTAGE = True  # presumably toggles advantage normalization — TODO confirm
TARGET_KL = 0.04  # presumably a KL threshold for stopping updates early — TODO confirm
# Single-element list holding ACTION_NUM, which is defined outside this fragment.
LABEL_SIZE_LIST = [ACTION_NUM]
# Shallow copy, so mutating one list later does not affect the other.
LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()