Optimize PPO short-run training
This commit is contained in:
@@ -37,10 +37,16 @@ class Config:
|
||||
GAMMA = 0.99
|
||||
LAMDA = 0.95
|
||||
|
||||
-INIT_LEARNING_RATE_START = 0.0003
|
||||
-BETA_START = 0.001
|
||||
+INIT_LEARNING_RATE_START = 0.00025
|
||||
+BETA_START = 0.008
|
||||
BETA_END = 0.002
|
||||
BETA_DECAY_STEPS = 4000
|
||||
CLIP_PARAM = 0.2
|
||||
VF_COEF = 0.5
|
||||
PPO_EPOCHS = 3
|
||||
MINI_BATCH_SIZE = 256
|
||||
NORMALIZE_ADVANTAGE = True
|
||||
TARGET_KL = 0.04
|
||||
|
||||
LABEL_SIZE_LIST = [ACTION_NUM]
|
||||
LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()
|
||||
|
||||
Reference in New Issue
Block a user