Optimize PPO short-run training
This commit is contained in:
@@ -37,10 +37,16 @@ class Config:
|
||||
GAMMA = 0.99
|
||||
LAMDA = 0.95
|
||||
|
||||
INIT_LEARNING_RATE_START = 0.0003
|
||||
BETA_START = 0.001
|
||||
INIT_LEARNING_RATE_START = 0.00025
|
||||
BETA_START = 0.008
|
||||
BETA_END = 0.002
|
||||
BETA_DECAY_STEPS = 4000
|
||||
CLIP_PARAM = 0.2
|
||||
VF_COEF = 0.5
|
||||
PPO_EPOCHS = 3
|
||||
MINI_BATCH_SIZE = 256
|
||||
NORMALIZE_ADVANTAGE = True
|
||||
TARGET_KL = 0.04
|
||||
|
||||
LABEL_SIZE_LIST = [ACTION_NUM]
|
||||
LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()
|
||||
|
||||
@@ -77,6 +77,26 @@ def build_monitor():
|
||||
expr="avg(entropy_loss{})",
|
||||
)
|
||||
.end_panel()
|
||||
.add_panel(
|
||||
name="近似KL",
|
||||
name_en="approx_kl",
|
||||
type="line",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="approx_kl",
|
||||
expr="avg(approx_kl{})",
|
||||
)
|
||||
.end_panel()
|
||||
.add_panel(
|
||||
name="裁剪比例",
|
||||
name_en="clip_fraction",
|
||||
type="line",
|
||||
)
|
||||
.add_metric(
|
||||
metrics_name="clip_fraction",
|
||||
expr="avg(clip_fraction{})",
|
||||
)
|
||||
.end_panel()
|
||||
.end_group()
|
||||
.build()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user