Optimize PPO short-run training

This commit is contained in:
2026-04-26 12:46:00 +08:00
parent ca6234c941
commit eb3efa4df7
5 changed files with 153 additions and 41 deletions

View File

@@ -77,6 +77,26 @@ def build_monitor():
expr="avg(entropy_loss{})",
)
.end_panel()
.add_panel(
name="近似KL",
name_en="approx_kl",
type="line",
)
.add_metric(
metrics_name="approx_kl",
expr="avg(approx_kl{})",
)
.end_panel()
.add_panel(
name="裁剪比例",
name_en="clip_fraction",
type="line",
)
.add_metric(
metrics_name="clip_fraction",
expr="avg(clip_fraction{})",
)
.end_panel()
.end_group()
.build()
)