This repository has been archived on 2026-05-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
-----/conf/configure_app.toml
2026-04-26 12:38:39 +08:00

70 lines
5.1 KiB
TOML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
[app]
# Replay buffer configuration.
# Capacity of the sample pool.
replay_buffer_capacity = 10000
# Fill ratio of the sample pool that triggers training: training starts once
# the samples in the pool reach this fraction of the total capacity.
preload_ratio = 1.0
# Removal policy applied to old samples when new samples are added to the
# sample pool. Options:
#   - reverb.selectors.Lifo: last in, first out.
#   - reverb.selectors.Fifo: first in, first out.
reverb_remover = "reverb.selectors.Fifo"
# Policy the Learner uses to sample from the sample pool. Options:
#   - reverb.selectors.Uniform: samples are selected uniformly at random from
#     the replay buffer, with each sample having an equal probability of
#     being chosen.
#   - reverb.selectors.Fifo: samples are selected in the order they were
#     added to the replay buffer (first in, first out).
reverb_sampler = "reverb.selectors.Uniform"
# Strategy for balancing data insertion and sampling in the experience replay
# buffer. Options: SampleToInsertRatio, MinSize.
# How to choose:
#   - SampleToInsertRatio: use when training is faster than sample generation
#     (e.g. GPU training with few environments). Strictly controls how many
#     times each sample is reused, to prevent overfitting.
#   - MinSize: use when sample generation is faster than training (e.g. local
#     CPU training, or many environments). Once the buffer reaches its
#     threshold, training runs at full speed with no limit on sample reuse.
# Related parameters (both only take effect in SampleToInsertRatio mode):
#   - reverb_samples_per_insert: maximum number of times sampling is allowed
#     per inserted sample.
#   - reverb_error_buffer: tolerance buffer for the ratio constraint, similar
#     to a TCP sliding window.
reverb_rate_limiter = "MinSize"
reverb_samples_per_insert = 5
reverb_error_buffer = 5
# Sample batch size used by the Learner during training.
train_batch_size = 2048
# Model dump frequency: number of training steps between writes of the model
# parameter file.
dump_model_freq = 100
# Interval, in minutes, at which the Learner pushes model parameter files to
# the model pool and at which Actors fetch model parameter files from it.
model_file_sync_per_minutes = 1
# Number of model parameter files the Learner pushes each time, and the
# maximum number each Actor fetches at once. Upper limit: 50.
modelpool_max_save_model_count = 1
# Whether to enable model preloading. If true, the model identified by
# preload_model_id is loaded from the preload_model_dir directory as the
# initial model; if false, no preloading is performed.
preload_model = false
# Relative path of the folder that holds the preloaded model. The variable
# {agent_name} refers to the agent_<algorithm name> directory in the code
# package. Only effective when preload_model = true.
# To use preloading, create a ckpt folder under that agent_<algorithm name>
# directory and place the model file (.pkl) in it.
preload_model_dir = "{agent_name}/ckpt"
# ID of the model to preload; here this is the model's number of training
# steps, matching the training-step count recorded in the model file name.
# Only takes effect when preload_model = true.
# Note: do not rename the original model file, otherwise model preloading
# will fail.
preload_model_id = 1000