优化 PPO 清扫策略

This commit is contained in:
2026-04-26 17:29:03 +08:00
parent f04feb0cd9
commit f44e2483fc
6 changed files with 223 additions and 86 deletions

View File

@@ -6,7 +6,7 @@ map = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Whether to randomly select maps. Boolean.
# true = randomly pick one of the configured maps for each episode; false = use the configured maps sequentially.
# 是否随机抽取地图。布尔值。true表示每局从配置的地图中随机抽取一张，false表示按顺序抽取地图训练。
map_random = false
map_random = true
# Number of official robots. Range: 1~4 (integer).
# In each round, official robots will be randomly generated on the road according to the configured number.
@@ -23,4 +23,4 @@ max_step = 1000
# Maximum battery. The battery level when fully charged. Range: 100~999.
# 最大电量。满电状态下的电量。可配置范围100~999。
battery_max = 200
battery_max = 200