Initial robot vacuum code
This commit is contained in:
0
agent_ppo/conf/__init__.py
Normal file
0
agent_ppo/conf/__init__.py
Normal file
49
agent_ppo/conf/conf.py
Normal file
49
agent_ppo/conf/conf.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
###########################################################################
|
||||
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
|
||||
###########################################################################
|
||||
"""
|
||||
Author: Tencent AI Arena Authors
|
||||
|
||||
Configuration for Robot Vacuum PPO agent.
|
||||
清扫大作战 PPO 配置。
|
||||
"""
|
||||
|
||||
|
||||
class Config:
    """Static configuration constants for the Robot Vacuum PPO agent.

    Everything here is a class-level attribute, so values are read directly
    as ``Config.NAME`` without creating an instance.
    """

    # Feature dimensions (69D): the observation is described as three
    # consecutive segments whose sizes are listed below (49 + 12 + 8 = 69).
    FEATURES = [
        7 * 7,
        12,
        8,
    ]
    # NOTE: this is the same list object as FEATURES, not a copy.
    FEATURE_SPLIT_SHAPE = FEATURES
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value.
    VALUE_NUM = 1

    # PPO hyperparameters.
    # NOTE(review): GAMMA / LAMDA are presumably the discount factor and the
    # GAE lambda; the spelling "LAMDA" is kept because other modules may
    # reference it by this name -- confirm against the learner code.
    GAMMA = 0.99
    LAMDA = 0.95

    # NOTE(review): BETA_START is presumably the entropy coefficient and
    # VF_COEF the value-loss weight -- confirm against the loss computation.
    INIT_LEARNING_RATE_START = 0.0003
    BETA_START = 0.001
    CLIP_PARAM = 0.2
    VF_COEF = 0.5

    # One label head sized by the action count; the legal-action list is an
    # independent copy, so mutating one list does not affect the other.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5
|
||||
83
agent_ppo/conf/monitor_builder.py
Normal file
83
agent_ppo/conf/monitor_builder.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
###########################################################################
|
||||
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
|
||||
###########################################################################
|
||||
"""
|
||||
Author: Tencent AI Arena Authors
|
||||
|
||||
Monitor panel configuration builder for Robot Vacuum.
|
||||
清扫大作战监控面板配置构建器。
|
||||
"""
|
||||
|
||||
|
||||
from kaiwudrl.common.monitor.monitor_config_builder import MonitorConfigBuilder
|
||||
|
||||
|
||||
def build_monitor():
    """Create the monitoring panel configuration for custom indicators.

    Builds one group ("algorithm") containing five line panels: cumulative
    reward, total loss, value loss, policy loss and entropy loss. Each panel
    has a single metric whose expression is ``avg(<metric>{})``.

    Returns:
        Whatever ``MonitorConfigBuilder.build()`` returns for the assembled
        configuration.
    """
    # (display name, English name) per panel; the English name is also used
    # as the metric name and inside the metric expression.
    panels = (
        ("累积回报", "reward"),
        ("总损失", "total_loss"),
        ("价值损失", "value_loss"),
        ("策略损失", "policy_loss"),
        ("熵损失", "entropy_loss"),
    )

    monitor = MonitorConfigBuilder()

    builder = monitor.title("清扫大作战").add_group(
        group_name="算法指标",
        group_name_en="algorithm",
    )

    # Rebind on every step so the fluent chain is followed exactly, whether
    # or not each call returns the same builder object.
    for name, name_en in panels:
        builder = (
            builder.add_panel(
                name=name,
                name_en=name_en,
                type="line",
            )
            .add_metric(
                metrics_name=name_en,
                expr=f"avg({name_en}{{}})",
            )
            .end_panel()
        )

    config_dict = builder.end_group().build()
    return config_dict
|
||||
26
agent_ppo/conf/train_env_conf.toml
Normal file
26
agent_ppo/conf/train_env_conf.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[env_conf]
|
||||
# Maps used for training. Customize by keeping only desired map IDs, e.g. [1, 2] for maps 1 and 2.
|
||||
# 训练使用的地图。可自定义选择期望用来训练的地图,如只期望使用1、2号地图训练数组内仅保留[1,2]即可。
|
||||
map = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
||||
|
||||
# Whether to randomly select maps. Boolean.
|
||||
# true = randomly pick one of the configured maps per episode, false = use the configured maps sequentially.
|
||||
# 是否随机抽取地图。布尔值。true表示每局从配置的地图中随机抽取一张,false表示按顺序抽取地图训练。
|
||||
map_random = false
|
||||
|
||||
# Number of official robots. Range: 1~4 (integer).
|
||||
# In each round, the configured number of official robots will be randomly generated on the road.
|
||||
# 官方机器人数量。可配置范围为1~4(整数)。每局将按照配置数量在道路上随机生成官方机器人。
|
||||
robot_count = 4
|
||||
|
||||
# Number of chargers. Range: 1~4 (integer). When less than 4, spawn points are randomly chosen.
|
||||
# 充电桩数量。可配置范围为1~4(整数)。当配置小于4时,将从每张地图可生成充电桩的点位随机选择对应数量的点位生成。
|
||||
charger_count = 4
|
||||
|
||||
# Maximum steps. The task ends when the predicted steps in a single round reach the maximum. Range: 1~2000.
|
||||
# 最大步数。单局任务预测步数达到最大步数时,任务结束。可配置范围为1~2000。
|
||||
max_step = 1000
|
||||
|
||||
# Maximum battery. The battery level when fully charged. Range: 100~999.
|
||||
# 最大电量。满电状态下的电量。可配置范围100~999。
|
||||
battery_max = 200
|
||||
Reference in New Issue
Block a user