Initial robot vacuum code

This commit is contained in:
2026-04-26 12:38:39 +08:00
commit ca6234c941
38 changed files with 1673 additions and 0 deletions

View File

43
agent_diy/conf/conf.py Normal file
View File

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
"""
import numpy as np
# Configuration, including dimension settings and algorithm parameter settings.
# 配置,包含维度设置,算法参数设置
class Config:
    """Dimension settings and algorithm hyper-parameters, kept as class constants."""

    # Whether to use CNN networks.
    USE_CNN = False

    # 50 when the CNN is enabled, otherwise 0.
    VIEW_SIZE = 50 if USE_CNN else 0

    # Shape of the flat feature vector.
    FEATURE_VECTOR_SHAPE = (153,)

    # Shape of the image feature: 4 x (VIEW_SIZE + 1) x (VIEW_SIZE + 1).
    # With USE_CNN disabled this collapses to (4, 1, 1).
    FEATURE_IMAGE_SHAPE = (4, VIEW_SIZE + 1, VIEW_SIZE + 1)

    # Shapes of the action and value outputs.
    ACTION_SHAPE = (8,)
    VALUE_SHAPE = (1,)

    # Discount factor GAMMA in RL.
    GAMMA = 0.95

    # Initial learning rate.
    START_LR = 5e-4

    # Value function loss coefficient.
    VALUE_LOSS_COEFF = 0.5

    # Entropy regularization coefficient.
    ENTROPY_LOSS_COEFF = 0.025

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
Monitor panel configuration builder for Robot Vacuum.
清扫大作战监控面板配置构建器。
"""
from kaiwudrl.common.monitor.monitor_config_builder import MonitorConfigBuilder
def build_monitor():
    """
    Create the monitoring panel configuration for the custom indicators.

    One group ("algorithm") is declared, holding one line panel per
    indicator: reward, total_loss, value_loss, policy_loss and entropy_loss.
    Each panel carries a single metric whose expression is
    ``avg(<indicator>{})``.

    Returns:
        The object produced by ``MonitorConfigBuilder.build()``
        (presumably a configuration dict - confirm against the builder).
    """
    # (display name, English name); the English name is also used as the
    # metric name and inside the metric expression.
    panels = (
        ("累积回报", "reward"),
        ("总损失", "total_loss"),
        ("价值损失", "value_loss"),
        ("策略损失", "policy_loss"),
        ("熵损失", "entropy_loss"),
    )

    monitor = MonitorConfigBuilder()
    builder = monitor.title("扫地机器人").add_group(
        group_name="算法指标",
        group_name_en="algorithm",
    )

    # Re-bind `builder` on every step so the call chain is the same as a
    # single fluent expression, whether or not each method returns `self`.
    for display_name, key in panels:
        builder = (
            builder.add_panel(
                name=display_name,
                name_en=key,
                type="line",
            )
            .add_metric(
                metrics_name=key,
                # Doubled braces emit a literal "{}", e.g. "avg(reward{})".
                expr=f"avg({key}{{}})",
            )
            .end_panel()
        )

    config_dict = builder.end_group().build()
    return config_dict

View File

@@ -0,0 +1,26 @@
[env_conf]
# Maps used for training. Customize by keeping only desired map IDs, e.g. [1, 2] for maps 1 and 2.
# 训练使用的地图。可自定义选择期望用来训练的地图，如只期望使用1、2号地图训练，数组内仅保留[1,2]即可。
map = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Whether to randomly select maps. Boolean.
# true = randomly pick one of the configured maps per episode, false = use the maps sequentially.
# 是否随机抽取地图。布尔值。true表示每局从配置的地图中随机抽取一张，false表示按顺序抽取地图训练。
map_random = false
# Number of official robots. Range: 1~4 (integer).
# In each round, the configured number of official robots is randomly generated on the road.
# 官方机器人数量。可配置范围为1~4（整数）。每局将按照配置数量在道路上随机生成官方机器人。
robot_count = 4
# Number of chargers. Range: 1~4 (integer). When less than 4, spawn points are randomly chosen.
# 充电桩数量。可配置范围为1~4（整数）。当配置小于4时，将从每张地图可生成充电桩的点位随机选择对应数量的点位生成。
charger_count = 4
# Maximum steps. The task ends when the predicted steps in a single round reach the maximum. Range: 1~2000.
# 最大步数。单局任务预测步数达到最大步数时任务结束。可配置范围为1~2000。
max_step = 1000
# Maximum battery. The battery level when fully charged. Range: 100~999.
# 最大电量。满电状态下的电量。可配置范围为100~999。
battery_max = 200