Initial robot vacuum code

This commit is contained in:
2026-04-26 12:38:39 +08:00
commit ca6234c941
38 changed files with 1673 additions and 0 deletions

49
agent_ppo/conf/conf.py Normal file
View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
Configuration for Robot Vacuum PPO agent.
清扫大作战 PPO 配置。
"""
class Config:
    """Static configuration for the Robot Vacuum PPO agent.

    Every setting is a plain class attribute, so it is read as
    ``Config.NAME`` without creating an instance. List-valued settings that
    are derived from another list are stored as independent copies, so an
    in-place edit of one list never leaks into the other.
    """

    # Feature dimensions: 7 * 7 + 12 + 8 = 69 values in total.
    # NOTE(review): the meaning of each group (e.g. 7 * 7 presumably being a
    # local map window) is not visible here — confirm against the feature
    # extraction code.
    FEATURES = [
        7 * 7,
        12,
        8,
    ]
    # Copy rather than alias, matching LEGAL_ACTION_SIZE_LIST below: with a
    # shared list object, mutating FEATURE_SPLIT_SHAPE would also silently
    # change FEATURES.
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    # Both lengths are computed once, when the class body is executed.
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value output.
    VALUE_NUM = 1

    # PPO hyperparameters.
    # NOTE(review): the names follow the usual PPO conventions (discount,
    # GAE lambda, initial learning rate, entropy coefficient, clip range,
    # value-loss weight) — confirm against the learner that consumes them.
    GAMMA = 0.99
    LAMDA = 0.95
    INIT_LEARNING_RATE_START = 0.0003
    BETA_START = 0.001
    CLIP_PARAM = 0.2
    VF_COEF = 0.5

    # One entry per action head; the legal-action list has the same sizes
    # but is a separate list object.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and its range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5