#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors

Configuration for Robot Vacuum PPO agent.
"""


class Config:
    """Static configuration constants for the Robot Vacuum PPO agent.

    All values are plain class attributes; the class is used as a namespace
    and is not meant to be instantiated.

    NOTE(review): the exact meaning of each hyperparameter is defined by the
    training code that consumes it, which is not part of this file. The
    per-attribute comments below that are marked "presumably" follow the
    usual PPO naming conventions and should be confirmed against that code.
    """

    # ------------------------------------------------------------------
    # Observation layout
    # Feature dimensions: 21x21x6 multi-channel local map + scalar planning
    # features + last action.
    # ------------------------------------------------------------------
    VIEW_SIZE = 21
    MAP_CHANNELS = 6
    FEATURES = [
        VIEW_SIZE * VIEW_SIZE * MAP_CHANNELS,  # flattened local map
        66,  # global memory, charger, NPC, and action-improvement features
        8,  # last action one-hot
    ]
    # Independent copy so that mutating one list in place cannot silently
    # change the other (same approach as LEGAL_ACTION_SIZE_LIST below).
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # ------------------------------------------------------------------
    # Action space: 8 directional moves
    # ------------------------------------------------------------------
    ACTION_NUM = 8

    # Single-head value
    VALUE_NUM = 1

    # ------------------------------------------------------------------
    # PPO hyperparameters
    # ------------------------------------------------------------------
    GAMMA = 0.99  # presumably the reward discount factor
    LAMDA = 0.95  # presumably the GAE lambda (spelling kept for callers)
    INIT_LEARNING_RATE_START = 0.00025
    # presumably a coefficient decayed from BETA_START to BETA_END over
    # BETA_DECAY_STEPS steps -- TODO confirm against the learner
    BETA_START = 0.008
    BETA_END = 0.002
    BETA_DECAY_STEPS = 4000
    CLIP_PARAM = 0.2  # presumably the PPO ratio clipping range
    VF_COEF = 0.5  # presumably the value-loss weight
    PPO_EPOCHS = 3
    MINI_BATCH_SIZE = 256
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

    # One label head sized by the action space; the legal-action list is a
    # separate copy with the same contents.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5