#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors

Configuration for Robot Vacuum PPO agent.
"""


class Config:
    """Static configuration constants for the Robot Vacuum PPO agent.

    All values are plain class attributes; the class is used as a namespace
    and is not meant to be instantiated.
    """

    # Feature dimensions: 121 + 28 + 8 = 157 in total.
    FEATURES = [
        11 * 11,  # wider local map view (11 x 11 grid, flattened)
        28,  # global, charger, NPC, and map-stat features
        8,  # last action one-hot
    ]
    # Independent copy so that an in-place edit of one list cannot silently
    # change the other (same approach as LEGAL_ACTION_SIZE_LIST below).
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value output.
    VALUE_NUM = 1

    # PPO hyperparameters.
    # NOTE(review): the exact role of each value is defined by the training
    # code, which is not visible here; the groupings below are assumptions.
    GAMMA = 0.99
    LAMDA = 0.95  # spelling kept as-is: other modules reference this name
    INIT_LEARNING_RATE_START = 0.00025
    # Presumably a BETA schedule from BETA_START to BETA_END over
    # BETA_DECAY_STEPS steps -- confirm against the learner.
    BETA_START = 0.008
    BETA_END = 0.002
    BETA_DECAY_STEPS = 4000
    CLIP_PARAM = 0.2
    VF_COEF = 0.5
    PPO_EPOCHS = 3
    MINI_BATCH_SIZE = 256
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

    # One label head sized by the action count; the legal-action list has the
    # same layout but is a separate list object.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5