This repository has been archived on 2026-05-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
-----/agent_ppo/conf/conf.py

63 lines
1.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
Configuration for Robot Vacuum PPO agent.
清扫大作战 PPO 配置。
"""
class Config:
    """Static configuration constants for the Robot Vacuum PPO agent.

    Every setting is a plain class attribute, so values can be read
    directly from the class (e.g. ``Config.FEATURE_LEN``) without creating
    an instance. Derived values (``FEATURE_LEN``, ``DIM_OF_OBSERVATION``)
    are computed once, when the class body executes.
    """

    # Feature dimensions: a 21x21x6 multi-channel local map, scalar planning
    # features, and the last action.
    VIEW_SIZE = 21
    MAP_CHANNELS = 6
    FEATURES = [
        VIEW_SIZE * VIEW_SIZE * MAP_CHANNELS,  # local map: 21 * 21 * 6 = 2646
        66,  # global memory, charger, NPC, and action-improvement features
        8,  # last action one-hot
    ]
    # Independent copy (same approach as LEGAL_ACTION_SIZE_LIST below) so an
    # in-place change to one list cannot silently alter the other.
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value.
    VALUE_NUM = 1

    # PPO hyperparameters.
    # NOTE(review): the exact meaning of each value is defined by the training
    # code that reads it, which is not visible here. BETA_START / BETA_END /
    # BETA_DECAY_STEPS presumably describe a decaying coefficient schedule
    # (likely the entropy bonus) -- confirm against the learner.
    GAMMA = 0.99
    LAMDA = 0.95  # spelling kept as-is: it is the name other modules look up
    INIT_LEARNING_RATE_START = 0.00025
    BETA_START = 0.008
    BETA_END = 0.002
    BETA_DECAY_STEPS = 4000
    CLIP_PARAM = 0.2
    VF_COEF = 0.5
    PPO_EPOCHS = 3
    MINI_BATCH_SIZE = 256
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

    # Evaluation tie-break: when policy probabilities are close, prefer safer
    # coverage/recharge actions with a lightweight heuristic.
    EVAL_TIE_BREAK_PROB_GAP = 0.015
    EVAL_TIE_BREAK_SCORE_SCALE = 0.01

    # One label head sized by the action count; the legal-action sizes start
    # as a separate copy of the same list.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5