This repository has been archived on 2026-05-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
-----/agent_ppo/conf/conf.py
2026-04-26 12:46:00 +08:00

56 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
Configuration for Robot Vacuum PPO agent.
清扫大作战 PPO 配置。
"""
class Config:
    """Shape constants and PPO hyperparameters for the robot-vacuum agent.

    Every entry is a plain class attribute evaluated once, in order, when the
    class body runs; derived values (``FEATURE_LEN``, ``DIM_OF_OBSERVATION``)
    are therefore snapshots and do not track later edits to ``FEATURES``.

    Attribute names are kept exactly as they were (including the ``LAMDA``
    spelling) so that existing lookups by name keep working.
    """

    # Feature dimensions: three groups whose sizes add up to 69.
    FEATURES = [
        7 * 7,  # 49 values (presumably a 7x7 local grid -- confirm with the feature extractor)
        12,
        8,
    ]
    # Independent copy, mirroring LEGAL_ACTION_SIZE_LIST below, so that an
    # in-place edit of one list cannot silently change the other.
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value output.
    VALUE_NUM = 1

    # PPO hyperparameters.
    GAMMA = 0.99
    LAMDA = 0.95
    INIT_LEARNING_RATE_START = 0.00025
    # NOTE(review): BETA_* looks like a coefficient decayed from START to END
    # over BETA_DECAY_STEPS steps -- confirm against the training loop.
    BETA_START = 0.008
    BETA_END = 0.002
    BETA_DECAY_STEPS = 4000
    CLIP_PARAM = 0.2
    VF_COEF = 0.5
    PPO_EPOCHS = 3
    MINI_BATCH_SIZE = 256
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

    # One label head sized by the action count; the legal-action list is a
    # separate list object with the same contents.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    # Gradient clipping switch and its range.
    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5