This repository has been archived on 2026-05-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
-----/agent_ppo/conf/conf.py
gqt efbc612945 优化PPO充电与避障策略
扩展观测特征到157维,加入充电桩、NPC、电量安全余量、地图统计和本步清扫信息。

增加低电量回充动作过滤、NPC危险区过滤,并调整奖励和终局日志以突出充电、避障和真实清扫得分。
2026-04-26 14:14:18 +08:00

56 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
###########################################################################
# Copyright © 1998 - 2026 Tencent. All Rights Reserved.
###########################################################################
"""
Author: Tencent AI Arena Authors
Configuration for Robot Vacuum PPO agent.
清扫大作战 PPO 配置。
"""
class Config:
    """Static dimension constants and PPO hyperparameters for the robot-vacuum agent.

    Every value is a plain class attribute meant to be read as ``Config.NAME``;
    the class defines no methods and holds no per-instance state.
    """

    # Observation layout, one entry per feature group (157 values in total).
    FEATURES = [
        11 * 11,  # wider local map view
        28,  # global, charger, NPC, and map-stat features
        8,  # last action one-hot
    ]
    # Independent copy rather than an alias: an in-place edit of one list must
    # not silently change the other (same pattern as LEGAL_ACTION_SIZE_LIST).
    FEATURE_SPLIT_SHAPE = FEATURES.copy()
    FEATURE_LEN = sum(FEATURES)
    DIM_OF_OBSERVATION = FEATURE_LEN

    # Action space: 8 directional moves.
    ACTION_NUM = 8

    # Single-head value.
    VALUE_NUM = 1

    # PPO hyperparameters.
    # NOTE(review): the names follow common PPO conventions (discount, GAE
    # lambda, entropy-coefficient schedule, clipping, ...); how each one is
    # consumed is defined by the algorithm/model code, not by this file.
    GAMMA = 0.99
    LAMDA = 0.95
    INIT_LEARNING_RATE_START = 0.00025
    BETA_START = 0.008
    BETA_END = 0.002
    BETA_DECAY_STEPS = 4000
    CLIP_PARAM = 0.2
    VF_COEF = 0.5
    PPO_EPOCHS = 3
    MINI_BATCH_SIZE = 256
    NORMALIZE_ADVANTAGE = True
    TARGET_KL = 0.04

    # One label head sized by the action count; the legal-action list starts
    # out with the same sizes but is a separate list object.
    LABEL_SIZE_LIST = [ACTION_NUM]
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()

    USE_GRAD_CLIP = True
    GRAD_CLIP_RANGE = 0.5