优化PPO基于电量安全余量回充

This commit is contained in:
2026-04-26 16:20:02 +08:00
parent 3d0a8122bb
commit 3c3332e126

View File

@@ -114,6 +114,9 @@ class Preprocessor:
self.nearest_charger_dist = float(self.GRID_SIZE) self.nearest_charger_dist = float(self.GRID_SIZE)
self.nearest_charger_range_dist = float(self.GRID_SIZE) self.nearest_charger_range_dist = float(self.GRID_SIZE)
self.last_nearest_charger_range_dist = float(self.GRID_SIZE) self.last_nearest_charger_range_dist = float(self.GRID_SIZE)
self.charger_energy_cost = float(self.GRID_SIZE)
self.charger_safety_buffer = 0.0
self.charger_safety_margin = 0.0
self.battery_margin = 0.0 self.battery_margin = 0.0
self.has_charger = False self.has_charger = False
self.low_battery = False self.low_battery = False
@@ -252,6 +255,9 @@ class Preprocessor:
self.nearest_charger_center_dz = 0.0 self.nearest_charger_center_dz = 0.0
self.nearest_charger_dist = float(self.GRID_SIZE) self.nearest_charger_dist = float(self.GRID_SIZE)
self.nearest_charger_range_dist = float(self.GRID_SIZE) self.nearest_charger_range_dist = float(self.GRID_SIZE)
self.charger_energy_cost = float(self.GRID_SIZE)
self.charger_safety_buffer = 0.0
self.charger_safety_margin = 0.0
self.charger_rects = [] self.charger_rects = []
best = None best = None
@@ -278,6 +284,7 @@ class Preprocessor:
if best is None: if best is None:
self.battery_margin = float(self.battery) self.battery_margin = float(self.battery)
self.charger_safety_margin = float(self.battery)
return return
range_dist, dist, dx, dz, center_dx, center_dz = best range_dist, dist, dx, dz, center_dx, center_dz = best
@@ -288,6 +295,7 @@ class Preprocessor:
self.nearest_charger_center_dz = float(center_dz) self.nearest_charger_center_dz = float(center_dz)
self.nearest_charger_dist = float(dist) self.nearest_charger_dist = float(dist)
self.nearest_charger_range_dist = float(range_dist) self.nearest_charger_range_dist = float(range_dist)
self.charger_energy_cost = float(range_dist)
self.on_charger = range_dist <= 0.0 self.on_charger = range_dist <= 0.0
self.battery_margin = float(self.battery) - self.nearest_charger_range_dist self.battery_margin = float(self.battery) - self.nearest_charger_range_dist
@@ -344,13 +352,21 @@ class Preprocessor:
if not self.has_charger: if not self.has_charger:
self.recharge_mode = False self.recharge_mode = False
self.charger_safety_margin = float(self.battery)
return return
if self.charge_delta > 0 or (self.on_charger and battery_ratio > 0.85): self.charger_energy_cost = float(max(self.nearest_charger_range_dist, 0.0))
self.charger_safety_buffer = self._charger_safety_buffer()
self.charger_safety_margin = float(self.battery) - self.charger_energy_cost - self.charger_safety_buffer
should_recharge = self.charger_safety_margin <= 0.0 or battery_ratio < 0.28
safe_to_leave = self.charger_safety_margin > 18.0 and battery_ratio > 0.65
if self.on_charger and (battery_ratio > 0.85 or safe_to_leave):
self.recharge_mode = False self.recharge_mode = False
elif self.battery <= self.nearest_charger_range_dist + 18 or battery_ratio < 0.22: elif should_recharge:
self.recharge_mode = True self.recharge_mode = True
elif self.recharge_mode and battery_ratio < 0.85: elif self.recharge_mode and not safe_to_leave:
self.recharge_mode = True self.recharge_mode = True
else: else:
self.recharge_mode = False self.recharge_mode = False
@@ -358,6 +374,13 @@ class Preprocessor:
if self.recharge_mode: if self.recharge_mode:
self.recharge_steps += 1 self.recharge_steps += 1
def _charger_safety_buffer(self):
    """Return the battery reserve kept on top of the travel cost to the nearest charger.

    The reserve is the sum of three terms, then clamped to [24.0, 64.0]:

    * a base of 16% of ``self.battery_max``, never below 24.0;
    * a quarter of the non-negative ``self.nearest_charger_range_dist``,
      capped at 24.0;
    * 18.0 scaled by ``self.local_obstacle_ratio``.

    Returns:
        The reserve as a plain Python ``float``.
    """
    # One move roughly costs one charge; reserve extra for detours, local obstacles, and policy noise.
    reserve = max(24.0, 0.16 * float(self.battery_max))

    # A negative range distance is treated as zero, so it adds nothing here.
    range_dist = float(max(self.nearest_charger_range_dist, 0.0))
    reserve += min(24.0, 0.25 * range_dist)

    # presumably local_obstacle_ratio lies in [0, 1] - verify against where it is computed
    reserve += 18.0 * float(self.local_obstacle_ratio)

    clamped = np.clip(reserve, 24.0, 64.0)
    return float(clamped)
def _min_charger_range_dist(self, x, z): def _min_charger_range_dist(self, x, z):
if not self.charger_rects: if not self.charger_rects:
return float(self.GRID_SIZE) return float(self.GRID_SIZE)
@@ -701,9 +724,11 @@ class Preprocessor:
dist_delta = float( dist_delta = float(
np.clip(self.last_nearest_charger_range_dist - self.nearest_charger_range_dist, -4.0, 4.0) np.clip(self.last_nearest_charger_range_dist - self.nearest_charger_range_dist, -4.0, 4.0)
) )
charge_reward += 0.04 * dist_delta if dist_delta > 0 else 0.02 * dist_delta approach_scale = 0.06 if self.charger_safety_margin <= 0 else 0.04
if self.battery_margin < 0: retreat_scale = 0.03 if self.charger_safety_margin <= 0 else 0.02
charge_reward -= min(0.25, abs(self.battery_margin) / max(self.battery_max, 1)) charge_reward += approach_scale * dist_delta if dist_delta > 0 else retreat_scale * dist_delta
if self.charger_safety_margin < 0:
charge_reward -= min(0.35, abs(self.charger_safety_margin) / max(self.battery_max, 1))
elif self.on_charger and battery_ratio > 0.65: elif self.on_charger and battery_ratio > 0.65:
charge_reward -= 0.08 charge_reward -= 0.08