# NOTE: removed GitHub web-scrape residue (page chrome and a line-number gutter) that was not part of the original source file.
'''
@Author: WANG Maonan
@Author: PangAoyu
@Date: 2023-09-08 18:57:35
@Description: 使用训练好的 RL Agent 进行测试
LastEditTime: 2024-07-11 13:34:07
'''
import torch
from langchain_openai import ChatOpenAI
from loguru import logger
from tshub.utils.format_dict import dict_to_str
from tshub.utils.get_abs_path import get_abs_path
from tshub.utils.init_log import set_logger
from TSCEnvironment.tsc_env import TSCEnvironment
from TSCEnvironment.tsc_env_wrapper import TSCEnvWrapper
from TSCAssistant.tsc_assistant import TSCAgent
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, SubprocVecEnv
from utils.readConfig import read_config
from utils.make_tsc_env import make_env
# Resolve all relative paths against this file's directory and initialise
# the tshub logger so that log files land next to the script.
path_convert = get_abs_path(__file__)
set_logger(path_convert('./'))
if __name__ == '__main__':
    sumo_cfg = path_convert("./TSCScenario/SumoNets/train_four_345/env/train_four_345.sumocfg")

    # #########
    # Init Chat
    # #########
    # Credentials and model name come from the local config file; temperature 0
    # keeps the LLM output deterministic for reproducible evaluation runs.
    config = read_config()
    openai_proxy = config['OPENAI_PROXY']
    openai_api_key = config['OPENAI_API_KEY']
    chat = ChatOpenAI(
        model=config['OPENAI_API_MODEL'],
        temperature=0.0,
        openai_api_key=openai_api_key,
        openai_proxy=openai_proxy
    )

    # #########
    # Init Env
    # #########
    # (was an f-string with no placeholders — plain literal is equivalent)
    trip_info = path_convert('./Result/LLM.tripinfo.xml')
    params = {
        'tls_id': 'J1',
        'num_seconds': 300,
        'sumo_cfg': sumo_cfg,
        'use_gui': True,
        'log_file': './log_test/',
        'trip_info': trip_info,
    }
    # Single SUMO environment wrapped in a SubprocVecEnv (SB3 expects a VecEnv).
    env = SubprocVecEnv([make_env(env_index=f'{i}', **params) for i in range(1)])
    # Restore the observation-normalisation statistics saved during training.
    env = VecNormalize.load(load_path=path_convert('./models/last_vec_normalize.pkl'), venv=env)
    env.training = False     # do not update normalisation statistics while testing
    env.norm_reward = False  # report raw (un-normalised) rewards

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_path = path_convert('./models/last_rl_model.zip')
    model = PPO.load(model_path, env=env, device=device)

    # Evaluation loop: the PPO policy proposes an action each step; after a short
    # warm-up (sim_step > 4, so `infos` from env.step is available) the LLM-based
    # TSC agent may refine/override that action.
    dones = False
    sim_step = 0
    obs = env.reset()
    infos = None  # populated by env.step(); agent_run is only reached after sim_step > 4
    tsc_agent = TSCAgent(llm=chat, verbose=True)
    while not dones:
        action, _state = model.predict(obs, deterministic=True)
        if sim_step > 4:
            # BUG FIX: the original assignment ended with a stray trailing comma,
            # which wrapped the agent's action in a 1-tuple `(action,)` before it
            # was passed to env.step().
            action = tsc_agent.agent_run(sim_step=sim_step, action=action, obs=obs, infos=infos)
        obs, rewards, dones, infos = env.step(action)
        sim_step += 1
        print('***********rewards************', rewards)
    env.close()