Skip to content

Commit

Permalink
TestPlan -> TestPrompts
Browse files Browse the repository at this point in the history
  • Loading branch information
antoninoLorenzo committed Jul 3, 2024
1 parent 9ce40d0 commit 04de5c7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 43 deletions.
Binary file not shown.
1 change: 1 addition & 0 deletions test/tests/results/conversion_times.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"llama3": {"times": [37.51699495315552, 19.364502429962158, 48.94050359725952], "mean": 35.2740003267924}}
8 changes: 4 additions & 4 deletions test/tests/test_cases/conversion.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@
"SQL Injection Testing:\n\n",
"We can use sqlmap to test for SQL injection vulnerabilities in the login.php page. Here's a plan:\n\n",
"1. Initial Scan: Run sqlmap with the -T option to identify potential SQL injection points.\n",
"Command: sqlmap -T \"login.php\" http://10.10.62.242\n",
"Command: sqlmap -T login.php http://10.10.62.242\n",
"2. Fingerprinting: Use sqlmap's fingerprinting feature to gather more information about the database and its schema.\n",
"Command: sqlmap -F http://10.10.62.242/login.php\n",
"3. Injection Testing: Run sqlmap with the -b option to test for SQL injection vulnerabilities.\n",
"Command: sqlmap -b \"login.php\" http://10.10.62.242\n\n",
"Command: sqlmap -b login.php http://10.10.62.242\n\n",
"Accessing .htpasswd:\n\n",
"Since the .htpasswd file is protected, we can try using Nmap's -script option to run a script that can help us access it.\n\n",
"1. HTPasswd Script: Run Nmap with the htpasswd script to see if it can help us access the file.\n",
Expand All @@ -62,9 +62,9 @@
"(Note: As always, I'll ensure that the necessary information is provided, and we stay within the scope of authorized penetration testing.)\n"
],
"commands": [
"sqlmap -T \"login.php\" http://10.10.62.242",
"sqlmap -T login.php http://10.10.62.242",
"sqlmap -F http://10.10.62.242/login.php",
"sqlmap -b \"login.php\" http://10.10.62.242",
"sqlmap -b login.php http://10.10.62.242",
"nmap -script htpasswd http://10.10.62.242",
"nmap -script http http://10.10.62.242"
]
Expand Down
88 changes: 49 additions & 39 deletions test/tests/test_prompts.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,64 @@
import os
import json
import time
import unittest

from src.agent import Agent
from src.agent.tools import Terminal
from src.agent.plan import Plan, Task, TaskStatus
import numpy as np
from dotenv import load_dotenv

from src.agent import Agent

class TestPlan(unittest.TestCase):
load_dotenv()

# def test_execute(self):
# tasks = [
# Task(thought="Get directory content", tool=Terminal, command="ls"),
# Task(thought="Get machine host name", tool=Terminal, command="hostname")
# ]

# plan = Plan(tasks)
# for output in plan.execute():
# print('---------------------------------')
# for i, task_overview in enumerate(output):
# print(f'{i+1}. {task_overview}')
# if task_overview.status == TaskStatus.DONE:
# print(f'Output:\n{task_overview.output}')
# Test case exercising the LLM prompts through Agent.
# NOTE(review): this span is a rendered commit diff with the +/- markers and the
# indentation stripped, so removed and added lines are interleaved; confirm
# nesting and which lines are live against the real test_prompts.py.
class TestPrompts(unittest.TestCase):
# Model names; test_conversion builds one Agent per entry.
MODELS = ['llama3']
# Evaluated once, when the class body runs; None when the variable is unset.
GEMINI_KEY = os.getenv('GEMINI_API_KEY')

# NOTE(review): appears to be the pre-commit method header that
# test_conversion (next lines) replaces - confirm.
def test_from_response(self):
# Skipped unconditionally, with an empty reason string.
@unittest.skip('')
def test_conversion(self):
"""Tests the conversion from natural language plan produced
by the llm to tasks, so tests the efficiency of the prompt."""
# NOTE(review): the next two lines (hard-coded 'llama3' agent and
# 'plan_tests.json') look like the removed side of the diff; the
# 'test_cases/conversion.json' open and the per-model Agent below look like
# their replacements - confirm.
agent = Agent(model='llama3')
with open('plan_tests.json', 'r', encoding='utf-8') as fp:
# Relative path: resolved against the current working directory.
with open('test_cases/conversion.json', 'r', encoding='utf-8') as fp:
test_cases = json.load(fp)

# NOTE(review): this loop (through the second assertEquals) duplicates the
# per-model loop further down without timing; presumably the removed side of
# the diff - confirm.
for test_case in test_cases:
plan_nl = test_case['content']
expected_commands = test_case['commands']

plan = agent.extract_plan(plan_nl)
self.assertIsNotNone(plan, "Plan is None:")

commands = [task.command for task in plan.tasks]

# NOTE(review): assertEquals is a deprecated alias of assertEqual and was
# removed in Python 3.12.
self.assertEquals(
len(commands),
len(expected_commands),
f"Found {len(commands)} commands, expected {len(expected_commands)}\n"
f"Commands:\n{commands}\nExpected:\n{expected_commands}"
)
self.assertEquals(
commands,
expected_commands,
f"Commands:\n{commands}\nExpected:\n{expected_commands}"
)
# One entry per model: the list of per-test-case durations and their mean.
inference_times = {model: {'times': [], 'mean': 0} for model in self.MODELS}
for model in self.MODELS:
agent = Agent(model=model)
for test_case in test_cases:
plan_nl = test_case['content']
expected_commands = test_case['commands']

# Wall-clock duration, in seconds, of the extract_plan call only.
start = time.time()
plan = agent.extract_plan(plan_nl)
t = time.time() - start

self.assertIsNotNone(plan, "Plan is None:")
commands = [task.command for task in plan.tasks]
# NOTE(review): assertEquals is a deprecated alias of assertEqual and was
# removed in Python 3.12.
self.assertEquals(
len(commands),
len(expected_commands),
f"Found {len(commands)} commands, expected {len(expected_commands)}\n"
f"Commands:\n{commands}\nExpected:\n{expected_commands}"
)
self.assertEquals(
commands,
expected_commands,
f"Commands:\n{commands}\nExpected:\n{expected_commands}"
)

# Only reached when the assertions above passed for this test case.
inference_times[model]['times'].append(t)

# Relative path, resolved against the current working directory; 'w+'
# truncates any existing file.
with open('results/conversion_times.json', 'w+', encoding='utf-8') as fp:
for model in self.MODELS:
mean_time = np.array(inference_times[model]['times']).mean()
inference_times[model]['mean'] = mean_time
# NOTE(review): nesting is not visible here; presumably this runs once, after
# the loop has filled in every mean - confirm.
json.dump(inference_times, fp)

# @unittest.skip('')
def test_planning(self):
"""Tests the instruction following capability of the llm"""
# The only check performed so far: the Gemini API key must be set.
self.assertIsNotNone(self.GEMINI_KEY, 'Missing Gemini API Key')


if __name__ == "__main__":
Expand Down

0 comments on commit 04de5c7

Please sign in to comment.