From 574854e3b5c8b0c57627c97e8c6245b709de47d6 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 3 Jul 2024 19:39:08 +0000 Subject: [PATCH 1/5] JVM: Add constructor informations in the prompt Signed-off-by: Arthur Chan --- llm_toolkit/prompt_builder.py | 36 ++++++++++++++++++++++++++++ prompts/template_xml/jvm_problem.txt | 1 + 2 files changed, 37 insertions(+) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 1db409ac4c..4f4e69778b 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -716,6 +716,41 @@ def _format_arguments(self) -> str: return '\n'.join(argument_descriptions) + def _format_constructors(self) -> str: + """Formats a list of functions / constructors to create the object for + invoking the target method.""" + if self.benchmark.is_static: + return '' + + constructors = [] + for constructor in introspector.query_introspector_matching_constructor_type(self.benchmark.project, self.benchmark.return_type): + constructor_sig = constructor.get('function_signature') + if constructor_sig: + constructors.append('' + constructor_sig + '') + + if constructors: + return '' + '\n'.join(constructors) + '' + + functions = [] + for function in introspector.query_introspector_matching_function_type(self.benchmark.project, self.benchmark.return_type): + is_static = function.get('is_static', False) + function_sig = function.get('function_signature') + if not function_sig: + continue + if is_static: + functions.append('' + funcion_sig + '') + else: + function_class = function_sig[1:].split(']')[0] + function_str = '' + funcion_sig + '' + function_str = function_str + 'You MUST create an {CLASS_NAME} object before calling this constructing method.' + function_str = function_str.replace('{CLASS_NAME}', function_class) + function_str = '' + function_str + '' + functions.append(function_str) + if functions: + return '' + '\n'.join(functions) + '' + + return '' + def _format_source_reference(self, signature: str) -> Tuple[str, str]: """Formats the source code reference for this target.""" # Query for source code of the target method @@ -742,6 +777,7 @@ def _format_problem(self, signature: str) -> str: self._format_requirement(signature)) problem = problem.replace('{DATA_MAPPING}', self._format_data_filler()) problem = problem.replace('{ARGUMENTS}', self._format_arguments()) + problem = problem.replace('{CONSTRUCTORS}', self._format_constructors()) self_source, cross_source = self._format_source_reference(signature) problem = problem.replace('{SELF_SOURCE}', self_source) diff --git a/prompts/template_xml/jvm_problem.txt b/prompts/template_xml/jvm_problem.txt index 7adebf37ec..f0fd25d5fa 100644 --- a/prompts/template_xml/jvm_problem.txt +++ b/prompts/template_xml/jvm_problem.txt @@ -1,6 +1,7 @@ Your goal is to write a fuzzing harness for the provided method signature using Jazzer framework from Code Intellengence. It is important that the provided solution compiles and actually calls the function specified by the method signature: {TARGET} +{CONSTRUCTORS} {REQUIREMENTS} {DATA_MAPPING} From b4553246968f263b708f0c32c1e122d819c3df09 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 3 Jul 2024 19:46:55 +0000 Subject: [PATCH 2/5] Fix formatting Signed-off-by: Arthur Chan --- llm_toolkit/prompt_builder.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 4f4e69778b..3f84b096ae 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -723,8 +723,9 @@ def _format_constructors(self) -> str: return '' constructors = [] - for constructor in introspector.query_introspector_matching_constructor_type(self.benchmark.project, self.benchmark.return_type): - constructor_sig = constructor.get('function_signature') + for ctr in introspector.query_introspector_matching_constructor_type( + self.benchmark.project, self.benchmark.return_type): + constructor_sig = ctr.get('function_signature') if constructor_sig: constructors.append('' + constructor_sig + '') @@ -732,17 +733,22 @@ def _format_constructors(self) -> str: return '' + '\n'.join(constructors) + '' functions = [] - for function in introspector.query_introspector_matching_function_type(self.benchmark.project, self.benchmark.return_type): + for function in introspector.query_introspector_matching_function_type( + self.benchmark.project, self.benchmark.return_type): is_static = function.get('is_static', False) function_sig = function.get('function_signature') if not function_sig: continue if is_static: - functions.append('' + funcion_sig + '') + functions.append('' + function_sig + + '') else: function_class = function_sig[1:].split(']')[0] - function_str = '' + funcion_sig + '' - function_str = function_str + 'You MUST create an {CLASS_NAME} object before calling this constructing method.' + function_str = '' + function_sig + '' + function_str = function_str + ( + 'You MUST create an ' + '{CLASS_NAME} object before calling this constructing method.' + '') function_str = function_str.replace('{CLASS_NAME}', function_class) function_str = '' + function_str + '' functions.append(function_str) From 3a2a8d263344af1c66e611aedc92d28605d9ebfc Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 10 Jul 2024 14:47:24 +0000 Subject: [PATCH 3/5] Fix formatting Signed-off-by: Arthur Chan --- llm_toolkit/prompt_builder.py | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 83934754d3..8b0c4341a0 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -212,8 +212,8 @@ def _select_examples(self, examples: list[list], targets.add(example[2]) unique_examples.append(example) - if (sum(example[0] for example in unique_examples) + prompt_size - < self._model.context_window): + if (sum(example[0] for example in unique_examples) + prompt_size < + self._model.context_window): return [[example[1], example[2]] for example in examples] # Then prioritize complex (i.e., long) examples. @@ -681,7 +681,7 @@ def _format_argument(self, count: int, arg_type: str) -> str: # java.lang.Object argument if 'java.lang.Object' in arg_type: base = self._get_template(self.object_arg_description_template_file) - prefix = 'Argument \#{count} requires an Object instance\n' + prefix = 'Argument #{count} requires an Object instance\n' argument = '' + prefix + base + '' return argument @@ -787,41 +787,44 @@ def _format_arguments(self) -> str: def _format_constructors(self) -> str: """Formats a list of functions / constructors to create the object for invoking the target method.""" - if self.benchmark.is_static: + if self.benchmark.is_jvm_static: return '' constructors = [] - for ctr in introspector.query_introspector_matching_constructor_type( - self.benchmark.project, self.benchmark.return_type): + ctrs = introspector.query_introspector_matching_function_constructor_type( + self.benchmark.project, self.benchmark.return_type, False) + for ctr in ctrs: constructor_sig = ctr.get('function_signature') if constructor_sig: - constructors.append('' + constructor_sig + '') + constructors.append(f'{constructor_sig}') if constructors: - return '' + '\n'.join(constructors) + '' + ctr_str = '\n'.join(constructors) + return f'{ctr_str}' functions = [] - for function in introspector.query_introspector_matching_function_type( - self.benchmark.project, self.benchmark.return_type): - is_static = function.get('is_static', False) - function_sig = function.get('function_signature') + funcs = introspector.query_introspector_matching_function_constructor_type( + self.benchmark.project, self.benchmark.return_type, True) + for func in funcs: + is_static = func.get('is_static', False) + function_sig = func.get('function_signature') if not function_sig: continue if is_static: - functions.append('' + function_sig + - '') + functions.append(f'{function_sig}') else: function_class = function_sig[1:].split(']')[0] - function_str = '' + function_sig + '' + function_str = f'{function_sig}' function_str = function_str + ( 'You MUST create an ' '{CLASS_NAME} object before calling this constructing method.' '') function_str = function_str.replace('{CLASS_NAME}', function_class) - function_str = '' + function_str + '' + function_str = f'{function_str}' functions.append(function_str) if functions: - return '' + '\n'.join(functions) + '' + func_str = '\n'.join(functions) + return f'{func_str}' return '' From 51edda16a66b013841154cbd49e4fb93003f9331 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 10 Jul 2024 14:50:48 +0000 Subject: [PATCH 4/5] Fix formatting Signed-off-by: Arthur Chan --- llm_toolkit/prompt_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 8b0c4341a0..f4924584b5 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -212,8 +212,8 @@ def _select_examples(self, examples: list[list], targets.add(example[2]) unique_examples.append(example) - if (sum(example[0] for example in unique_examples) + prompt_size < - self._model.context_window): + if (sum(example[0] for example in unique_examples) + prompt_size + < self._model.context_window): return [[example[1], example[2]] for example in examples] # Then prioritize complex (i.e., long) examples. From ba74012437ee69cdd22efb4174f63fa97fdaeac4 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 10 Jul 2024 15:36:37 +0000 Subject: [PATCH 5/5] Fix bug Signed-off-by: Arthur Chan --- llm_toolkit/prompt_builder.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index f4924584b5..b0197f0a5e 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -681,8 +681,8 @@ def _format_argument(self, count: int, arg_type: str) -> str: # java.lang.Object argument if 'java.lang.Object' in arg_type: base = self._get_template(self.object_arg_description_template_file) - prefix = 'Argument #{count} requires an Object instance\n' - argument = '' + prefix + base + '' + prefix = f'Argument #{count} requires an Object instance\n' + argument = f'{prefix}{base}' return argument # Simple arguments @@ -817,9 +817,8 @@ def _format_constructors(self) -> str: function_str = f'{function_sig}' function_str = function_str + ( 'You MUST create an ' - '{CLASS_NAME} object before calling this constructing method.' + f'{function_class} object before calling this constructing method.' '') - function_str = function_str.replace('{CLASS_NAME}', function_class) function_str = f'{function_str}' functions.append(function_str) if functions: