diff --git a/CHANGELOG.md b/CHANGELOG.md index ef807c7..3528d5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ Changes that are planned but not implemented yet: * Create a "align if needed" preprocessor directive paid that generates an `.align` directive if the bytecode in between the pair isn't naturally on the same page and can fit on the same page if aligned. An error would be benerated if the block of code can't fit on the same page regardless of alignment. ## [Unreleased] +* Upgrade python version requirements to 3.11 +* Fixed a bug where embedded strings weren't properly parsed if they contained a newline character or there were multiple embedded strings per line ## [0.4.2] * Added support for The Minimal 64x4 Home Computer with an example and updated assembler functionality to support it. diff --git a/pyproject.toml b/pyproject.toml index b9cabc9..f65b4db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,18 +4,18 @@ build-backend = "setuptools.build_meta" [project] name = "bespokeasm" -version = "0.4.2" +version = "0.4.3" authors = [ { name="Michael Kamprath", email="michael@kamprath.net" }, ] description = "A customizable byte code assembler that allows for the definition of custom instruction set architecture" readme = "README.md" license = {file = "LICENSE"} -requires-python = ">=3.9" +requires-python = ">=3.11" classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.11", "Topic :: Software Development :: Assemblers", ] dependencies = [ diff --git a/src/bespokeasm/__init__.py b/src/bespokeasm/__init__.py index 2955eaf..2d89220 100644 --- a/src/bespokeasm/__init__.py +++ b/src/bespokeasm/__init__.py @@ -1,4 +1,4 @@ -BESPOKEASM_VERSION_STR = '0.4.2' +BESPOKEASM_VERSION_STR = '0.4.3b1' # if a cconfig file requires a certain bespoke ASM version, it should be at least this version. 
BESPOKEASM_MIN_REQUIRED_STR = '0.3.0' diff --git a/src/bespokeasm/assembler/line_object/emdedded_string.py b/src/bespokeasm/assembler/line_object/emdedded_string.py index 50a0944..24572ad 100644 --- a/src/bespokeasm/assembler/line_object/emdedded_string.py +++ b/src/bespokeasm/assembler/line_object/emdedded_string.py @@ -10,7 +10,7 @@ from bespokeasm.assembler.line_identifier import LineIdentifier -EMBEDDED_STRING_PATTERN = r'(?P<quote>[\"])((?:\\(?P=quote)|.)*)(?P=quote)' +EMBEDDED_STRING_PATTERN = r'(?P<quote>[\"])((?:\\(?P=quote)|.|\n)*?)(?P=quote)' class EmbeddedString(LineWithBytes): diff --git a/src/bespokeasm/assembler/line_object/factory.py b/src/bespokeasm/assembler/line_object/factory.py index 52c6ac8..52d1a39 100644 --- a/src/bespokeasm/assembler/line_object/factory.py +++ b/src/bespokeasm/assembler/line_object/factory.py @@ -22,7 +22,7 @@ class LineOjectFactory: flags=re.IGNORECASE | re.MULTILINE ) PATTERN_INSTRUCTION_CONTENT = re.compile( - r'^([^\;\n]*)', + r'^([^;\v]*)(?:;.*)?$', flags=re.IGNORECASE | re.MULTILINE ) @@ -68,7 +68,7 @@ def parse_line( log_verbosity, )) else: - # resolve proprocessor symbols + # resolve preprocessor symbols instruction_str = preprocessor.resolve_symbols(line_id, instruction_str) # parse instruction while len(instruction_str) > 0: diff --git a/src/bespokeasm/assembler/preprocessor/__init__.py b/src/bespokeasm/assembler/preprocessor/__init__.py index b95b7a2..4def643 100644 --- a/src/bespokeasm/assembler/preprocessor/__init__.py +++ b/src/bespokeasm/assembler/preprocessor/__init__.py @@ -52,6 +52,7 @@ def resolve_symbols( # Errors if there are recursion loops caused byt symbols that indirectly refer to themselves. 
# to make this fast, all symbol candidates should be identified first, then the symbols should be resolved + # TODO: ignore tokens that are in quoted strings found_symbols: list[str] = re.findall(f'\\b({SYMBOL_PATTERN})\\b', line_str) symbols_replaced: set[str] = set() diff --git a/test/config_files/test_operand_features.yaml b/test/config_files/test_operand_features.yaml index af87d41..cf1069d 100644 --- a/test/config_files/test_operand_features.yaml +++ b/test/config_files/test_operand_features.yaml @@ -240,3 +240,18 @@ instructions: argument: size: 8 byte_align: true +macros: + add_twice: + - operands: + count: 1 + specific_operands: + numeric_expression: + list: + numeric_expression: + type: numeric + argument: + size: 8 + byte_align: true + instructions: + - "add @ARG(0)" + - "add @ARG(0)" diff --git a/test/test_line_objects.py b/test/test_line_objects.py index a043ec5..f7a842e 100644 --- a/test/test_line_objects.py +++ b/test/test_line_objects.py @@ -24,12 +24,15 @@ class TestLineObject(unittest.TestCase): @classmethod def setUpClass(cls): + lineid = LineIdentifier(1, 'setUpClass') global_scope = GlobalLabelScope(set()) - global_scope.set_label_value('var1', 12, 1) - global_scope.set_label_value('my_val', 8, 2) - global_scope.set_label_value('the_two', 2, 3) + global_scope.set_label_value('var1', 12, lineid) + global_scope.set_label_value('my_val', 8, lineid) + global_scope.set_label_value('the_two', 2, lineid) + global_scope.set_label_value('VALUE1', 8777773, lineid) + global_scope.set_label_value('VALUE2', 139, lineid) local_scope = LabelScope(LabelScopeType.LOCAL, global_scope, 'TestInstructionParsing') - local_scope.set_label_value('.local_var', 10, 3) + local_scope.set_label_value('.local_var', 10, lineid) cls.label_values = local_scope def setUp(self): @@ -872,6 +875,61 @@ def test_embedded_string_bugs(self): self.assertIsInstance(t1, EmbeddedString) self.assertEqual(t1.byte_size, 2, 'string has 2 bytes') + # test single lines of code where the embedded 
string is in between two statements and + # contains a newline character. + # for example: + # add 5 "this is a test\n" nop + # the embedded string should be parsed as a separate line object + lo1: list[LineObject] = LineOjectFactory.parse_line( + lineid, + 'add 5 "this is a test\nof new lines" nop ; comments', + isa_model, + TestLineObject.label_values, + memzone_mngr.global_zone, + memzone_mngr, + Preprocessor(), + ConditionStack(), + 0, + ) + self.assertEqual(len(lo1), 3, 'There should be 3 parsed instructions') + self.assertIsInstance(lo1[0], InstructionLine) + self.assertIsInstance(lo1[1], EmbeddedString) + self.assertIsInstance(lo1[2], InstructionLine) + self.assertEqual(lo1[1].byte_size, 28, 'string has 28 bytes (27 characters + 1 null terminator)') + + def test_multiple_embedded_stringa_bug(self): + # ensure that a single line of code can correctly parse multiple embedded strings + fp = pkg_resources.files(config_files).joinpath('test_operand_features.yaml') + isa_model = AssemblerModel(str(fp), 0) + isa_model._config['general']['allow_embedded_strings'] = True + memzone_mngr = MemoryZoneManager( + isa_model.address_size, + isa_model.default_origin, + isa_model.predefined_memory_zones, + ) + lineid = LineIdentifier(88, 'test_multiple_embedded_stringa_bug') + # test a more complex case where the embedded string is in the middle of a line + # for example: + # add 5 "string 1" nop "string 2" nop + # the embedded string should be parsed as a separate line object + lo2: list[LineObject] = LineOjectFactory.parse_line( + lineid, + 'add VALUE2 "string 1" nop "string 2" nop', + isa_model, + TestLineObject.label_values, + memzone_mngr.global_zone, + memzone_mngr, + Preprocessor(), + ConditionStack(), + 0, + ) + self.assertEqual(len(lo2), 5, 'There should be 5 parsed instructions') + self.assertIsInstance(lo2[0], InstructionLine) + self.assertIsInstance(lo2[1], EmbeddedString) + self.assertIsInstance(lo2[2], InstructionLine) + self.assertIsInstance(lo2[3], 
EmbeddedString) + self.assertIsInstance(lo2[4], InstructionLine) + if __name__ == '__main__': unittest.main()