Skip to content

Commit

Permalink
Problem with the processing of dot when preceded by an ambiguous esc…
Browse files Browse the repository at this point in the history
…ape. Resolves #19
  • Loading branch information
Aunsiels committed Feb 14, 2024
1 parent 4dd02ef commit e7214f7
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
17 changes: 8 additions & 9 deletions pyformlang/regular_expression/python_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
OCTAL = "01234567"
ESCAPED_OCTAL = ["\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\7"]


class PythonRegex(regex.Regex):
""" Represents a regular expression as used in Python.
Expand Down Expand Up @@ -106,7 +107,6 @@ def __init__(self, python_regex):
self._preprocess_brackets()
self._preprocess_positive_closure()
self._preprocess_optional()
self._preprocess_dot()
self._separate()
self._python_regex = self._python_regex.lstrip('\b')
super().__init__(self._python_regex)
Expand All @@ -119,7 +119,13 @@ def _separate(self):
else:
regex_temp.append(symbol)
regex_temp = self._recombine(regex_temp)
self._python_regex = " ".join(regex_temp)
regex_temp_dot = []
for symbol in regex_temp:
if symbol == ".":
regex_temp_dot.append(DOT_REPLACEMENT)
else:
regex_temp_dot.append(symbol)
self._python_regex = " ".join(regex_temp_dot)

def _preprocess_brackets(self):
regex_temp = []
Expand Down Expand Up @@ -287,13 +293,6 @@ def _preprocess_positive_closure(self):
regex_temp.append("*")
self._python_regex = "".join(regex_temp)

@staticmethod
def _dot_replacer(dot):
return DOT_REPLACEMENT

def _preprocess_dot(self):
self._python_regex = re.sub(r'(?<!\\)\.', self._dot_replacer, self._python_regex)

def _preprocess_optional(self):
regex_temp = []
for symbol in self._python_regex:
Expand Down
12 changes: 11 additions & 1 deletion pyformlang/regular_expression/tests/test_python_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def test_backslash(self):
def test_octal(self):
self._test_compare(r"\x10", "\x10")
self._test_compare(r"\110", "\110")
self._test_compare(r"\\\\x10", "\x10")
self._test_compare(r"\\\\x10", "\\x10")

def test_backspace(self):
self._test_compare(r"a[b\b]", "ab")
Expand All @@ -272,4 +274,12 @@ def test_unicode_name(self):

def test_unicode(self):
self._test_compare(r"\u1111", "\u1111")
self._test_compare(r"\U00001111", "\U00001111")
self._test_compare(r"\U00001111", "\U00001111")

def test_dot_harder(self):
self._test_compare(r"\\.", "\\a")
self._test_compare(r"\\.", "\\.")
self._test_compare(r"\.", "a")
self._test_compare(r"\.", ".")
self._test_compare(r"\\\.", "\\a")
self._test_compare(r"\\\.", "\\.")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setuptools.setup(
name='pyformlang',
version='1.0.6',
version='1.0.7',
#scripts=['pyformlang'] ,
author="Julien Romero",
author_email="[email protected]",
Expand Down

1 comment on commit e7214f7

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File Stmts Miss Cover Missing
pyformlang
   __init__.py10100% 
pyformlang/cfg
   __init__.py70100% 
   cfg.py5500100% 
   cfg_object.py100100% 
   cyk_table.py720100% 
   epsilon.py60100% 
   llone_parser.py15622 99%
   parse_tree.py6211 98%
   pda_object_creator.py310100% 
   production.py330100% 
   recursive_decent_parser.py5611 98%
   set_queue.py140100% 
   terminal.py150100% 
   utils.py100100% 
   utils_cfg.py250100% 
   variable.py260100% 
pyformlang/cfg/tests
   __init__.py00100% 
   test_cfg.py59811 99%
   test_llone_parser.py11811 99%
   test_production.py220100% 
   test_recursive_decent_parser.py250100% 
   test_terminal.py190100% 
   test_variable.py160100% 
pyformlang/fcfg
   __init__.py40100% 
   fcfg.py11711 99%
   feature_production.py260100% 
   feature_structure.py19133 98%
   state.py350100% 
pyformlang/fcfg/tests
   __init__.py00100% 
   test_fcfg.py1230100% 
   test_feature_structure.py1590100% 
pyformlang/finite_automaton
   __init__.py100100% 
   deterministic_finite_automaton.py20722 99%
   doubly_linked_list.py340100% 
   doubly_linked_node.py150100% 
   epsilon.py100100% 
   epsilon_nfa.py3720100% 
   finite_automaton.py1620100% 
   finite_automaton_object.py100100% 
   hopcroft_processing_list.py220100% 
   nondeterministic_finite_automaton.py220100% 
   nondeterministic_transition_function.py500100% 
   partition.py360100% 
   regexable.py160100% 
   state.py150100% 
   symbol.py110100% 
   transition_function.py5111 98%
pyformlang/finite_automaton/tests
   __init__.py00100% 
   test_deterministic_finite_automaton.py2610100% 
   test_epsilon.py100100% 
   test_epsilon_nfa.py6210100% 
   test_nondeterministic_finite_automaton.py930100% 
   test_nondeterministic_transition_function.py610100% 
   test_state.py280100% 
   test_symbol.py270100% 
   test_transition_function.py600100% 
pyformlang/fst
   __init__.py20100% 
   fst.py2420100% 
pyformlang/fst/tests
   __init__.py00100% 
   test_fst.py1600100% 
pyformlang/indexed_grammar
   __init__.py70100% 
   consumption_rule.py340100% 
   duplication_rule.py300100% 
   end_rule.py300100% 
   indexed_grammar.py25722 99%
   production_rule.py320100% 
   reduced_rule.py250100% 
   rule_ordering.py700100% 
   rules.py690100% 
pyformlang/indexed_grammar/tests
   __init__.py00100% 
   test_indexed_grammar.py2250100% 
   test_rules.py360100% 
pyformlang/pda
   __init__.py60100% 
   cfg_variable_converter.py6744 94%
   epsilon.py40100% 
   pda.py3090100% 
   stack_symbol.py160100% 
   state.py180100% 
   symbol.py140100% 
   transition_function.py460100% 
   utils.py360100% 
pyformlang/pda/tests
   __init__.py00100% 
   test_pda.py2460100% 
pyformlang/regular_expression
   __init__.py40100% 
   python_regex.py19933 98%
   regex.py1430100% 
   regex_objects.py790100% 
   regex_reader.py16044 98%
pyformlang/regular_expression/tests
   __init__.py00100% 
   test_python_regex.py23122 99%
   test_regex.py2490100% 
pyformlang/rsa
   __init__.py30100% 
   box.py3866 84%
   recursive_automaton.py8766 93%
pyformlang/rsa/tests
   __init__.py00100% 
   test_rsa.py510100% 
pyformlang/tests
   __init__.py00100% 
TOTAL79564099% 

Tests Skipped Failures Errors Time
268 0 💤 0 ❌ 0 🔥 3.467s ⏱️

Please sign in to comment.