Skip to content

Commit

Permalink
✨ Add the negation [^...] in PythonRegex
Browse files Browse the repository at this point in the history
  • Loading branch information
Aunsiels committed Mar 18, 2024
1 parent eca9a78 commit 51d61ef
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
20 changes: 15 additions & 5 deletions pyformlang/regular_expression/python_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"$": "\\$",
"\n": "",
" ": "\\ ",
'\\': '\\\\'
'\\': '\\\\',
"?": "\\?"
}

RECOMBINE = {
Expand Down Expand Up @@ -218,13 +219,14 @@ def _preprocess_brackets_content(self, bracket_content):
bracket_content_temp = []
previous_is_valid_for_range = False
for i, symbol in enumerate(bracket_content):
if (symbol == "-" and not self._should_escape_next_symbol(
bracket_content_temp)):
if (not previous_is_valid_for_range
or i == len(bracket_content) - 1):
# We have a range
if symbol == "-" and not self._should_escape_next_symbol(bracket_content_temp):
if not previous_is_valid_for_range or i == len(bracket_content) - 1:
# False alarm, no range
bracket_content_temp.append("-")
previous_is_valid_for_range = True
else:
# We insert all the characters in the range
bracket_content[i - 1] = self._recombine(bracket_content[i - 1])
for j in range(ord(bracket_content[i - 1][-1]) + 1,
ord(bracket_content[i + 1][-1])):
Expand All @@ -244,10 +246,18 @@ def _preprocess_brackets_content(self, bracket_content):
previous_is_valid_for_range = False
else:
previous_is_valid_for_range = True
bracket_content_temp = self._preprocess_negation(bracket_content_temp)
bracket_content_temp = self._insert_or(bracket_content_temp)
bracket_content_temp = self._recombine(bracket_content_temp)
return bracket_content_temp

@staticmethod
def _preprocess_negation(bracket_content):
    """Expand a negated bracket expression into the symbols it allows.

    Parameters
    ----------
    bracket_content : list
        The already pre-processed symbols found between ``[`` and ``]``.

    Returns
    -------
    list
        ``bracket_content`` itself when it is empty or does not start
        with ``"^"``; otherwise every entry of ``ESCAPED_PRINTABLES``
        that is not listed after the leading ``"^"``.
    """
    if not bracket_content or bracket_content[0] != "^":
        return bracket_content
    # The leading "^" is only the negation marker, not a member of the
    # excluded set: Python's ``[^abc]`` still matches a literal "^".
    # Only the symbols after the marker are removed from the candidates.
    excluded = bracket_content[1:]
    # NOTE(review): ESCAPED_PRINTABLES is defined elsewhere in the module;
    # presumably the printable characters in escaped form - confirm.
    return [x for x in ESCAPED_PRINTABLES if x not in excluded]

@staticmethod
def _insert_or(l_to_modify):
res = []
Expand Down
15 changes: 15 additions & 0 deletions pyformlang/regular_expression/tests/test_python_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,3 +313,18 @@ def test_error_backslash(self):
self._test_compare(r"[a\\\\\\]]", "\\]")
self._test_compare(r"\"([d\"\\\\]|\\\\.)*\"", '"d\\"')
self._test_compare(r"[a\\\\]", "a")

def test_negation_brackets(self):
    """Run ``_test_compare`` on negated bracket classes."""
    # Same pattern against words with and without excluded letters.
    for word in ("", "a", "b", "c", "d", "dga", "dgh"):
        self._test_compare(r"[^abc]*", word)
    # A special character inside the negated class.
    self._test_compare(r"[^?]*", "dgh")

def test_question_mark(self):
    """Run ``_test_compare`` on patterns involving the ``?`` character."""
    cases = (
        (r".", "?"),            # wildcard against a literal "?"
        (r"a(a|b)?", "a"),      # "?" used as the optional operator
        (r"a(a|b)\?", "ab?"),   # escaped "?" used as a literal
    )
    for pattern, word in cases:
        self._test_compare(pattern, word)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setuptools.setup(
name='pyformlang',
version='1.0.8',
version='1.0.9',
#scripts=['pyformlang'] ,
author="Julien Romero",
author_email="[email protected]",
Expand Down

1 comment on commit 51d61ef

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
pyformlang
   __init__.py10100% 
pyformlang/cfg
   __init__.py70100% 
   cfg.py5500100% 
   cfg_object.py100100% 
   cyk_table.py720100% 
   epsilon.py60100% 
   llone_parser.py15622 99%
   parse_tree.py6211 98%
   pda_object_creator.py310100% 
   production.py330100% 
   recursive_decent_parser.py5611 98%
   set_queue.py140100% 
   terminal.py150100% 
   utils.py100100% 
   utils_cfg.py250100% 
   variable.py260100% 
pyformlang/cfg/tests
   __init__.py00100% 
   test_cfg.py59811 99%
   test_llone_parser.py11811 99%
   test_production.py220100% 
   test_recursive_decent_parser.py250100% 
   test_terminal.py190100% 
   test_variable.py160100% 
pyformlang/fcfg
   __init__.py40100% 
   fcfg.py11711 99%
   feature_production.py260100% 
   feature_structure.py19133 98%
   state.py350100% 
pyformlang/fcfg/tests
   __init__.py00100% 
   test_fcfg.py1230100% 
   test_feature_structure.py1590100% 
pyformlang/finite_automaton
   __init__.py100100% 
   deterministic_finite_automaton.py20722 99%
   doubly_linked_list.py340100% 
   doubly_linked_node.py150100% 
   epsilon.py100100% 
   epsilon_nfa.py3720100% 
   finite_automaton.py1620100% 
   finite_automaton_object.py100100% 
   hopcroft_processing_list.py220100% 
   nondeterministic_finite_automaton.py220100% 
   nondeterministic_transition_function.py500100% 
   partition.py360100% 
   regexable.py160100% 
   state.py150100% 
   symbol.py110100% 
   transition_function.py5111 98%
pyformlang/finite_automaton/tests
   __init__.py00100% 
   test_deterministic_finite_automaton.py2610100% 
   test_epsilon.py100100% 
   test_epsilon_nfa.py6210100% 
   test_nondeterministic_finite_automaton.py930100% 
   test_nondeterministic_transition_function.py610100% 
   test_state.py280100% 
   test_symbol.py270100% 
   test_transition_function.py600100% 
pyformlang/fst
   __init__.py20100% 
   fst.py2420100% 
pyformlang/fst/tests
   __init__.py00100% 
   test_fst.py1600100% 
pyformlang/indexed_grammar
   __init__.py70100% 
   consumption_rule.py340100% 
   duplication_rule.py300100% 
   end_rule.py300100% 
   indexed_grammar.py25722 99%
   production_rule.py320100% 
   reduced_rule.py250100% 
   rule_ordering.py700100% 
   rules.py690100% 
pyformlang/indexed_grammar/tests
   __init__.py00100% 
   test_indexed_grammar.py2250100% 
   test_rules.py360100% 
pyformlang/pda
   __init__.py60100% 
   cfg_variable_converter.py6744 94%
   epsilon.py40100% 
   pda.py3090100% 
   stack_symbol.py160100% 
   state.py180100% 
   symbol.py140100% 
   transition_function.py460100% 
   utils.py360100% 
pyformlang/pda/tests
   __init__.py00100% 
   test_pda.py2460100% 
pyformlang/regular_expression
   __init__.py40100% 
   python_regex.py26666 98%
   regex.py1430100% 
   regex_objects.py790100% 
   regex_reader.py16044 98%
pyformlang/regular_expression/tests
   __init__.py00100% 
   test_python_regex.py27122 99%
   test_regex.py2490100% 
pyformlang/rsa
   __init__.py30100% 
   box.py3866 84%
   recursive_automaton.py8766 93%
pyformlang/rsa/tests
   __init__.py00100% 
   test_rsa.py510100% 
pyformlang/tests
   __init__.py00100% 
TOTAL80634399% 

Tests Skipped Failures Errors Time
273 0 💤 0 ❌ 0 🔥 3.948s ⏱️

Please sign in to comment.