Skip to content

Commit

Permalink
clean: replace more regex strings with shared patterns
Browse files: browse the repository at this point in the history.
  • Loading branch information
CompRhys committed Jul 16, 2024
1 parent f54274f commit e92ec53
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
16 changes: 9 additions & 7 deletions aviary/wren/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import json
import re
from functools import cache
from itertools import groupby
from typing import TYPE_CHECKING, Any
Expand All @@ -12,7 +11,12 @@
from torch.utils.data import Dataset

from aviary import PKG_DIR
from aviary.wren.utils import relab_dict, wyckoff_multiplicity_dict
from aviary.wren.utils import (
RE_SUBST_ONE_PREFIX,
RE_WYCKOFF_NO_PREFIX,
relab_dict,
wyckoff_multiplicity_dict,
)

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -252,10 +256,6 @@ def collate_batch(
)


# Pre-compile the regular expression
WYK_LETTER_PATTERN = re.compile(r"((?<![0-9])[A-z])")


def parse_aflow_wyckoff_str(
aflow_label: str,
) -> tuple[str, list[float], list[str], list[tuple[str, ...]]]:
Expand All @@ -278,7 +278,9 @@ def parse_aflow_wyckoff_str(

for el, wyk_letters_per_elem in zip(elems, wyckoff_letters):
# Normalize Wyckoff letters to start with 1 if missing digit
wyk_letters_normalized = WYK_LETTER_PATTERN.sub(r"1\g<1>", wyk_letters_per_elem)
wyk_letters_normalized = RE_WYCKOFF_NO_PREFIX.sub(
RE_SUBST_ONE_PREFIX, wyk_letters_per_elem
)

# Separate out pairs of Wyckoff letters and their number of occurrences
sep_n_wyks = [
Expand Down
3 changes: 0 additions & 3 deletions aviary/wren/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@
RE_SUBST_ONE_PREFIX = r"1\g<1>"
RE_SUBST_ONE_SUFFIX = r"\g<1>1"

# Define substitution patterns
SUBST_ONE_PREFIX = r"1\g<1>"


def split_alpha_numeric(s: str) -> dict[str, list[str]]:
"""Split a string into separate lists of alpha and numeric groups.
Expand Down

0 comments on commit e92ec53

Please sign in to comment.