Skip to content

Commit

Permalink
Add mode kwarg to load_string
Browse files Browse the repository at this point in the history
  • Loading branch information
kg583 committed Jun 28, 2024
1 parent 76bb3bd commit 610a9d9
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
5 changes: 3 additions & 2 deletions tivars/tokenizer/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@


def encode(string: str, *,
trie: TokenTrie = None, mode: str = "smart", normalize: bool = True) -> tuple[bytes, OsVersion]:
trie: TokenTrie = None, mode: str = None, normalize: bool = True) -> tuple[bytes, OsVersion]:
"""
Encodes a string of token represented in text into a byte stream and its minimum supported OS version
Encodes a string of tokens represented as text into a byte stream and its minimum supported OS version
Tokenization is performed using one of three procedures, dictated by ``mode``:
- ``max``: Always munch maximally, i.e. consume the most input possible to produce a token
Expand Down Expand Up @@ -44,6 +44,7 @@ def encode(string: str, *,

string = _normalize(string) if normalize else string
trie = trie or TI_84PCE.get_trie()
mode = mode or "smart"

data = b''
since = OsVersions.INITIAL
Expand Down
27 changes: 19 additions & 8 deletions tivars/types/tokenized.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def decode(data: bytes, *, lang: str = "en", mode: str = "display") -> str | byt
return decode(data, lang=lang, mode=mode)[0]

@staticmethod
def encode(string: str, *, model: TIModel = None, lang: str = None, mode: str = "max") -> bytes:
def encode(string: str, *, model: TIModel = None, lang: str = None, mode: str = None) -> bytes:
"""
Encodes a string of tokens represented as text into a byte stream
Expand All @@ -85,7 +85,7 @@ def encode(string: str, *, model: TIModel = None, lang: str = None, mode: str =
:param string: The text string to encode
:param model: The model to target when encoding (defaults to no specific model)
:param lang: The language used in ``string`` (defaults to English, ``en``)
:param mode: The tokenization mode to use (defaults to ``max``)
:param mode: The tokenization mode to use (defaults to ``smart``)
:return: A stream of token bytes
"""

Expand Down Expand Up @@ -149,8 +149,19 @@ def load_bytes(self, data: bytes | BytesIO):
BytesWarning)

@Loader[str]
def load_string(self, string: str, *, model: TIModel = None, lang: str = None):
self.data = self.encode(string, model=model, lang=lang)
def load_string(self, string: str, *, model: TIModel = None, lang: str = None, mode: str = None):
"""
Loads this entry from a string representation
For detailed information on tokenization modes, see `tivars.tokenizer.encode`.
:param string: The string to load
:param model: The model to target when encoding (defaults to no specific model)
:param lang: The language used in ``string`` (defaults to English, ``en``)
:param mode: The tokenization mode to use (defaults to ``smart``)
"""

self.data = self.encode(string, model=model, lang=lang, mode=mode)

def string(self) -> str:
return format(self, "")
Expand Down Expand Up @@ -246,8 +257,8 @@ def name(self, value) -> str:
return value.capitalize()

@Loader[str]
def load_string(self, string: str, *, model: TIModel = None):
super().load_string(string.strip("\""))
def load_string(self, string: str, *, model: TIModel = None, lang: str = None, mode: str = None):
super().load_string(string.strip("\""), model=model, lang=lang, mode=mode)

def string(self) -> str:
return f"\"{super().string()}\""
Expand Down Expand Up @@ -317,12 +328,12 @@ def load_bytes(self, data: bytes | BytesIO):
BytesWarning)

@Loader[str]
def load_string(self, string: str, *, model: TIModel = None, lang: str = None):
def load_string(self, string: str, *, model: TIModel = None, lang: str = None, mode: str = None):
if not self.is_tokenized:
warn("ASM programs may not have tokenized data.",
UserWarning)

super().load_string(string, model=model, lang=lang)
super().load_string(string, model=model, lang=lang, mode=mode)

def string(self) -> str:
string = super().string()
Expand Down

0 comments on commit 610a9d9

Please sign in to comment.