Skip to content

Commit

Permalink
feat: g2p convert now accepts --file option to read a file
Browse files Browse the repository at this point in the history
This replaces the formerly hidden feature of heuristically treating
INPUT_TEXT as a file to read when it named an existing .txt file.

Also fix test_update_schema to:
 - be quiet unless there's an error
 - correctly catch validation errors and display the name of the
   problem config file when one occurs.
  • Loading branch information
joanise committed Jul 11, 2024
1 parent 55b80de commit ab02e4a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 18 deletions.
33 changes: 22 additions & 11 deletions g2p/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,13 @@ def generate_mapping( # noqa: C901
is_flag=True,
help="Show all the conversion steps applied.",
)
@click.option(
"--file",
"-f",
default=False,
is_flag=True,
help="Read input from plain-text file INPUT_TEXT.",
)
@click.option(
"--check/--no-check",
"-c",
Expand Down Expand Up @@ -478,6 +485,7 @@ def convert( # noqa: C901
input_text,
tok,
check,
file,
debugger,
pretty_edges,
tok_lang,
Expand Down Expand Up @@ -531,20 +539,23 @@ def convert( # noqa: C901
raise click.UsageError(
f"Path between '{in_lang}' and '{out_lang}' does not exist"
)
# Figure if the input text is on the command line or in a file
input_text_is_a_file = (
os.path.exists(input_text)
and input_text.endswith("txt")
or re.match(r"/dev/(fd/[0-9]*|stdin)", input_text)
)
to_close = None
try:
if input_text_is_a_file:
to_close = lines = open(input_text, encoding="utf8")
elif input_text == "-":
lines = sys.stdin
if file:
if input_text == "-":
lines = sys.stdin

Check warning on line 546 in g2p/cli.py

View check run for this annotation

Codecov / codecov/patch

g2p/cli.py#L546

Added line #L546 was not covered by tests
else:
try:
to_close = lines = open(input_text, encoding="utf8")
except FileNotFoundError as e:
raise click.UsageError(f"Could not open file {input_text}: {e}")
else:
lines = [input_text]
if os.path.exists(input_text) and input_text.endswith(".txt"):
LOGGER.warning(
"The old heuristic for detecting file input is deprecated, "
f"specify --file if you meant to read your input text from file {input_text}."
)
# Determine which tokenizer to use, if any
if tok is not None and not tok and tok_lang is not None:
raise click.UsageError(
Expand Down Expand Up @@ -572,7 +583,7 @@ def convert( # noqa: C901
if len(outputs) > 1:
click.echo(pprint.pformat(outputs, indent=4))
else:
click.echo(tg.output_string, nl=not input_text_is_a_file)
click.echo(tg.output_string, nl=not file)
finally:
if to_close is not None:
to_close.close()
Expand Down
21 changes: 14 additions & 7 deletions g2p/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import jsonschema
import yaml
from click.testing import CliRunner
from tqdm import tqdm

from g2p._version import VERSION
from g2p.cli import (
Expand Down Expand Up @@ -122,13 +121,14 @@ def test_update_schema(self):
encoding="utf8",
) as f:
schema = json.load(f)
for config in tqdm(
Path(LANGS_DIR).glob("**/config-g2p.yaml"),
desc="Validating all configurations against current schema",
):
# Validate all configurations against the current schema, quietly unless there's an error:
for config in Path(LANGS_DIR).glob("**/config-g2p.yaml"):
with open(config, encoding="utf8") as f:
config_yaml = yaml.safe_load(f)
self.assertIsNone(jsonschema.validate(config_yaml, schema=schema))
try:
jsonschema.validate(config_yaml, schema=schema)
except jsonschema.exceptions.ValidationError:
self.fail(f"Error validating {config}")

def test_convert(self):
langs_to_test = load_public_test_data()
Expand Down Expand Up @@ -490,7 +490,7 @@ def test_show_mappings(self):

def test_convert_from_file(self):
input_file = os.path.join(DATA_DIR, "fra_simple.txt")
results = self.runner.invoke(convert, [input_file, "fra", "fra-ipa"])
results = self.runner.invoke(convert, [input_file, "fra", "fra-ipa", "--file"])
self.assertEqual(results.exit_code, 0)
self.assertIn("fʁɑ̃sɛ", results.output)
with open(input_file, "r", encoding="utf8") as f:
Expand All @@ -501,6 +501,13 @@ def test_convert_from_file(self):
# The output should have the same number of lines as the input
self.assertEqual(lines_in, len(results.output.splitlines()))

with self.assertLogs(LOGGER, "WARNING"):
self.runner.invoke(convert, [input_file, "fra", "fra-ipa"])
result = self.runner.invoke(
convert, ["does_not_exist.txt", "fra", "fra-ipa", "--file"]
)
self.assertIn("No such file or directory", result.output)

def test_convert_errors(self):
"""Exercise code handling error situations in g2p convert"""
results = self.runner.invoke(convert, "asdf bad_in_lang eng-ipa")
Expand Down

0 comments on commit ab02e4a

Please sign in to comment.