Skip to content

Commit

Permalink
Merge pull request #250 from HopkinsIDD/enhancement/document-test-gem…
Browse files Browse the repository at this point in the history
…pyor-file_paths

Document And Test `gempyor.file_paths`
  • Loading branch information
TimothyWillard committed Jul 15, 2024
2 parents 27c7a5b + 9761bb5 commit 76c6985
Show file tree
Hide file tree
Showing 7 changed files with 475 additions and 165 deletions.
191 changes: 171 additions & 20 deletions flepimop/gempyor_pkg/src/gempyor/file_paths.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,71 @@
import os, pathlib, datetime
"""
This module contains utilities for interacting with and generating file paths
in the context of this package, which saves its output to a very particular
directory structure.
Functions:
- create_file_name: Creates a full file name with extension.
- create_file_name_without_extension: Creates a file name without extension.
- run_id: Generates a run ID based on the current or provided timestamp.
- create_dir_name: Creates a directory name based on given parameters.
"""

from datetime import datetime
import os
from pathlib import Path


def create_file_name(
run_id,
prefix,
index,
ftype,
extension,
inference_filepath_suffix="",
inference_filename_prefix="",
create_directory=True,
):
run_id: str,
prefix: str,
index: str | int,
ftype: str,
extension: str,
inference_filepath_suffix: str = "",
inference_filename_prefix: str = "",
create_directory: bool = True,
) -> str:
"""
Generates a full file name with the given parameters and extension.
Args:
run_id: The unique identifier for the run.
prefix: A prefix for the file path.
index: An index to include in the file name.
ftype: The type of file being created.
extension: The file extension, without the leading period.
inference_filepath_suffix: Suffix for the inference file path. Defaults to "".
inference_filename_prefix: Prefix for the inference file name. Defaults to "".
create_directory: Whether to create the parent directory if it doesn't exist.
Defaults to True.
Returns:
The full file name with extension.
Examples:
>>> from gempyor.file_paths import create_file_name
>>> create_file_name(
... "20240101_000000",
... "abc",
... 1,
... "hosp",
... "parquet",
... "global",
... "jkl",
... create_directory=False,
... )
PosixPath('model_output/abc/hosp/global/jkl000000001.20240101_000000.hosp.parquet')
"""
if create_directory:
os.makedirs(
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix), exist_ok=True
create_dir_name(
run_id,
prefix,
ftype,
inference_filepath_suffix,
inference_filename_prefix,
),
exist_ok=True,
)

fn_no_ext = create_file_name_without_extension(
Expand All @@ -29,30 +81,129 @@ def create_file_name(


def create_file_name_without_extension(
run_id, prefix, index, ftype, inference_filepath_suffix, inference_filename_prefix, create_directory=True
):
run_id: str,
prefix: str,
index: str | int,
ftype: str,
inference_filepath_suffix: str,
inference_filename_prefix: str,
create_directory: bool = True,
) -> Path:
"""
Generates a file name without the extension.
This function will return the file name to use, but does not actually create the
file.
Args:
run_id: The unique identifier for the run.
prefix: A prefix for the file path.
index: An index to include in the file name.
ftype: The type of file being created.
inference_filepath_suffix: Suffix for the inference file path.
inference_filename_prefix: Prefix for the inference file name.
create_directory: Whether to create the file's parent directory if it doesn't
exist. Defaults to True.
Returns:
The file name without extension as a Path object.
Examples:
>>> from gempyor.file_paths import create_file_name_without_extension
>>> create_file_name_without_extension(
... "20240101_000000",
... "abc",
... 1,
... "hosp",
... "global",
... "jkl",
... create_directory=False,
... )
PosixPath('model_output/abc/hosp/global/jkl000000001.20240101_000000.hosp')
"""
if create_directory:
os.makedirs(
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix), exist_ok=True
create_dir_name(
run_id,
prefix,
ftype,
inference_filepath_suffix,
inference_filename_prefix,
),
exist_ok=True,
)
filename = pathlib.Path(
filename = Path(
"model_output",
prefix,
ftype,
inference_filepath_suffix,
f"{inference_filename_prefix}{index:>09}.{run_id}.{ftype}",
)
# old: "model_output/%s/%s%09d.%s.%s" % (ftype, prefix, index, run_id, ftype)
return filename


def run_id():
return datetime.datetime.strftime(datetime.datetime.now(), "%Y%m%d_%H%M%S%Z")
def run_id(timestamp: None | datetime = None) -> str:
"""
Generates a run ID based on the current or provided timestamp.
Args:
timestamp: A specific timestamp to use. If `None` this function will use the
current timestamp.
Returns:
The generated run ID.
Examples:
>>> from datetime import datetime, timezone
>>> from gempyor.file_paths import run_id
>>> run_id()
'20240711_160059'
>>> run_id(timestamp=datetime(2024, 1, 1))
'20240101_000000'
>>> run_id(timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc))
'20240101_000000UTC'
"""
if not timestamp:
timestamp = datetime.now()
return datetime.strftime(timestamp, "%Y%m%d_%H%M%S%Z")


def create_dir_name(
    run_id: str,
    prefix: str,
    ftype: str,
    inference_filepath_suffix: str,
    inference_filename_prefix: str,
) -> str:
    """
    Generate a directory name based on the given parameters.

    This function will return the directory name to use, but does not actually create
    the directory. The name is the parent directory of the path produced by
    `create_file_name_without_extension` for the same parameters.

    Args:
        run_id: The unique identifier for the run.
        prefix: A prefix for the file path.
        ftype: The type of file being created.
        inference_filepath_suffix: Suffix for the inference file path.
        inference_filename_prefix: Prefix for the inference file name.

    Returns:
        The directory name.

    Examples:
        >>> from gempyor.file_paths import create_dir_name
        >>> create_dir_name("20240101_000000", "abc", "hosp", "def", "jkl")
        'model_output/abc/hosp/def'
    """
    return os.path.dirname(
        create_file_name_without_extension(
            run_id,
            prefix,
            1,  # placeholder index; only the parent directory of the result is used
            ftype,
            inference_filepath_suffix,
            inference_filename_prefix,
            # Must stay False: this function only computes a name.
            create_directory=False,
        )
    )
32 changes: 32 additions & 0 deletions flepimop/gempyor_pkg/src/gempyor/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Unit testing utilities for `gempyor`
This module contains unit testing utilities, mostly pytest fixtures. To use this module
the optional test dependencies must be installed.
"""

from collections.abc import Generator
import os
from tempfile import TemporaryDirectory

import pytest


@pytest.fixture
def change_directory_to_temp_directory() -> Generator[None, None, None]:
    """Change test working directory to a temporary directory

    Pytest fixture that will create a temporary directory and change the working
    directory to that temporary directory. This fixture also cleans up after itself by
    resetting the working directory and removing the temporary directory on test end.
    Useful for testing functions that create files relative to the working directory.

    Yields:
        None
    """
    current_dir = os.getcwd()
    # The context manager removes the temporary directory even if changing into it
    # fails or an exception is raised while the fixture is suspended.
    with TemporaryDirectory() as temp_dir:
        os.chdir(temp_dir)
        try:
            yield
        finally:
            # Leave the temporary directory before it is removed; a directory that
            # is still the working directory cannot be deleted on some platforms.
            os.chdir(current_dir)
62 changes: 62 additions & 0 deletions flepimop/gempyor_pkg/tests/file_paths/test_create_dir_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import os.path
from pathlib import Path

import pytest

from gempyor.file_paths import create_dir_name


class TestCreateDirName:
    """
    Unit tests for the `gempyor.file_paths.create_dir_name` function.
    """

    @pytest.mark.parametrize(
        (
            "run_id",
            "prefix",
            "ftype",
            "inference_filepath_suffix",
            "inference_filename_prefix",
        ),
        [
            ("abc", "def", "jkl", "mno", "pqr"),
            ("20240101_000000", "test0001", "seed", "", ""),
            ("20240101_000000", "test0002", "seed", "", ""),
            ("20240101_000000", "test0003", "seed", "", ""),
            ("20240101_000000", "test0004", "seed", "", ""),
            ("20240101_000000", "test0005", "hosp", "", ""),
            ("20240101_000000", "test0006", "hosp", "", ""),
            ("20240101_000000", "test0007", "hosp", "", ""),
            ("20240101_000000", "test0008", "hosp", "", ""),
        ],
    )
    def test_create_dir_name(
        self,
        run_id: str,
        prefix: str,
        ftype: str,
        inference_filepath_suffix: str,
        inference_filename_prefix: str,
    ) -> None:
        """Check the returned directory name and that nothing is created on disk."""
        # Setup
        path = create_dir_name(
            run_id=run_id,
            prefix=prefix,
            ftype=ftype,
            inference_filepath_suffix=inference_filepath_suffix,
            inference_filename_prefix=inference_filename_prefix,
        )
        # Expected layout: model_output/<prefix>/<ftype>/<inference_filepath_suffix>;
        # `Path` drops empty components, which covers the "" suffix cases.
        expected_path = str(
            Path(
                "model_output",
                prefix,
                ftype,
                inference_filepath_suffix,
            )
        )

        # Assertions
        assert isinstance(path, str)
        # The path is relative, so this checks against the current working directory.
        assert not os.path.exists(path)
        assert path == expected_path
Loading

0 comments on commit 76c6985

Please sign in to comment.