Skip to content

Commit

Permalink
Merging dev into main (#19)
Browse files Browse the repository at this point in the history
* Updated README to include blocked-list and restructured

* Adding minor suggestion by Guy

Co-authored-by: Guy Dumais <[email protected]>

* update the readme with the blocked-list details

* update with new REST route

* Update README.md

Co-authored-by: koryf <[email protected]>

* Update README.md

Co-authored-by: koryf <[email protected]>

* keeping the old REST path until 3.0 is released.

* Update pii_dict format according to deid 3.0.0beta3

* Fix pre-commit hook failing on directories (#17)

* Add test for get flagged lines

* Skip PII flag check for directories

---------

Co-authored-by: ketakipai <[email protected]>
Co-authored-by: ketakipai <[email protected]>
Co-authored-by: Guy Dumais <[email protected]>
Co-authored-by: Guy Dumais <[email protected]>
Co-authored-by: koryf <[email protected]>
  • Loading branch information
6 people committed Feb 8, 2023
1 parent f2b7fa6 commit 8bf1476
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 6 deletions.
12 changes: 6 additions & 6 deletions pii_check/pii_check_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_payload(content, enabled_entity_list, blocked_list):
def get_flagged_lines(files):
flagged = []
for file in files:
if os.path.exists(file):
if os.path.exists(file) and not os.path.isdir(file):
with open(file, "r") as fp:
lines = fp.readlines()
start_flag = False
Expand Down Expand Up @@ -77,8 +77,8 @@ def locate_pii_in_files(content, files, checked, pii_dict):
for number, line in enumerate(lines, 1):
if content in line:
if (
pii_dict["stt_idx"],
pii_dict["end_idx"],
pii_dict["location"]["stt_idx"],
pii_dict["location"]["end_idx"],
number,
file,
) in checked:
Expand Down Expand Up @@ -115,16 +115,16 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list):
continue
for pii_dict in item["entities"]:
line, file = locate_pii_in_files(content, files, checked, pii_dict)
checked.append((pii_dict["stt_idx"], pii_dict["end_idx"], line, file))
checked.append((pii_dict["location"]["stt_idx"], pii_dict["location"]["end_idx"], line, file))
skip = False
for item in flagged:
if line > item[0] and line < item[1] and file == item[2]:
skip = True
break
if skip == False:
msg.append(
f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['stt_idx'] + 1}, end "
f"index: {pii_dict['end_idx'] + 1} "
f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['location']['stt_idx'] + 1}, end "
f"index: {pii_dict['location']['end_idx'] + 1} "
)

if not msg:
Expand Down
4 changes: 4 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pytest==7.2.1
pytest-check==2.1.2
python-dotenv==0.19.0
requests==2.28.1
Empty file added tests/__init__.py
Empty file.
1 change: 1 addition & 0 deletions tests/test_data/dir_with_files/file_with_pii.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Credit card number: 1234 5678 9101 1123
10 changes: 10 additions & 0 deletions tests/test_data/dir_with_files/file_with_pii_flag
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
PII_CHECK:OFF
Some content in between the flags. Ideally this content won't be checked for PII.
Below is a dummy PII to check this
Credit card number: 1234 5678 9101 1123
CVV: 123
PII_CHECK:ON

Some content where the check will be performed.
Credit card number: 1234 5678 9101 1123
CVV: 123
1 change: 1 addition & 0 deletions tests/test_data/dir_with_files/file_without_pii.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Here's some content.
1 change: 1 addition & 0 deletions tests/test_data/symlink_of_dir_with_files
13 changes: 13 additions & 0 deletions tests/test_get_flagged_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest_check as check
from pii_check.pii_check_hook import get_flagged_lines


def test_get_flagged_lines():
    """Verify that only the PII_CHECK:OFF/ON span of the flag file is reported.

    The candidate list mixes plain files, a path to a symlinked directory and
    two ``..._flag_on`` / ``..._flag_off`` paths (presumably absent from the
    test data -- TODO confirm). The single expected entry is the
    ``(1, 6, path)`` tuple for ``file_with_pii_flag``, whose ``PII_CHECK:OFF``
    and ``PII_CHECK:ON`` markers sit on lines 1 and 6 of that fixture.
    """
    candidate_paths = [
        "tests/test_data/dir_with_files/file_with_pii.txt",
        "tests/test_data/dir_with_files/file_without_pii.txt",
        "tests/test_data/dir_with_files/file_with_pii_flag_on",
        "tests/test_data/dir_with_files/file_with_pii_flag_off",
        "tests/test_data/dir_with_files/file_with_pii_flag",
        # Directory entry: must be skipped instead of being opened as a file.
        "tests/test_data/symlink_of_dir_with_files",
    ]
    expected_ranges = [
        (1, 6, "tests/test_data/dir_with_files/file_with_pii_flag"),
    ]

    flagged_ranges = get_flagged_lines(candidate_paths)

    check.equal(flagged_ranges, expected_ranges)

0 comments on commit 8bf1476

Please sign in to comment.