Skip to content

Commit

Permalink
light name normalisation before default compare
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed May 28, 2024
1 parent 8fdf13b commit b166cff
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion followthemoney/types/name.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
 from rigour.names import pick_name
 from normality import slugify
 from normality.cleaning import collapse_spaces, strip_quotes
+from fingerprints.cleanup import clean_name_light

 from followthemoney.types.common import PropertyType
 from followthemoney.util import dampen
Expand Down Expand Up @@ -48,7 +49,11 @@ def _specificity(self, value: str) -> float:

     def compare(self, left: str, right: str) -> float:
         """Compare two names for similarity."""
-        return levenshtein_similarity(left, right)
+        left_clean = clean_name_light(left)
+        right_clean = clean_name_light(right)
+        if left_clean is None or right_clean is None:
+            return 0.0
+        return levenshtein_similarity(left_clean, right_clean)

     def node_id(self, value: str) -> Optional[str]:
         slug = slugify(value)
Expand Down

0 comments on commit b166cff

Please sign in to comment.