From 878cd077322e090c9ef5f552b100e4dc45c228c9 Mon Sep 17 00:00:00 2001
From: Thanos Tsiamis
Date: Fri, 14 Nov 2025 19:59:34 +0200
Subject: [PATCH] test: Add tests for normalize_distance function in test_utils.py

---
Reviewer notes (commentary only: text between the "---" marker and the
first "diff --git" line is not part of the commit message or the change):

* Line structure was restored from a single-line copy of this patch. The
  hunks below match the recorded counts (-1,8 +1,10 and -30,3 +32,28;
  28 insertions, 1 deletion). Indentation was reconstructed as 4 spaces
  per level -- confirm against tests/test_utils.py before applying.
* `import pytest` is added, but none of the added lines reference it; the
  new test only uses unittest's subTest and assertAlmostEqual. Consider
  dropping the import when the patch is regenerated.
* Every expected value is written as 1 - dist / max(len(s1), len(s2)),
  with a denominator of 1 when both strings are empty (per the in-test
  comment). The distance is passed in by the test, not computed.
* The ("kitten", "sitting") case passes dist=2, while the Levenshtein
  distance of that pair is 3. This looks harmless because dist is an
  input here, but a realistic value would be less surprising to readers.
* The (5, "abc", "") case expects a negative result (1 - 5/3), so this
  test pins that normalize_distance does not clamp to [0, 1].

 tests/test_utils.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9be26ab..7304494 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,8 +1,10 @@
 import unittest
 
+import pytest
+
 from tests import d1_path
 from valentine.data_sources.utils import get_encoding, get_delimiter, is_date
-from valentine.utils.utils import is_sorted, convert_data_type
+from valentine.utils.utils import is_sorted, convert_data_type, normalize_distance
 
 
 class TestUtils(unittest.TestCase):
@@ -30,3 +32,28 @@ def test_get_delimiter(self):
     def test_is_date(self):
         date_str = "2019-04-26 18:03:50.941332"
         assert is_date(date_str)
+
+    def test_normalize_distance_many_cases(self):
+        cases = [
+            # identical strings
+            (0, "abc", "abc", 1.0),
+            # completely different, distance == max length
+            (3, "abc", "xyz", 0.0),
+            # partial similarity
+            (1, "abc", "axc", 1 - 1/3),
+            # different lengths, distance smaller than max length
+            (2, "abcd", "ab", 1 - 2/4),
+            # both empty strings → max(len1, len2) = 0 → denominator becomes 1
+            (0, "", "", 1 - 0/1),
+            # one empty, one non-empty, distance equals length of non-empty
+            (3, "", "abc", 1 - 3/3),
+            # distance greater than max length (still valid mathematically)
+            (5, "abc", "", 1 - 5/3),
+            # another mixed case
+            (2, "kitten", "sitting", 1 - 2/7),
+        ]
+
+        for dist, s1, s2, expected in cases:
+            with self.subTest(dist=dist, str1=s1, str2=s2):
+                result = normalize_distance(dist, s1, s2)
+                self.assertAlmostEqual(result, expected)