Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import unittest

import pytest
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to import pytest; this test module uses unittest.


from tests import d1_path
from valentine.data_sources.utils import get_encoding, get_delimiter, is_date
from valentine.utils.utils import is_sorted, convert_data_type
from valentine.utils.utils import is_sorted, convert_data_type, normalize_distance


class TestUtils(unittest.TestCase):
Expand Down Expand Up @@ -30,3 +32,28 @@ def test_get_delimiter(self):
def test_is_date(self):
    # A full "YYYY-MM-DD HH:MM:SS.ffffff" timestamp must be recognised as a date.
    assert is_date("2019-04-26 18:03:50.941332")

def test_normalize_distance_many_cases(self):
    # Table-driven check of normalize_distance. Each row is
    # (distance, first string, second string, expected score); the expected
    # score is spelled out as ``1 - distance / denominator`` so the
    # arithmetic behind every row stays visible.
    scenarios = (
        # equal strings with zero distance -> full score
        (0, "abc", "abc", 1.0),
        # distance equal to the longer length -> score of zero
        (3, "abc", "xyz", 0.0),
        # a single edit over three characters
        (1, "abc", "axc", 1 - 1 / 3),
        # unequal lengths: the denominator is the longer length (4)
        (2, "abcd", "ab", 1 - 2 / 4),
        # both strings empty: the expected value uses a denominator of 1
        (0, "", "", 1 - 0 / 1),
        # one side empty, distance equal to the other side's length
        (3, "", "abc", 1 - 3 / 3),
        # distance larger than the longer length: a negative score is expected
        (5, "abc", "", 1 - 5 / 3),
        # mixed lengths (6 and 7): the denominator is 7
        (2, "kitten", "sitting", 1 - 2 / 7),
    )

    for scenario in scenarios:
        distance, left, right, want = scenario
        # subTest keeps each row's failure separate and labels it with its inputs.
        with self.subTest(dist=distance, str1=left, str2=right):
            got = normalize_distance(distance, left, right)
            self.assertAlmostEqual(got, want)