Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions data_expectations/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,46 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional


class ExpectationNotMetError(Exception):
    """Raised when a record fails to meet an expectation.

    Attributes are kept on the instance so callers can inspect the failure
    programmatically instead of parsing the message.

    Args:
        expectation: Name of the expectation that was not met.
        record: The record that failed the expectation.
        details: Optional extra context (for example the text of an
            underlying error) appended to the first line of the message.
    """

    def __init__(self, expectation: str, record: Dict[str, Any], details: Optional[str] = None):
        self.expectation = expectation
        self.record = record
        self.details = details

        base_message = f"Record didn't meet expectation '{expectation}'"
        if details:
            base_message += f": {details}"

        # Truncate very long records so the message stays readable; the
        # untruncated record is still available as ``self.record``.
        record_str = str(record)
        if len(record_str) > 200:
            record_str = record_str[:200] + "..."

        super().__init__(f"{base_message}\nRecord: {record_str}")


class ExpectationNotUnderstoodError(Exception):
    """Raised when an expectation isn't understood or recognized.

    Args:
        expectation: The name that could not be resolved.
        available_expectations: Optional list of known expectation names.
            When given, the message includes up to three names containing
            ``expectation`` as a case-insensitive substring, or otherwise
            the first five known names.
    """

    def __init__(self, expectation: str, available_expectations: Optional[list] = None):
        self.expectation = expectation
        self.available_expectations = available_expectations or []

        message = f"Expectation not understood: '{expectation}'"
        if available_expectations:
            # Coerce to str so building the error message can never itself
            # fail when a non-string name reaches this constructor.
            needle = str(expectation).lower()
            suggestions = [exp for exp in available_expectations if needle in str(exp).lower()]
            if suggestions:
                message += f"\nDid you mean one of: {suggestions[:3]}"
            else:
                message += f"\nAvailable expectations: {available_expectations[:5]}"
                if len(available_expectations) > 5:
                    message += f" (and {len(available_expectations) - 5} more)"

        super().__init__(message)
73 changes: 64 additions & 9 deletions data_expectations/internals/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import typing
from typing import Dict, Any

from data_expectations import Expectations
from data_expectations import Behaviors
from data_expectations.errors import ExpectationNotMetError
from data_expectations.errors import ExpectationNotUnderstoodError

ALL_EXPECTATIONS = Expectations.all_expectations()
# Cache the expectations dictionary for better performance
_EXPECTATIONS_CACHE: Dict[str, Any] = {}


def _get_expectations() -> Dict[str, Any]:
    """Return the expectations dictionary, building it on first use.

    The result of ``Expectations.all_expectations()`` is stored in the
    module-level ``_EXPECTATIONS_CACHE`` and reused while that cache is
    non-empty.
    """
    global _EXPECTATIONS_CACHE
    if _EXPECTATIONS_CACHE:
        return _EXPECTATIONS_CACHE
    _EXPECTATIONS_CACHE = Expectations.all_expectations()
    return _EXPECTATIONS_CACHE


def _expectation_name(expectation):
    """Return the lookup key for an expectation given as a Behaviors member or a plain name."""
    return expectation.value if isinstance(expectation, Behaviors) else expectation


def evaluate_record(expectations: Expectations, record: dict, suppress_errors: bool = False) -> bool:
    """
    Test a single record against a set of Expectations.

    Args:
        expectations: The Expectations instance.
        record: The dictionary record to be tested.
        suppress_errors: Whether to suppress expectation errors and return False instead.

    Returns:
        True if all expectations are met, False otherwise.

    Raises:
        ExpectationNotUnderstoodError: If an expectation is not recognized
            (raised regardless of suppress_errors).
        ExpectationNotMetError: If an expectation fails, or the expectation
            function itself raises, and suppress_errors is False.
        TypeError: If record is not a dictionary and suppress_errors is False.
    """
    all_expectations = _get_expectations()

    # Resolve each name once; both the validation pass and the evaluation
    # pass below use the same (name, definition) pairs.
    resolved = [
        (_expectation_name(definition.expectation), definition)
        for definition in expectations.set_of_expectations
    ]

    # Check for unknown expectations first (before type checking record).
    # This maintains backward compatibility with tests that rely on this behavior.
    for expectation_name, _ in resolved:
        if expectation_name not in all_expectations:
            raise ExpectationNotUnderstoodError(expectation_name, list(all_expectations))

    # Now check record type
    if not isinstance(record, dict):
        if suppress_errors:
            return False
        raise TypeError(f"Record must be a dictionary, got {type(record)}")

    # Evaluate each expectation against the record
    for expectation_name, definition in resolved:
        base_config = {"row": record, "column": definition.column, **definition.config}

        # Keep the try body to the call alone: the "not met" error raised
        # below must not be caught and re-wrapped by this handler.
        try:
            result = all_expectations[expectation_name](**base_config)
        except Exception as err:
            if suppress_errors:
                return False
            # Wrap unexpected errors with more context
            raise ExpectationNotMetError(expectation_name, record, str(err)) from err

        if not result:
            if suppress_errors:
                return False  # data failed to meet expectation
            raise ExpectationNotMetError(expectation_name, record)

    return True

Expand All @@ -60,5 +106,14 @@ def evaluate_list(expectations: Expectations, dictset: typing.Iterable[dict], su

Returns:
True if all records meet all Expectations, False otherwise.

Raises:
ExpectationNotUnderstoodError: If an expectation is not recognized.
ExpectationNotMetError: If an expectation fails and suppress_errors is False.
"""
return all(evaluate_record(expectations, record, suppress_errors) for record in dictset)
try:
return all(evaluate_record(expectations, record, suppress_errors) for record in dictset)
except (ExpectationNotUnderstoodError, ExpectationNotMetError):
# Re-raise these specific errors even if suppress_errors is True
# as they indicate configuration issues, not data validation issues
raise
Loading
Loading