Skip to content

Commit

Permalink
Merge pull request #14 from joocer/1.6.0
Browse files Browse the repository at this point in the history
1.6.0
  • Loading branch information
joocer authored Sep 5, 2023
2 parents 0ba9ba8 + e7eb49b commit 088e5bb
Show file tree
Hide file tree
Showing 11 changed files with 97 additions and 195 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ Expectations can be used alongside, or in place of a schema validator, however E
## Provided Expectations

- **expect_column_to_exist** (column)
- **expect_column_names_to_match_set** (columns, ignore_excess:true)
- **expect_column_values_to_not_be_null** (column)
- **expect_column_values_to_be_of_type** (column, expected_type, ignore_nulls:true)
- **expect_column_values_to_be_in_type_list** (column, type_list, ignore_nulls:true)
Expand All @@ -50,18 +49,20 @@ Data Expectations has no external dependencies, can be used ad hoc and in-the-mo

~~~python
import data_expectations as de
from data_expectations import Expectation
from data_expectations import Behaviors

TEST_DATA = {"name":"charles","age":12}
TEST_DATA = {"name": "charles", "age": 12}

set_of_expectations = [
{"expectation": "expect_column_to_exist", "column": "name"},
{"expectation": "expect_column_to_exist", "column": "age"},
{"expectation": "expect_column_values_to_be_between", "column": "age", "minimum": 0, "maximum": 120},
Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="name"),
Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="age"),
Expectation(Behaviors.EXPECT_COLUMN_VALUES_TO_BE_BETWEEN, column="age", config={"minimum": 0, "maximum": 120}),
]

expectations = de.Expectations(set_of_expectations)
try:
de.evaluate_record(expectations, TEST_DATA)
except de.errors.ExpectationNotMetError:
except de.errors.ExpectationNotMetError: # pragma: no cover
print("Data Didn't Meet Expectations")
~~~
22 changes: 21 additions & 1 deletion data_expectations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from enum import Enum


class Behaviors(str, Enum):
    """
    Names of the expectations that can be declared against a column.

    The class derives from both ``str`` and ``Enum``, so every member compares
    equal to its plain string value, e.g.
    ``Behaviors.EXPECT_COLUMN_TO_EXIST == "expect_column_to_exist"``, and a
    member can be recovered from its string with ``Behaviors("...")``.
    """

    EXPECT_COLUMN_TO_EXIST = "expect_column_to_exist"
    EXPECT_COLUMN_VALUES_TO_NOT_BE_NULL = "expect_column_values_to_not_be_null"
    EXPECT_COLUMN_VALUES_TO_BE_OF_TYPE = "expect_column_values_to_be_of_type"
    EXPECT_COLUMN_VALUES_TO_BE_IN_TYPE_LIST = "expect_column_values_to_be_in_type_list"
    EXPECT_COLUMN_VALUES_TO_BE_MORE_THAN = "expect_column_values_to_be_more_than"
    EXPECT_COLUMN_VALUES_TO_BE_LESS_THAN = "expect_column_values_to_be_less_than"
    EXPECT_COLUMN_VALUES_TO_BE_BETWEEN = "expect_column_values_to_be_between"
    EXPECT_COLUMN_VALUES_TO_BE_INCREASING = "expect_column_values_to_be_increasing"
    EXPECT_COLUMN_VALUES_TO_BE_DECREASING = "expect_column_values_to_be_decreasing"
    EXPECT_COLUMN_VALUES_TO_BE_IN_SET = "expect_column_values_to_be_in_set"
    EXPECT_COLUMN_VALUES_TO_MATCH_REGEX = "expect_column_values_to_match_regex"
    EXPECT_COLUMN_VALUES_TO_MATCH_LIKE = "expect_column_values_to_match_like"
    # Exact-length check, spelled to parallel the "..._LENGTH_TO_BE_BETWEEN" member.
    # NOTE(review): confirm this matches the method name on Expectations.
    EXPECT_COLUMN_VALUES_LENGTH_TO_BE = "expect_column_values_length_to_be"
    # Misspelled original ("..._BE_BE"); retained unchanged so existing references
    # to this member keep resolving. Prefer EXPECT_COLUMN_VALUES_LENGTH_TO_BE.
    EXPECT_COLUMN_VALUES_LENGTH_TO_BE_BE = "expect_column_values_length_to_be_be"
    EXPECT_COLUMN_VALUES_LENGTH_TO_BE_BETWEEN = "expect_column_values_length_to_be_between"


from data_expectations.internals.expectations import Expectations
from data_expectations.internals.models import Expectation, ColumnExpectation
from data_expectations.internals.models import Expectation

from data_expectations.internals.evaluate import evaluate_list
from data_expectations.internals.evaluate import evaluate_record
1 change: 0 additions & 1 deletion data_expectations/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

7 changes: 1 addition & 6 deletions data_expectations/internals/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

import typing

from data_expectations import ColumnExpectation
from data_expectations import Expectations
from data_expectations.errors import ExpectationNotMetError
from data_expectations.errors import ExpectationNotUnderstoodError
Expand Down Expand Up @@ -40,11 +39,7 @@ def evaluate_record(expectations: Expectations, record: dict, suppress_errors: b
if expectation not in ALL_EXPECTATIONS:
raise ExpectationNotUnderstoodError(expectation=expectation)

base_config = {"row": record, **expectation_definition.config}

# Conditionally include the 'column' parameter
if isinstance(expectation_definition, ColumnExpectation):
base_config["column"] = expectation_definition.column
base_config = {"row": record, "column": expectation_definition.column, **expectation_definition.config}

if not ALL_EXPECTATIONS[expectation](**base_config):
if not suppress_errors:
Expand Down
36 changes: 3 additions & 33 deletions data_expectations/internals/expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from typing import List
from typing import Union

from data_expectations.internals.models import ColumnExpectation
from data_expectations.internals.models import Expectation
from data_expectations.internals.text import sql_like_to_regex

Expand Down Expand Up @@ -70,12 +69,9 @@ def __init__(self, set_of_expectations: Iterable[Union[str, dict, Expectation]])
if isinstance(exp, str): # Parse JSON string
exp = json.loads(exp)

if isinstance(exp, dict): # Convert dict to Expectation or ColumnExpectation
if "column" in exp:
self.set_of_expectations.append(ColumnExpectation.load(exp))
else:
self.set_of_expectations.append(Expectation.load(exp))
elif is_dataclass(exp) and (isinstance(exp, Expectation) or isinstance(exp, ColumnExpectation)):
if isinstance(exp, dict): # Convert dict to Expectation
self.set_of_expectations.append(Expectation.load(exp))
elif is_dataclass(exp) and isinstance(exp, Expectation):
self.set_of_expectations.append(exp)

@classmethod
Expand All @@ -100,32 +96,6 @@ def reset():
# COLUMN EXPECTATIONS
###################################################################################

@staticmethod
def expect_column_names_to_match_set(
*,
row: dict,
columns: list,
ignore_excess: bool = True,
**kwargs,
):
"""
Confirms that the columns in a record match the given set.
Parameters:
row: dict
The record to be checked.
columns: list
List of expected column names.
ignore_excess: bool
If True, ignores columns not in the list. If False, ensures columns match the list exactly.
Returns: bool
True if expectation is met, False otherwise.
"""
if ignore_excess:
return all(key in columns for key in row.keys())
return sorted(columns) == sorted(list(row.keys()))

@staticmethod
def expect_column_to_exist(
*,
Expand Down
81 changes: 17 additions & 64 deletions data_expectations/internals/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,101 +18,54 @@
from typing import Type
from typing import Union

from data_expectations import Behaviors


@dataclass
class Expectation:
"""
Represents a general Data Expectation.
"""

expectation: str
expectation: Behaviors
column: str
config: Dict[str, Any] = field(default_factory=dict)
ignore_nulls: bool = True

def to_dict(self) -> Dict[str, Any]:
def dump(self) -> Dict[str, Any]:
"""
Converts the Expectation instance to a dictionary representation.
Returns:
A dictionary containing the expectation and its configuration.
"""
return {"expectation": self.expectation, **self.config}
return {
"expectation": self.expectation,
"column": self.column,
"ignore_nulls": self.ignore_nulls,
**self.config,
}

@classmethod
def load_base(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> Dict[str, Any]:
def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
"""
Loads a serialized Expectation and returns it as a dictionary.
Loads a serialized Expectation and returns it as an instance.
Parameters:
serialized: Serialized Expectation as a dictionary or JSON string.
Returns:
A dictionary representation of the serialized Expectation.
An Expectation instance populated with the serialized data.
"""
if isinstance(serialized, str):
serialized = dict(json.loads(serialized))
serialized_copy: dict = deepcopy(serialized)
if "expectation" not in serialized_copy:
raise ValueError("Missing 'expectation' key in Expectation.")
return serialized_copy

@classmethod
def load(cls: Type["Expectation"], serialized: Union[Dict[str, Any], str]) -> "Expectation":
"""
Loads a serialized Expectation and returns it as an instance.
Parameters:
serialized: Serialized Expectation as a dictionary or JSON string.
Returns:
An Expectation instance populated with the serialized data.
"""
serialized_copy = cls.load_base(serialized)
expectation = serialized_copy.pop("expectation")
config = serialized_copy
return cls(expectation=expectation, config=config)


class ColumnExpectation(Expectation):
"""
Represents a Data Expectation related to a specific column.
"""

def __init__(self, expectation: str, column: str, config: Dict[str, Any] = None):
"""
Initializes a ColumnExpectation instance.
Parameters:
expectation: The expectation type as a string.
column: The column the expectation applies to.
config: Additional configuration as a dictionary.
"""
super().__init__(expectation, config or {})
self.column = column

def to_dict(self) -> Dict[str, Any]:
"""
Converts the ColumnExpectation instance to a dictionary representation.
Returns:
A dictionary containing the expectation, column, and its configuration.
"""
return {"expectation": self.expectation, "column": self.column, **self.config}

@classmethod
def load(cls: Type["ColumnExpectation"], serialized: Union[Dict[str, Any], str]) -> "ColumnExpectation":
"""
Loads a serialized ColumnExpectation and returns it as an instance.
Parameters:
serialized: Serialized ColumnExpectation as a dictionary or JSON string.
Returns:
A ColumnExpectation instance populated with the serialized data.
"""
serialized_copy = cls.load_base(serialized)
if "column" not in serialized_copy:
raise ValueError("Missing 'column' key in Expectation.")
expectation = serialized_copy.pop("expectation")
column = serialized_copy.pop("column")
ignore_nulls = serialized_copy.pop("ignore_nulls", True)
config = serialized_copy
return cls(expectation=expectation, column=column, config=config)
return cls(expectation=expectation, column=column, ignore_nulls=ignore_nulls, config=config)
2 changes: 1 addition & 1 deletion data_expectations/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# Store the version here so:
# 1) we don't load dependencies by storing it in __init__.py
# 2) we can import it in setup.py for the same reason
__version__ = "1.5.0"
__version__ = "1.6.0"

# nodoc - don't add to the documentation wiki
23 changes: 22 additions & 1 deletion tests/test_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
sys.path.insert(1, os.path.join(sys.path[0], ".."))


def test_example():
def test_example_legacy():
import data_expectations as de

TEST_DATA = {"name": "charles", "age": 12}
Expand All @@ -30,6 +30,27 @@ def test_example():
print("Data Didn't Meet Expectations")


def test_example():
    """
    Runs the documented example that declares expectations as Expectation
    objects keyed by Behaviors members and evaluates one record against them.
    """
    import data_expectations as de
    from data_expectations import Behaviors, Expectation

    record = {"name": "charles", "age": 12}

    age_bounds = {"minimum": 0, "maximum": 120}
    rules = [
        Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="name"),
        Expectation(Behaviors.EXPECT_COLUMN_TO_EXIST, column="age"),
        Expectation(Behaviors.EXPECT_COLUMN_VALUES_TO_BE_BETWEEN, column="age", config=age_bounds),
    ]

    # Built outside the try so only the evaluation is guarded.
    compiled = de.Expectations(rules)
    try:
        de.evaluate_record(compiled, record)
    except de.errors.ExpectationNotMetError:  # pragma: no cover
        print("Data Didn't Meet Expectations")


if __name__ == "__main__":  # pragma: no cover
    # Allow the file to be run directly: execute both documentation examples
    # (new-style and legacy) and report success if neither raises.
    test_example()
    test_example_legacy()
    print("✅ okay")
31 changes: 0 additions & 31 deletions tests/test_expect_column_names_to_match_set.py

This file was deleted.

Loading

0 comments on commit 088e5bb

Please sign in to comment.