forked from airbytehq/airbyte
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support semi-incremental by adding an extractor record filter (airbytehq#13520)
* Support semi-incremental by adding an extractor record filter
* Refactor the extractor into a record_selector that supports extraction and filtering of response records
- Loading branch information
Showing
11 changed files
with
216 additions
and
29 deletions.
There are no files selected for viewing
15 changes: 0 additions & 15 deletions
15
airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_extractor.py
This file was deleted.
Oops, something went wrong.
21 changes: 21 additions & 0 deletions
21
airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Any, List, Mapping | ||
|
||
import requests | ||
from airbyte_cdk.sources.declarative.types import Record | ||
|
||
|
||
class HttpSelector(ABC):
    """
    Interface for components that translate an HTTP response into a list of records.
    """

    @abstractmethod
    def select_records(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> List[Record]:
        """
        Select the records to return for an HTTP response.

        :param response: the HTTP response to select records from
        :param stream_state: the stream state made available to the selection
        :param stream_slice: the stream slice made available to the selection, if any
        :param next_page_token: the pagination token made available to the selection, if any
        :return: the list of selected records
        """
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from typing import Any, List, Mapping | ||
|
||
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean | ||
from airbyte_cdk.sources.declarative.types import Record | ||
|
||
|
||
class RecordFilter:
    """
    Filters a list of records, keeping only those for which an interpolated boolean condition holds.
    """

    def __init__(self, config, condition: str = None):
        """
        :param config: the configuration passed to the condition on every evaluation
        :param condition: the condition template handed to InterpolatedBoolean
        """
        self._config = config
        self._filter_interpolator = InterpolatedBoolean(condition)

    def filter_records(
        self,
        records: List[Record],
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> List[Record]:
        """
        Return the records for which the condition evaluates to a truthy value, in their original order.

        :param records: the candidate records
        :param stream_state: exposed to the condition as `stream_state`
        :param stream_slice: exposed to the condition as `stream_slice`
        :param next_page_token: exposed to the condition as `next_page_token`
        :return: the records that satisfy the condition
        """
        # The same context is shared by every evaluation; only `record` changes between them.
        kwargs = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token}
        return [record for record in records if self._filter_interpolator.eval(self._config, record=record, **kwargs)]
36 changes: 36 additions & 0 deletions
36
airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from typing import Any, List, Mapping | ||
|
||
import requests | ||
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector | ||
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor | ||
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter | ||
from airbyte_cdk.sources.declarative.types import Record | ||
|
||
|
||
class RecordSelector(HttpSelector):
    """
    Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering
    records based on a heuristic.
    """

    def __init__(self, extractor: JelloExtractor, record_filter: RecordFilter = None):
        """
        :param extractor: the extractor used to pull records out of the response
        :param record_filter: optional filter applied to the extracted records; when omitted, every extracted record is returned
        """
        self._extractor = extractor
        self._record_filter = record_filter

    def select_records(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> List[Record]:
        """
        Extract the records from the response and, if a filter is configured, keep only those it accepts.

        :param response: the HTTP response to extract records from
        :param stream_state: forwarded to the record filter
        :param stream_slice: forwarded to the record filter
        :param next_page_token: forwarded to the record filter
        :return: the extracted (and possibly filtered) records
        """
        all_records = self._extractor.extract_records(response)
        # Compare against None explicitly so that "no filter configured" is the only case that skips filtering.
        if self._record_filter is None:
            return all_records
        return self._record_filter.filter_records(
            all_records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import pytest | ||
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter | ||
|
||
|
||
@pytest.mark.parametrize(
    "test_name, filter_template, records, expected_records",
    [
        (
            "test_using_state_filter",
            "{{ record['created_at'] > stream_state['created_at'] }}",
            [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}],
            [{"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}],
        ),
        (
            "test_with_slice_filter",
            "{{ record['last_seen'] >= stream_slice['last_seen'] }}",
            [{"id": 1, "last_seen": "06-06-21"}, {"id": 2, "last_seen": "06-07-21"}, {"id": 3, "last_seen": "06-10-21"}],
            [{"id": 3, "last_seen": "06-10-21"}],
        ),
        (
            "test_with_next_page_token_filter",
            "{{ record['id'] >= next_page_token['last_seen_id'] }}",
            [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}],
            [{"id": 14}, {"id": 15}],
        ),
        (
            "test_missing_filter_fields_return_no_results",
            "{{ record['id'] >= next_page_token['path_to_nowhere'] }}",
            [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}],
            [],
        ),
    ],
)
def test_record_filter(test_name, filter_template, records, expected_records):
    """
    Check that RecordFilter keeps exactly the records matching a condition over the stream state, stream slice or next page token.

    The date fixtures are plain strings, so the conditions compare them as strings rather than as dates.
    """
    config = {"response_override": "stop_if_you_see_me"}
    stream_state = {"created_at": "06-06-21"}
    stream_slice = {"last_seen": "06-10-21"}
    next_page_token = {"last_seen_id": 14}
    record_filter = RecordFilter(config=config, condition=filter_template)

    actual_records = record_filter.filter_records(
        records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
    )
    assert actual_records == expected_records
58 changes: 58 additions & 0 deletions
58
airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_selector.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import json | ||
|
||
import pytest | ||
import requests | ||
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder | ||
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor | ||
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter | ||
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector | ||
|
||
|
||
@pytest.mark.parametrize(
    "test_name, transform_template, filter_template, body, expected_records",
    [
        (
            "test_with_extractor_and_filter",
            "_.data",
            "{{ record['created_at'] > stream_state['created_at'] }}",
            {"data": [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}]},
            [{"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}],
        ),
        (
            "test_no_record_filter_returns_all_records",
            "_.data",
            None,
            {"data": [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}]},
            [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}],
        ),
    ],
)
def test_record_filter(test_name, transform_template, filter_template, body, expected_records):
    """
    Check that RecordSelector extracts records from a response body and applies the record filter only when one is configured.
    """
    config = {"response_override": "stop_if_you_see_me"}
    stream_state = {"created_at": "06-06-21"}
    stream_slice = {"last_seen": "06-10-21"}
    next_page_token = {"last_seen_id": 14}

    response = create_response(body)
    decoder = JsonDecoder()
    extractor = JelloExtractor(transform=transform_template, decoder=decoder, config=config, kwargs={})
    # A None template stands for "no filter configured": the selector must then return every extracted record.
    record_filter = None if filter_template is None else RecordFilter(config=config, condition=filter_template)
    record_selector = RecordSelector(extractor=extractor, record_filter=record_filter)

    actual_records = record_selector.select_records(
        response=response, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
    )
    assert actual_records == expected_records
|
||
|
||
def create_response(body):
    """
    Build a requests.Response whose content is the UTF-8 encoded JSON serialization of `body`.

    :param body: a JSON-serializable object to use as the response payload
    :return: the response carrying the serialized body
    """
    response = requests.Response()
    # No public setter exists for the payload, so the content is assigned directly on the private attribute.
    response._content = json.dumps(body).encode("utf-8")
    return response
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters