Skip to content

Commit

Permalink
DAG-2865 binary vs nonbinary artifact stream handling with unit test (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
aldo-aguilar authored Sep 25, 2023
1 parent f995b5b commit 31839e4
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 3.6.15

- Added support for binary and nonbinary artifact streams.

## 3.6.14

- Fix bug in `Version` to always initialize member `build_variants_map`.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "evergreen.py"
version = "3.6.14"
version = "3.6.15"
description = "Python client for the Evergreen API"
authors = [
"Dev Prod DAG <[email protected]>",
Expand Down
24 changes: 19 additions & 5 deletions src/evergreen/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,20 +200,34 @@ def _call_api(
self._raise_for_status(response)
return response

def _stream_api(self, url: str, params: Dict = None) -> Iterable:
def _stream_api(
self,
url: str,
params: Optional[Dict] = None,
decode_unicode: bool = True,
chunk_size: Optional[int] = None,
is_binary: bool = False,
) -> Iterable:
"""
Make a streaming GET call, yielding chunks (binary) or lines (nonbinary).
:param url: url to call
:param params: url parameters
:param decode_unicode: passed through as ``decode_unicode`` to ``iter_content``/``iter_lines``
:param chunk_size: chunk size passed to ``iter_content``; only used when ``is_binary`` is True
:param is_binary: if True, yield from ``iter_content``; otherwise yield from ``iter_lines``
:return: Iterable over the chunks (binary) or lines (nonbinary) of the returned content.
"""
start_time = time()

with self.session.get(url=url, params=params, stream=True, timeout=self._timeout) as res:
self._log_api_call_time(res, start_time)

for line in res.iter_lines(decode_unicode=True):
yield line
if is_binary:
for line in res.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
yield line
else:
for line in res.iter_lines(decode_unicode=decode_unicode):
yield line

def _raise_for_status(self, response: requests.Response) -> None:
"""
Expand Down
29 changes: 27 additions & 2 deletions src/evergreen/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
["create_time", "dispatch_time", "finish_time", "ingest_time", "scheduled_time", "start_time"]
)

# Top-level media types (the part of content_type before "/") that
# Artifact._is_binary treats as binary.
_BINARY_TYPES = ["application"]


class Artifact(_BaseEvergreenObject):
"""Representation of a task artifact from evergreen."""
Expand All @@ -34,18 +36,41 @@ class Artifact(_BaseEvergreenObject):
url = evg_attrib("url")
visibility = evg_attrib("visibility")
ignore_for_fetch = evg_attrib("ignore_for_fetch")
content_type = evg_attrib("content_type")

def __init__(self, json: Dict[str, Any], api: "EvergreenApi") -> None:
    """
    Create an instance of an evergreen task artifact.

    :param json: json of the artifact, handed to the base object.
    :param api: evergreen api object, handed to the base object.
    """
    super().__init__(json, api)

def stream(self) -> Iterable[str]:
def stream(
self,
decode_unicode: bool = True,
chunk_size: Optional[int] = None,
is_binary: Optional[bool] = None,
) -> Iterable[str]:
"""
Retrieve an iterator of the streamed contents of this artifact.
NOTE(review): items may be bytes rather than str when decode_unicode is False
or the artifact is binary -- confirm and consider widening the return annotation.
:param decode_unicode: determines if we decode as unicode; forwarded to ``_stream_api``
:param chunk_size: the size of the chunks to be read; forwarded to ``_stream_api``
:param is_binary: explicit override; when None, the value is derived from content_type
:return: Iterable to stream contents of artifact.
"""
return self._api._stream_api(self.url)
if is_binary is None:
is_binary = self._is_binary()

return self._api._stream_api(
self.url, decode_unicode=decode_unicode, chunk_size=chunk_size, is_binary=is_binary,
)

def _is_binary(self) -> bool:
    """
    Determine if an artifact is binary based on content_type.

    Only the top-level media type (the text before the first "/") is compared
    against ``_BINARY_TYPES``. A missing, empty, or slash-less content type is
    treated as nonbinary instead of raising.

    :return: True if the top-level media type is listed in ``_BINARY_TYPES``.
    """
    content_type = self.content_type
    if not content_type:
        # Nothing to classify (None or empty string): default to nonbinary.
        return False

    # partition() never raises, unlike unpacking split("/") into two names,
    # which fails on values with zero or more than one slash.
    main_type = content_type.partition("/")[0]

    # Media type names are case-insensitive, so normalise before comparing.
    return main_type.strip().lower() in _BINARY_TYPES


class StatusScore(IntEnum):
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ def get_sample_yaml(file):
return yaml.safe_load(file_data)


@pytest.fixture()
def sample_binary_artifact():
"""Return sample binary artifact json loaded from binary_artifact.json."""
return get_sample_json("binary_artifact.json")


@pytest.fixture()
def sample_nonbinary_artifact():
"""Return sample nonbinary artifact json loaded from nonbinary_artifact.json."""
return get_sample_json("nonbinary_artifact.json")


@pytest.fixture()
def sample_host():
"""Return sample host json."""
Expand Down
7 changes: 7 additions & 0 deletions tests/evergreen/data/binary_artifact.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"name": "mongodb-arm64.tar.gz",
"url": "https://mciuploads.s3.amazonaws.com/dsi/sys_perf_7e36a45d31a6d5f2c1edcd1a4f009a86bd209210/7e36a45d31a6d5f2c1edcd1a4f009a86bd209210/linux/mongodb-arm64-sys_perf_7e36a45d31a6d5f2c1edcd1a4f009a86bd209210.tar.gz",
"visibility": "",
"ignore_for_fetch": false,
"content_type": "application/x-gzip"
}
7 changes: 7 additions & 0 deletions tests/evergreen/data/nonbinary_artifact.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"name": "Documentation",
"url": "https://mciuploads.s3.amazonaws.com/dsi/compile-amazon-linux2-arm64/7e36a45d31a6d5f2c1edcd1a4f009a86bd209210/sys_perf_compile_amazon_linux2_arm64_compile_7e36a45d31a6d5f2c1edcd1a4f009a86bd209210_23_09_20_15_55_48/sys_perf_7e36a45d31a6d5f2c1edcd1a4f009a86bd209210/logs/compile-sys_perf_compile_amazon_linux2_arm64_7e36a45d31a6d5f2c1edcd1a4f009a86bd209210_23_09_20_15_55_48-index.html",
"visibility": "",
"ignore_for_fetch": false,
"content_type": "text/html"
}
64 changes: 64 additions & 0 deletions tests/evergreen/test_artifact_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Unit tests for Artifact class streams in src/evergreen/task.py"""
from unittest.mock import MagicMock

import pytest
from requests.models import Response

from evergreen.task import Artifact

# Canned items the mocked response hands back from both iter_content and iter_lines.
RESPONSE_DATA = [b"data\nwith\nnew\nlines", b"second\nchunck\nof\ndata"]


@pytest.fixture
def mocked_res():
    """Return a mocked Response whose streaming iterators yield RESPONSE_DATA."""
    response = MagicMock(spec=Response)

    # Entering the response as a context manager hands back the same mock.
    response.__enter__.return_value = response

    response.request = MagicMock()
    response.request.url = "url"

    # Each streaming method gets its own iterator over the canned data.
    response.iter_lines.return_value = iter(RESPONSE_DATA)
    response.iter_content.return_value = iter(RESPONSE_DATA)
    return response


class TestArtifactStream(object):
    """Check which response iterator Artifact.stream uses for each kind of artifact."""

    def test_binary_artifact_stream(self, sample_binary_artifact, mocked_api, mocked_res):
        """A binary artifact is read through iter_content and never iter_lines."""
        mocked_api.session.get = MagicMock(return_value=mocked_res)
        binary_artifact = Artifact(sample_binary_artifact, mocked_api)

        streamed = list(binary_artifact.stream())

        mocked_res.iter_content.assert_called_once()
        mocked_res.iter_lines.assert_not_called()
        assert streamed == RESPONSE_DATA

    def test_binary_artifact_stream_with_params(
        self, sample_binary_artifact, mocked_api, mocked_res
    ):
        """decode_unicode and chunk_size are forwarded to iter_content."""
        mocked_api.session.get = MagicMock(return_value=mocked_res)
        binary_artifact = Artifact(sample_binary_artifact, mocked_api)
        stream_kwargs = {"decode_unicode": False, "chunk_size": 2}

        streamed = list(binary_artifact.stream(**stream_kwargs))

        mocked_res.iter_content.assert_called_once_with(**stream_kwargs)
        mocked_res.iter_lines.assert_not_called()
        assert streamed == RESPONSE_DATA

    def test_nonbinary_artifact_stream(self, sample_nonbinary_artifact, mocked_api, mocked_res):
        """A nonbinary artifact is read through iter_lines and never iter_content."""
        mocked_api.session.get = MagicMock(return_value=mocked_res)
        text_artifact = Artifact(sample_nonbinary_artifact, mocked_api)

        streamed = list(text_artifact.stream())

        mocked_res.iter_lines.assert_called_once()
        mocked_res.iter_content.assert_not_called()
        assert streamed == RESPONSE_DATA

    def test_artifact_stream_override(self, sample_binary_artifact, mocked_api, mocked_res):
        """An explicit is_binary=False skips _is_binary and uses iter_lines."""
        mocked_api.session.get = MagicMock(return_value=mocked_res)
        binary_artifact = Artifact(sample_binary_artifact, mocked_api)
        # Replace the content-type check so any call to it can be detected.
        binary_artifact._is_binary = MagicMock()

        streamed = list(binary_artifact.stream(is_binary=False))

        binary_artifact._is_binary.assert_not_called()
        mocked_res.iter_lines.assert_called_once()
        mocked_res.iter_content.assert_not_called()
        assert streamed == RESPONSE_DATA

0 comments on commit 31839e4

Please sign in to comment.