Skip to content

PYTHON-5126 - Implemented new test cases for Binary Vector #2393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bson/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,11 @@ def as_vector(self) -> BinaryVector:
dtype = BinaryVectorDtype(dtype)
n_values = len(self) - position

if padding and dtype != BinaryVectorDtype.PACKED_BIT:
raise ValueError(
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
)

if dtype == BinaryVectorDtype.INT8:
dtype_format = "b"
format_string = f"<{n_values}{dtype_format}"
Expand All @@ -510,6 +515,10 @@ def as_vector(self) -> BinaryVector:

elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8
if padding and not n_values:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
dtype_format = "B"
format_string = f"<{n_values}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
Expand Down
16 changes: 8 additions & 8 deletions test/bson_binary_vector/packed_bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
},
{
"description": "Empty Vector PACKED_BIT",
"description": "PACKED_BIT with padding",
"valid": true,
"vector": [],
"vector": [127, 8],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009100000"
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
},
{
"description": "PACKED_BIT with padding",
"description": "Empty Vector PACKED_BIT",
"valid": true,
"vector": [127, 7],
"vector": [],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009100000"
},
{
"description": "Overflow Vector PACKED_BIT",
Expand Down
30 changes: 21 additions & 9 deletions test/test_bson_binary_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def create_test(case_spec):
def run_test(self):
for test_case in case_spec.get("tests", []):
description = test_case["description"]
vector_exp = test_case.get("vector", [])
vector_exp = test_case.get("vector")
dtype_hex_exp = test_case["dtype_hex"]
dtype_alias_exp = test_case.get("dtype_alias")
padding_exp = test_case.get("padding", 0)
Expand Down Expand Up @@ -85,14 +85,26 @@ def run_test(self):
self.assertEqual(cB_obs, canonical_bson_exp, description)

else:
with self.assertRaises((struct.error, ValueError), msg=description):
# Tests Binary.from_vector
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
# Tests Binary.as_vector
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()
"""
#### To prove correct in an invalid case (`valid:false`), one MUST
- (encoding case) if the vector field is present, raise an exception
when attempting to encode a document from the numeric values, dtype, and padding.
- (decoding case) if the canonical_bson field is present, raise an exception
when attempting to deserialize it into the corresponding
numeric values, as the field contains corrupted data.
"""
# Tests Binary.from_vector()
if vector_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
Binary.from_vector(vector_exp, dtype_exp, padding_exp)

# Tests Binary.as_vector()
if canonical_bson_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()

return run_test

Expand Down
Loading