diff --git a/bson/binary.py b/bson/binary.py index 6698e55ccc..a1f63adf27 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -490,6 +490,11 @@ def as_vector(self) -> BinaryVector: dtype = BinaryVectorDtype(dtype) n_values = len(self) - position + if padding and dtype != BinaryVectorDtype.PACKED_BIT: + raise ValueError( + f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})" + ) + if dtype == BinaryVectorDtype.INT8: dtype_format = "b" format_string = f"<{n_values}{dtype_format}" @@ -510,6 +515,10 @@ def as_vector(self) -> BinaryVector: elif dtype == BinaryVectorDtype.PACKED_BIT: # data packed as uint8 + if padding and not n_values: + raise ValueError("Corrupt data. Vector has a padding P, but no data.") + if padding > 7 or padding < 0: + raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.") dtype_format = "B" format_string = f"<{n_values}{dtype_format}" unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) diff --git a/test/bson_binary_vector/packed_bit.json b/test/bson_binary_vector/packed_bit.json index a220e7e318..7cc272e38b 100644 --- a/test/bson_binary_vector/packed_bit.json +++ b/test/bson_binary_vector/packed_bit.json @@ -21,22 +21,22 @@ "canonical_bson": "1600000005766563746F7200040000000910007F0700" }, { - "description": "Empty Vector PACKED_BIT", + "description": "PACKED_BIT with padding", "valid": true, - "vector": [], + "vector": [127, 8], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 0, - "canonical_bson": "1400000005766563746F72000200000009100000" + "padding": 3, + "canonical_bson": "1600000005766563746F7200040000000910037F0800" }, { - "description": "PACKED_BIT with padding", + "description": "Empty Vector PACKED_BIT", "valid": true, - "vector": [127, 7], + "vector": [], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 3, - "canonical_bson": "1600000005766563746F7200040000000910037F0700" + "padding": 0, + "canonical_bson": "1400000005766563746F72000200000009100000" }, { "description": "Overflow Vector PACKED_BIT", diff --git a/test/test_bson_binary_vector.py b/test/test_bson_binary_vector.py index 9bfdcbfb9a..ba3eff8bb2 100644 --- a/test/test_bson_binary_vector.py +++ b/test/test_bson_binary_vector.py @@ -48,7 +48,7 @@ def create_test(case_spec): def run_test(self): for test_case in case_spec.get("tests", []): description = test_case["description"] - vector_exp = test_case.get("vector", []) + vector_exp = test_case.get("vector") dtype_hex_exp = test_case["dtype_hex"] dtype_alias_exp = test_case.get("dtype_alias") padding_exp = test_case.get("padding", 0) @@ -85,14 +85,26 @@ def run_test(self): self.assertEqual(cB_obs, canonical_bson_exp, description) else: - with self.assertRaises((struct.error, ValueError), msg=description): - # Tests Binary.from_vector - Binary.from_vector(vector_exp, dtype_exp, padding_exp) - # Tests Binary.as_vector - cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) - decoded_doc = decode(cB_exp) - binary_obs = decoded_doc[test_key] - binary_obs.as_vector() + """ + #### To prove correct in an invalid case (`valid:false`), one MUST + - (encoding case) if the vector field is present, raise an exception + when attempting to encode a document from the numeric values,dtype, and padding. + - (decoding case) if the canonical_bson field is present, raise an exception + when attempting to deserialize it into the corresponding + numeric values, as the field contains corrupted data. + """ + # Tests Binary.from_vector() + if vector_exp is not None: + with self.assertRaises((struct.error, ValueError), msg=description): + Binary.from_vector(vector_exp, dtype_exp, padding_exp) + + # Tests Binary.as_vector() + if canonical_bson_exp is not None: + with self.assertRaises((struct.error, ValueError), msg=description): + cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) + decoded_doc = decode(cB_exp) + binary_obs = decoded_doc[test_key] + binary_obs.as_vector() return run_test