Skip to content

Commit

Permalink
AVRO-2921: Type Fixes for avro.io (apache#1264)
Browse files: browse the repository at this point in the history
  • Loading branch information
kojiromike authored Jun 23, 2021
1 parent 5bb0320 commit 6c4e165
Show file tree
Hide file tree
Showing 8 changed files with 438 additions and 342 deletions.
22 changes: 12 additions & 10 deletions lang/py/avro/datafile.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,13 +25,13 @@
import io
import json
from types import TracebackType
from typing import BinaryIO, MutableMapping, Optional, Type
from typing import BinaryIO, MutableMapping, Optional, Type, cast

import avro.codecs
import avro.errors
import avro.io
import avro.schema
import avro.utils
from avro.utils import TypedDict, randbytes

VERSION = 1
MAGIC = bytes(b"Obj" + bytearray([VERSION]))
Expand Down Expand Up @@ -60,6 +60,12 @@
SCHEMA_KEY = "avro.schema"


class HeaderType(TypedDict):
magic: bytes
meta: MutableMapping[str, bytes]
sync: bytes


class _DataFileMetadata:
"""
Mixin for meta properties.
Expand Down Expand Up @@ -183,7 +189,7 @@ def __init__(
writer.seek(0, 2)
self._header_written = True
return
self.sync_marker = avro.utils.randbytes(16)
self.sync_marker = randbytes(16)
self.codec = codec
self.schema = str(writers_schema)
self.datum_writer.writers_schema = writers_schema
Expand Down Expand Up @@ -358,16 +364,10 @@ def _read_header(self) -> None:
self.reader.seek(0, 0)

# read header into a dict
header = self.datum_reader.read_data(META_SCHEMA, META_SCHEMA, self.raw_decoder)

# check magic number
header = cast(HeaderType, self.datum_reader.read_data(META_SCHEMA, META_SCHEMA, self.raw_decoder))
if header.get("magic") != MAGIC:
raise avro.errors.AvroException(f"Not an Avro data file: {header.get('magic')!r} doesn't match {MAGIC!r}.")

# set metadata
self._meta = header["meta"]

# set sync marker
self.sync_marker = header["sync"]

def _read_block_header(self) -> None:
Expand All @@ -393,6 +393,8 @@ def __next__(self) -> object:
raise StopIteration
self._read_block_header()

if self.datum_decoder is None:
raise avro.errors.DataFileException("DataFile is not ready to read because it has no decoder")
datum = self.datum_reader.read(self.datum_decoder)
self.block_count -= 1
return datum
Expand Down
10 changes: 9 additions & 1 deletion lang/py/avro/errors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -56,7 +56,11 @@ def __init__(self, *args):
expected_schema, datum = args[:2]
except (IndexError, ValueError):
return super().__init__(*args)
return super().__init__(f"The datum {datum} is not an example of the schema {_safe_pretty(expected_schema)}")
return super().__init__(f"The datum {datum} of the type {type(datum)} is not an example of the schema {_safe_pretty(expected_schema)}")


class InvalidDefaultException(AvroTypeException):
"""Raised when a default value isn't a suitable type for the schema."""


class AvroOutOfScaleException(AvroTypeException):
Expand All @@ -81,6 +85,10 @@ class DataFileException(AvroException):
"""Raised when there's a problem reading or writing file object containers."""


class IONotReadyException(AvroException):
"""Raised when attempting an avro operation on an io object that isn't fully initialized."""


class AvroRemoteException(AvroException):
"""Raised when an error message is sent by an Avro requestor or responder."""

Expand Down
Loading

0 comments on commit 6c4e165

Please sign in to comment.