Trade: serialization for MeshData.

At first I attempted to make the whole thing reinterpret_cast-able from a blob of memory (i.e., truly zero-overhead), but while that sounded cool and all, it moved the overhead to basically all other code -- each function had to special-case access to attribute/vertex/index data as the pointers were no longer pointers, the binary representation had various weird unexplainable gaps ("here an array deleter is stored, set that to null and don't ask"), release*() functions got more complicated and when I got to issues with move construction/assignment I knew this was not the right path. Now the MeshData internals are packed to a much more compact representation (with the first attempt it was 128 bytes, now it's just 64) and the serialization doesn't make everything else slower, more complex or harder to test, which is a win.
mosra · Apr 17, 2020 · d6cce9b · d6cce9b
1 parent 9649979
commit d6cce9b
Show file tree

Hide file tree

Showing 16 changed files with 557 additions and 1 deletion.
diff --git a/src/Magnum/Trade/MeshData.cpp b/src/Magnum/Trade/MeshData.cpp
@@ -795,6 +795,156 @@ Containers::Array<char> MeshData::releaseVertexData() {
     return out;
 }
 
+namespace {
+    /* Fixed-layout header of a serialized mesh chunk. Begins with the generic
+       DataChunkHeader and continues with the mesh-specific fields below. The
+       field order and sizes are the binary format itself -- deserialize()
+       casts the incoming bytes directly to this struct -- so any change here
+       changes the on-disk layout. */
+    struct MeshDataHeader: DataChunkHeader {
+        /* Index count, written from MeshData::_indexCount */
+        UnsignedInt indexCount;
+        /* Vertex count, written from MeshData::_vertexCount */
+        UnsignedInt vertexCount;
+        MeshPrimitive primitive;
+        /* A default-constructed MeshIndexType{} is treated by deserialize()
+           as "mesh is not indexed" */
+        MeshIndexType indexType;
+        /* Unnamed bit-field, one byte of explicit padding so that
+           attributeCount lands on a two-byte boundary */
+        Byte:8;
+        UnsignedShort attributeCount;
+        /* Byte offset of the first index inside the index data array */
+        std::size_t indexOffset;
+        /* Sizes of the index and vertex data arrays in bytes. Being
+           std::size_t, these (and indexOffset) are 4 or 8 bytes depending on
+           platform bitness. */
+        std::size_t indexDataSize;
+        std::size_t vertexDataSize;
+    };
+
+    /* Guard the layout: 48 bytes with 4-byte pointers, 64 bytes otherwise */
+    static_assert(sizeof(MeshDataHeader) == (sizeof(void*) == 4 ? 48 : 64),
+        "MeshDataHeader has unexpected size");
+}
+
+/* Creates a MeshData referencing the index, vertex and attribute data stored
+   in a serialized blob, without copying them. Returns Containers::NullOpt and
+   prints a message to Error if the chunk header is rejected, the chunk is not
+   a version-0 mesh chunk, the sizes stored in the header don't add up to the
+   chunk size or the indices / attributes would reach outside of their data
+   arrays.
+
+   All values read from the header and the attribute list are untrusted, so
+   the range checks are written to not be defeated by integer wraparound. */
+Containers::Optional<MeshData> MeshData::deserialize(Containers::ArrayView<const void> data) {
+    /* Validate the header. If that fails, the error has been already printed,
+       so just propagate */
+    const DataChunkHeader* chunk = dataChunkHeaderDeserialize(data);
+    if(!chunk) return Containers::NullOpt;
+
+    /* Basic header validity */
+    if(chunk->type != DataChunkType::Mesh) {
+        Error{} << "Trade::MeshData::deserialize(): expected data chunk type" << DataChunkType::Mesh << "but got" << chunk->type;
+        return Containers::NullOpt;
+    }
+    if(chunk->typeVersion != 0) {
+        Error{} << "Trade::MeshData::deserialize(): invalid chunk type version, expected 0 but got" << chunk->typeVersion;
+        return Containers::NullOpt;
+    }
+    if(chunk->size < sizeof(MeshDataHeader)) {
+        Error{} << "Trade::MeshData::deserialize(): expected at least a" << sizeof(MeshDataHeader) << Debug::nospace << "-byte chunk for a header but got" << chunk->size;
+        return Containers::NullOpt;
+    }
+
+    /* Reinterpret as a mesh data and check that everything can fit. The
+       expected total is still calculated for the error message, but the
+       actual comparison is done piece by piece against the bytes remaining
+       after the header -- a plain sum could wrap around for huge index /
+       vertex data sizes and accidentally match the chunk size. */
+    const MeshDataHeader& header = static_cast<const MeshDataHeader&>(*chunk);
+    const std::size_t attributeDataSize = header.attributeCount*sizeof(MeshAttributeData);
+    const std::size_t size = sizeof(MeshDataHeader) + attributeDataSize + header.indexDataSize + header.vertexDataSize;
+    const std::size_t payloadSize = chunk->size - sizeof(MeshDataHeader);
+    if(attributeDataSize > payloadSize ||
+       header.indexDataSize > payloadSize - attributeDataSize ||
+       header.vertexDataSize != payloadSize - attributeDataSize - header.indexDataSize) {
+        Error{} << "Trade::MeshData::deserialize(): expected a" << size << Debug::nospace << "-byte chunk but got" << chunk->size;
+        return Containers::NullOpt;
+    }
+
+    /* The attribute list directly follows the header, then comes the index
+       data and finally the vertex data */
+    const char* const payload = reinterpret_cast<const char*>(data.data()) + sizeof(MeshDataHeader);
+    Containers::ArrayView<const MeshAttributeData> attributeData{reinterpret_cast<const MeshAttributeData*>(payload), header.attributeCount};
+    Containers::ArrayView<const char> vertexData{payload + attributeDataSize + header.indexDataSize, header.vertexDataSize};
+
+    /* Check bounds of indices and all attributes */
+    /** @todo this will assert on invalid index type */
+    Containers::ArrayView<const char> indexData;
+    MeshIndexData indices;
+    if(header.indexType != MeshIndexType{}) {
+        /* Widen the type size to std::size_t so the multiplication below
+           isn't done in 32 bits. The end offset is only used for the message;
+           the check itself compares the count against what fits after the
+           offset, which can't wrap around. The zero test only protects the
+           division in case the size query returns 0 for an invalid type. */
+        const std::size_t indexTypeSize = meshIndexTypeSize(header.indexType);
+        const std::size_t indexEnd = header.indexOffset + header.indexCount*indexTypeSize;
+        if(header.indexOffset > header.indexDataSize ||
+           (indexTypeSize && header.indexCount > (header.indexDataSize - header.indexOffset)/indexTypeSize)) {
+            Error{} << "Trade::MeshData::deserialize(): indices [" <<  Debug::nospace << header.indexOffset << Debug::nospace << ":" << Debug::nospace << indexEnd << Debug::nospace << "] out of range for" << header.indexDataSize << "bytes of index data";
+            return Containers::NullOpt;
+        }
+
+        indexData = Containers::ArrayView<const char>{payload + attributeDataSize, header.indexDataSize};
+        /* NOTE(review): the index view spans everything after the offset, not
+           just header.indexCount indices -- confirm this is intended */
+        indices = MeshIndexData{header.indexType, indexData.suffix(header.indexOffset)};
+    }
+    for(std::size_t i = 0; i != attributeData.size(); ++i) {
+        const MeshAttributeData& attribute = attributeData[i];
+
+        /** @todo this will assert on invalid vertex format */
+        /** @todo check also consistency of vertex count and _isOffsetOnly? */
+        /* Check that the view fits into the provided vertex data array. For
+           implementation-specific formats we don't know the size so use 0 to
+           check at least partially. */
+        const UnsignedInt typeSize =
+            isVertexFormatImplementationSpecific(attribute._format) ? 0 :
+            vertexFormatSize(attribute._format);
+        /* Widen the vertex count to std::size_t before multiplying with the
+           stride, otherwise the product is truncated to 32 bits and a large
+           vertex count could slip through. The offset is checked separately
+           so a huge value can't wrap the sum back into range. */
+        /* NOTE(review): a negative stride in the input is not rejected here
+           -- presumably the stride is a signed type, confirm and validate */
+        const std::size_t attributeEnd = attribute._data.offset + std::size_t(header.vertexCount - 1)*attribute._stride + typeSize;
+        if(header.vertexCount && (attribute._data.offset > header.vertexDataSize || attributeEnd > header.vertexDataSize)) {
+            Error{} << "Trade::MeshData::deserialize(): attribute" << i << "[" << Debug::nospace << attribute._data.offset << Debug::nospace << ":" << Debug::nospace << attributeEnd << Debug::nospace << "] out of range for" << header.vertexDataSize << "bytes of vertex data";
+            return Containers::NullOpt;
+        }
+    }
+
+    /* Neither the index nor the vertex data get any data flags here, the
+       instance only references the input */
+    return MeshData{header.primitive,
+        {}, indexData, indices,
+        {}, vertexData, meshAttributeDataNonOwningArray(attributeData),
+        header.vertexCount};
+}
+
+/* Number of bytes the serialized representation of this mesh occupies */
+std::size_t MeshData::serializedSize() const {
+    /* Fixed-size header first, then the attribute list, then both data
+       arrays */
+    std::size_t total = sizeof(MeshDataHeader);
+    total += _attributes.size()*sizeof(MeshAttributeData);
+    total += _indexData.size();
+    total += _vertexData.size();
+    return total;
+}
+
+/* Writes the serialized representation of this mesh into `out`: the chunk
+   header, the mesh header, the attribute list converted to offset-only
+   attributes, the index data and finally the vertex data. Asserts that `out`
+   has exactly serializedSize() bytes. Returns the number of bytes written. */
+std::size_t MeshData::serializeInto(Containers::ArrayView<char> out) const {
+    /* Calculated unconditionally -- the value is needed also by the internal
+       assertion at the end of the function, so it can't be hidden behind a
+       preprocessor guard. The cast silences an unused variable warning in
+       case both assertions compile to nothing. */
+    const std::size_t size = serializedSize();
+    static_cast<void>(size);
+    /* NOTE(review): the condition requires an exact size while the message
+       says "at least" -- confirm which one is intended */
+    CORRADE_ASSERT(out.size() == size, "Trade::MeshData::serializeInto(): data too small, expected at least" << size << "bytes but got" << out.size(), {});
+
+    /* Serialize the header */
+    dataChunkHeaderSerializeInto(out, DataChunkType::Mesh, 0);
+
+    /* Memset the header to avoid padding getting random values. This covers
+       the mesh-specific part of the header (everything after the generic
+       chunk header) together with the whole attribute list. */
+    std::memset(out.data() + sizeof(DataChunkHeader), 0, sizeof(MeshDataHeader) + _attributes.size()*sizeof(MeshAttributeData) - sizeof(DataChunkHeader));
+
+    MeshDataHeader& header = *reinterpret_cast<MeshDataHeader*>(out.data());
+    header.indexCount = _indexCount;
+    header.vertexCount = _vertexCount;
+    header.primitive = _primitive;
+    header.indexType = _indexType;
+    header.attributeCount = _attributes.size();
+    /* Stored relative to the beginning of the index data array */
+    header.indexOffset = _indices - _indexData.data();
+    header.indexDataSize = _indexData.size();
+    header.vertexDataSize = _vertexData.size();
+
+    std::size_t offset = sizeof(MeshDataHeader);
+
+    /* Copy the attribute data, turning them into offset-only */
+    auto outAttributeData = Containers::arrayCast<MeshAttributeData>(out.slice(offset, offset + sizeof(MeshAttributeData)*_attributes.size()));
+    for(std::size_t i = 0; i != outAttributeData.size(); ++i) {
+        /* Pointer-based attributes get converted to an offset relative to
+           the beginning of the vertex data array */
+        if(_attributes[i]._isOffsetOnly)
+            outAttributeData[i]._data.offset = _attributes[i]._data.offset;
+        else
+            outAttributeData[i]._data.offset = reinterpret_cast<const char*>(_attributes[i]._data.pointer) - _vertexData;
+        outAttributeData[i]._vertexCount = _attributes[i]._vertexCount;
+        outAttributeData[i]._format = _attributes[i]._format;
+        outAttributeData[i]._stride = _attributes[i]._stride;
+        outAttributeData[i]._name = _attributes[i]._name;
+        outAttributeData[i]._arraySize = _attributes[i]._arraySize;
+        outAttributeData[i]._isOffsetOnly = true;
+    }
+    offset += sizeof(MeshAttributeData)*_attributes.size();
+
+    /* Copy the index data */
+    Utility::copy(_indexData, out.slice(offset, offset + _indexData.size()));
+    offset += _indexData.size();
+
+    /* Copy the vertex data */
+    Utility::copy(_vertexData, out.slice(offset, offset + _vertexData.size()));
+    offset += _vertexData.size();
+
+    /* Check we calculated correctly, return number of bytes written */
+    CORRADE_INTERNAL_ASSERT(offset == size);
+    return offset;
+}
+
+/* Serializes the mesh into a newly allocated array */
+Containers::Array<char> MeshData::serialize() const {
+    /* Allocate uninitialized storage of exactly the serialized size and let
+       serializeInto() fill it */
+    const std::size_t blobSize = serializedSize();
+    Containers::Array<char> blob{Containers::NoInit, blobSize};
+    serializeInto(blob);
+    return blob;
+}
+
 Debug& operator<<(Debug& debug, const MeshAttribute value) {
     debug << "Trade::MeshAttribute" << Debug::nospace;
 

diff --git a/src/Magnum/Trade/MeshData.h b/src/Magnum/Trade/MeshData.h
@@ -31,6 +31,7 @@
  */
 
 #include <Corrade/Containers/Array.h>
+#include <Corrade/Containers/Optional.h>
 #include <Corrade/Containers/StridedArrayView.h>
 
 #include "Magnum/Mesh.h"
@@ -709,6 +710,53 @@ you can also supply implementation-specific values that are not available in
 the generic @ref MeshPrimitive enum, similarly see also
 @ref Trade-MeshAttributeData-custom-vertex-format for details on
 implementation-specific @ref VertexFormat values.
+
+@section Trade-MeshData-serialization Memory-mappable serialization format
+
+Using @ref serialize(), an instance of this class can be serialized into a
+binary format, and deserialized back using @ref deserialize(). The
+deserialization only involves various sanity checks followed by a creation of a
+new @ref MeshData instance referencing the index, vertex and attribute data.
+It thus makes it possible to operate for example directly on a memory-mapped
+file. The binary representation begins with @ref DataChunkHeader of type
+@ref DataChunkType::Mesh and type version @cpp 0 @ce. The rest is defined like
+below, depending on bitness and endianness defined by the header signature.
+Fields that are stored in an endian-dependent way are marked with
+@m_class{m-label m-primary} **E**:
+
+@m_class{m-fullwidth}
+
+Byte offset | Byte size | Contents
+----------- | --------- | -----------------------------------------------------
+20 or 24 | 4 @m_class{m-label m-primary} **E** | Index count, or @cpp 0 @ce if the mesh has no indices
+24 or 28 | 4 @m_class{m-label m-primary} **E** | Vertex count, or @cpp 0 @ce if the mesh has no vertices
+28 or 32 | 4 @m_class{m-label m-primary} **E** | Mesh primitive, defined with @ref MeshPrimitive
+32 or 36    | 1         | Index type, defined with @ref MeshIndexType, or zero if the mesh is not indexed
+33 or 37    | 1         | @m_class{m-text m-dim} *Padding / reserved*
+34 or 38 | 2 @m_class{m-label m-primary} **E** | Attribute count
+36 or 40 | 4 or 8 @m_class{m-label m-primary} **E** | Index offset in the index data array
+40 or 48 | 4 or 8 @m_class{m-label m-primary} **E** | Index data size in bytes
+44 or 56 | 4 or 8 @m_class{m-label m-primary} **E** | Vertex data size in bytes
+48 or 64 | ... @m_class{m-label m-primary} **E** | List of @ref MeshAttributeData entries, count defined by attribute count above
+... | ... @m_class{m-label m-primary} **E** | Index data, byte count defined by index data size above
+... | ... @m_class{m-label m-primary} **E** | Vertex data, byte count defined by vertex data size above
+
+For the attribute list, each @ref MeshAttributeData entry is either 20 or 24
+bytes, with fields defined like this. In this case it exactly matches the
+internals of @ref MeshAttributeData to allow the attribute array to be
+referenced directly from the original memory:
+
+Byte offset | Byte size | Contents
+----------- | --------- | -----------------------------------------------------
+0 | 4 @m_class{m-label m-primary} **E** | Vertex format, defined with @ref VertexFormat
+4 | 2 @m_class{m-label m-primary} **E** | Mesh attribute name, defined with @ref MeshAttribute
+6           | 1         | Whether the attribute is offset-only. Always @cpp 1 @ce.
+7           | 1         | @m_class{m-text m-dim} *Padding / reserved*
+8 | 4 @m_class{m-label m-primary} **E** | Vertex count. Same value as the vertex count field above.
+12 | 2 @m_class{m-label m-primary} **E** | Vertex stride. Always positive and not larger than @cpp 32767 @ce.
+14 | 2 @m_class{m-label m-primary} **E** | Attribute array size
+16 | 4 or 8 @m_class{m-label m-primary} **E** | Attribute offset in the vertex data array
+
 @see @ref AbstractImporter::mesh()
 */
 class MAGNUM_TRADE_EXPORT MeshData {
@@ -721,6 +769,30 @@ class MAGNUM_TRADE_EXPORT MeshData {
             ImplicitVertexCount = ~UnsignedInt{}
         };
 
+        /**
+         * @brief Try to deserialize from a memory-mappable representation
+         * @param data      Serialized data, beginning with a data chunk
+         *      header
+         * @return  A @ref MeshData instance, or @ref Containers::NullOpt on
+         *      failure
+         *
+         * If @p data is a valid serialized representation of @ref MeshData
+         * matching current platform, returns a @ref MeshData instance
+         * referencing the original data. On failure prints an error message
+         * and returns @ref Containers::NullOpt.
+         *
+         * The returned instance doesn't provide mutable access to the original
+         * data, pass a non-const view to the overload below to get that.
+         * @see @ref serialize()
+         */
+        static Containers::Optional<MeshData> deserialize(Containers::ArrayView<const void> data);
+
+        /**
+         * @overload
+         *
+         * Picked for arguments convertible to a mutable
+         * @ref Containers::ArrayView "Containers::ArrayView<void>". Delegates
+         * to the overload above and, on success, marks both the index and the
+         * vertex data of the returned instance as @ref DataFlag::Mutable.
+         */
+        template<class T, class = typename std::enable_if<std::is_convertible<T&&, Containers::ArrayView<void>>::value>::type> static Containers::Optional<MeshData> deserialize(T&& data) {
+            Containers::Optional<MeshData> mesh = deserialize(Containers::ArrayView<const void>{data});
+
+            /* Nothing to flag if the deserialization failed */
+            if(!mesh) return mesh;
+
+            mesh->_indexDataFlags = DataFlag::Mutable;
+            mesh->_vertexDataFlags = DataFlag::Mutable;
+            return mesh;
+        }
+
         /**
          * @brief Construct an indexed mesh data
          * @param primitive     Primitive
@@ -1775,6 +1847,30 @@ class MAGNUM_TRADE_EXPORT MeshData {
          */
         const void* importerState() const { return _importerState; }
 
+        /**
+         * @brief Size of serialized data
+         *
+         * Amount of bytes written by @ref serializeInto() or @ref serialize().
+         * Equal to the size of the serialized header plus the size of the
+         * attribute list, the index data and the vertex data.
+         */
+        std::size_t serializedSize() const;
+
+        /**
+         * @brief Serialize to a memory-mappable representation
+         *
+         * Allocates an array of @ref serializedSize() bytes and fills it using
+         * @ref serializeInto().
+         * @see @ref serializeInto(), @ref deserialize()
+         */
+        Containers::Array<char> serialize() const;
+
+        /**
+         * @brief Serialize to a memory-mappable representation into an existing array
+         * @param[out] out      Where to write the output
+         * @return  Number of bytes written. Same as @ref serializedSize().
+         *
+         * Expects that the size of @p out is exactly @ref serializedSize().
+         * @see @ref serialize(), @ref deserialize()
+         */
+        std::size_t serializeInto(Containers::ArrayView<char> out) const;
+
     private:
         /* For custom deleter checks. Not done in the constructors here because
            the restriction is pointless when used outside of plugin

diff --git a/src/Magnum/Trade/Test/CMakeLists.txt b/src/Magnum/Trade/Test/CMakeLists.txt
@@ -52,7 +52,24 @@ corrade_add_test(TradeDataTest DataTest.cpp LIBRARIES MagnumTradeTestLib)
 corrade_add_test(TradeImageDataTest ImageDataTest.cpp LIBRARIES MagnumTradeTestLib)
 corrade_add_test(TradeLightDataTest LightDataTest.cpp LIBRARIES MagnumTrade)
 corrade_add_test(TradeMaterialDataTest MaterialDataTest.cpp LIBRARIES MagnumTradeTestLib)
-corrade_add_test(TradeMeshDataTest MeshDataTest.cpp LIBRARIES MagnumTradeTestLib)
+
+# The mesh data test additionally ships pre-serialized blobs listed under
+# FILES. Judging by the names they cover big/little endian and 32/64-bit
+# variants of a regular, an empty and a non-indexed mesh -- TODO confirm
+# against the test source.
+corrade_add_test(TradeMeshDataTest MeshDataTest.cpp
+    LIBRARIES MagnumTradeTestLib
+    FILES
+        mesh-be32.blob
+        mesh-be64.blob
+        mesh-le32.blob
+        mesh-le64.blob
+        mesh-empty-be32.blob
+        mesh-empty-be64.blob
+        mesh-empty-le32.blob
+        mesh-empty-le64.blob
+        mesh-nonindexed-be32.blob
+        mesh-nonindexed-be64.blob
+        mesh-nonindexed-le32.blob
+        mesh-nonindexed-le64.blob)
+# Makes headers from the build directory visible to the test, presumably a
+# generated configure header with the blob location -- verify
+target_include_directories(TradeMeshDataTest PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+
 corrade_add_test(TradeObjectData2DTest ObjectData2DTest.cpp LIBRARIES MagnumTradeTestLib)
 corrade_add_test(TradeObjectData3DTest ObjectData3DTest.cpp LIBRARIES MagnumTradeTestLib)
 corrade_add_test(TradeSceneDataTest SceneDataTest.cpp LIBRARIES MagnumTrade)