Skip to content

Commit

Permalink
Add copy api
Browse files Browse the repository at this point in the history
  • Loading branch information
kostya committed Oct 24, 2022
1 parent 682d82c commit 2559dcc
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 0 deletions.
1 change: 1 addition & 0 deletions bench/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ crystal build load_file_serializable.cr --release -o bin_load_file_ser --no-debu
crystal build pack.cr --release -o bin_pack --no-debug
crystal build unpack.cr --release -o bin_unpack --no-debug
crystal build pack_unpack.cr --release -o bin_pack_unpack --no-debug
crystal build copy.cr --release -o bin_copy --no-debug
72 changes: 72 additions & 0 deletions bench/copy.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
require "../src/msgpack"

def copy1(io1, io2)
obj = MessagePack::IOUnpacker.new(io1).read
obj.to_msgpack(io2)
end

def copy2(io1, io2)
MessagePack::Copy.new(io1, io2).copy_object
end

def test_obj_copy(copy_name, test_name, n, type, obj)
io1 = IO::Memory.new
obj.to_msgpack(io1)
io1.rewind

io2 = IO::Memory.new

t = Time.local
n.times do
io1.rewind
io2.clear

yield io1, io2
end

t2 = Time.local

io2.rewind
res = type.from_msgpack(io2)
puts "#{copy_name}[#{test_name}]: #{t2 - t}"

io1.rewind
io2.rewind
unless io1.to_slice == io2.to_slice
puts "Warning, bad copy"
end
end

def test_obj(test_name, n, type, obj)
test_obj_copy("copy_dumb", test_name, n, type, obj) { |io1, io2| copy1(io1, io2) }
test_obj_copy("copy_fast", test_name, n, type, obj) { |io1, io2| copy2(io1, io2) }
end

def bytes(size : Int32) : Bytes
Bytes.new(size) { |i| (i % 256).to_u8 }
end

def byte(value : Int32) : Bytes
Bytes.new(1) { (value % 256).to_u8 }
end

t = Time.local

test_obj("small string", 1000000, String, "a" * 200)
test_obj("small binary", 1000000, Bytes, bytes(200))
test_obj("big string", 10000, String, "a" * 200000)
test_obj("big binary", 10000, Bytes, bytes(200000))
test_obj("hash string string", 10000, Hash(String, String), (0..1000).reduce({} of String => String) { |h, i| h["key#{i}"] = "value#{i}"; h })
test_obj("hash string binary", 10000, Hash(String, Bytes), (0..1000).reduce({} of String => Bytes) { |h, i| h["key#{i}"] = byte(i); h })
test_obj("hash string float64", 10000, Hash(String, Float64), (0..1000).reduce({} of String => Float64) { |h, i| h["key#{i}"] = i / 10.0.to_f64; h })
test_obj("array of strings", 10000, Array(String), Array.new(1000) { |i| "data#{i}" })
test_obj("array of binaries", 10000, Array(Bytes), Array.new(1000) { |i| byte(i) })
test_obj("array of floats", 20000, Array(Float64), Array.new(3000) { |i| i / 10.0 })

ints = [1, -1, 0x21, -0x21, 128, -128, -0x8000, 0x8000, 0xFFFF, -0xFFFF, -0x80000000, 0x80000000, -9223372036854775808, 9223372036854775807, 4294967295, -4294967295]
test_obj("array of mix int sizes", 2000, Array(Int64), Array.new(30000) { |i| ints[i % ints.size] })

data = [Array.new(30) { |i| i }, Array.new(30) { |i| i.to_s }, (0..30).reduce({} of Int32 => String) { |h, i| h[i] = i.to_s; h }, 1, "1"]
test_obj("array of mix of data", 200, Array(Array(Int32) | Array(String) | Hash(Int32, String) | Int32 | String), Array.new(10000) { |i| data[i % data.size] })

puts "Summary time: #{Time.local - t}"
2 changes: 2 additions & 0 deletions bench/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ echo == Crystal Pack
./bin_pack
echo == Crystal Unpack
./bin_unpack
echo == Crystal Copy
./bin_copy

echo == Ruby Generate File
ruby generate_file.rb
Expand Down
30 changes: 30 additions & 0 deletions spec/copy_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
require "./spec_helper"

def copy(x)
io1 = IO::Memory.new
x.to_msgpack(io1)
io1.rewind

io2 = IO::Memory.new
c = MessagePack::Copy.new(io1, io2)
c.copy_object

io1.to_slice.should eq io2.to_slice
io2.rewind
y = typeof(x).from_msgpack(io2)

x.should eq y
end

describe "MessagePack::Packer" do
it { copy(1) }
it { copy([1, 2, 3]) }
it { copy({1 => 2, "bla" => "hah"}) }
it { copy({"a": "jopa", "b": {1, 3, 4.6}, "c": ["a", 2, [3], {1 => 2}]}) }
it { copy([1000000000] * 1000) }
it { copy([[1], [[3]]]) }
it { copy("asdf" * 1000) }
it { copy(Int64[1, -1, 0x21, -0x21, 128, -128, -0x8000, 0x8000, 0xFFFF, -0xFFFF, -0x80000000, 0x80000000, -9223372036854775808, 9223372036854775807, 4294967295, -4294967295]) }
it { copy("⬠ ⬡ ⬢ ⬣ ⬤ ⬥ ⬦") }
it { copy(Bytes[1, 2, 3]) }
end
115 changes: 115 additions & 0 deletions src/message_pack/copy.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
require "./lexer"

module MessagePack
# Fast copy msgpack objects from IO to IO, without full parse, without create temp objects
# this is usefull for streaming apis
# MessagePack::Copy.new(io1, io2).copy_objects(1)
# This is from 40% to 700% faster than `MessagePack::IOUnpacker.new(io1).read.to_msgpack(io2)`
struct Copy
getter io_dst, io_src

def initialize(@io_src : IO, @io_dst : IO)
end

def copy_objects(n = 1)
n.times { copy_object }
end

def copy_object
cb = next_byte
@io_dst.write_byte(cb)
copy_token(cb)
1
end

protected def copy_token(current_byte : UInt8)
case current_byte
when 0xA0..0xBF
copy(current_byte - 0xA0)
when 0x80..0x8F
copy_objects((current_byte - 0x80) * 2)
when 0x90..0x9F
copy_objects(current_byte - 0x90)
when 0xC4, 0xD9
size = read(UInt8)
write_bytes(size)
copy(size)
when 0xC5, 0xDA
size = read(UInt16)
write_bytes(size)
copy(size)
when 0xC6, 0xDB
size = read(UInt32)
write_bytes(size)
copy(size)
when 0xC7
size = read(UInt8)
write_bytes(size)
copy(size + 1)
when 0xC8
size = read(UInt16)
write_bytes(size)
copy(size + 1)
when 0xC9
size = read(UInt32)
write_bytes(size)
copy(size + 1)
when 0xCC, 0xD0
copy_static(1)
when 0xCD, 0xD1
copy_static(2)
when 0xCE, 0xD2, 0xCA
copy_static(4)
when 0xCF, 0xD3, 0xCB
copy_static(8)
when 0xD4..0xD8
size = 1 << (current_byte - 0xD4) # 1, 2, 4, 8, 16
copy(size + 1)
when 0xDC
size = read UInt16
write_bytes(size)
copy_objects(size)
when 0xDD
size = read UInt32
write_bytes(size)
copy_objects(size)
when 0xDE
size = read UInt16
write_bytes(size)
copy_objects(size * 2)
when 0xDF
size = read UInt32
write_bytes(size)
copy_objects(size * 2)
# else
# just one byte copy
# one bytes: 0xC0, 0xC2, 0xC3, 0xE0..0xFF, 0x00..0x7F
# 0xC1 invalid symbol copied also, buy this is doesnot matter
end
end

protected def next_byte : UInt8
byte = @io_src.read_byte
raise EofError.new(0) unless byte
byte
end

def write_bytes(v)
@io_dst.write_bytes(v, IO::ByteFormat::BigEndian)
end

def copy(size)
IO.copy(@io_src, @io_dst, size)
end

protected def read(type : T.class) forall T
@io_src.read_bytes(T, IO::ByteFormat::BigEndian)
end

macro copy_static(size)
%buffer = uninitialized UInt8[{{size}}]
@io_src.read(%buffer.to_slice)
@io_dst.write(%buffer.to_slice)
end
end
end

0 comments on commit 2559dcc

Please sign in to comment.