Skip to content

Commit

Permalink
Added map sorting to binary and text encoders.
Browse files Browse the repository at this point in the history
For the binary encoder, sorting is off by default.
For the text encoder, sorting is on by default.
Both defaults can be explicitly overridden.

This grows code size a bit. I think we could potentially
shave this (and other map-related code size) by having
the generated code inject a function pointer to the map-related
parsing/serialization code if maps are present.

    FILE SIZE        VM SIZE
 --------------  --------------
   +86% +1.07Ki   +71%    +768    upb/msg.c
    [NEW]    +391  [NEW]    +344    _upb_mapsorter_pushmap
    [NEW]    +158  [NEW]    +112    _upb_mapsorter_cmpstr
    [NEW]    +111  [NEW]     +64    _upb_mapsorter_cmpbool
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpi32
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpi64
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpu32
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpu64
    -3.6%      -8  -4.3%      -8    _upb_map_new
  +9.5%    +464  +9.2%    +424    upb/text_encode.c
    [NEW]    +656  [NEW]    +616    txtenc_mapentry
     +15%     +32   +20%     +32    upb_text_encode
   -20.1%    -224 -20.7%    -224    txtenc_msg
  +5.7%    +342  +5.3%    +296    upb/encode.c
    [NEW]    +344  [NEW]    +304    encode_mapentry
    [NEW]    +246  [NEW]    +208    upb_encode_ex
    [NEW]     +41  [NEW]     +16    upb_encode_ex.ch
    +0.7%      +8  +0.7%      +8    encode_scalar
    -1.0%     -32  -1.0%     -32    encode_message
    [DEL]     -38  [DEL]     -16    upb_encode.ch
    [DEL]    -227  [DEL]    -192    upb_encode
  +2.0%    +152  +2.2%    +152    upb/decode.c
     +44%    +128   +44%    +128    [section .rodata]
    +3.4%     +24  +3.4%     +24    _GLOBAL_OFFSET_TABLE_
  +0.6%    +107  +0.3%     +48    upb/def.c
    [NEW]    +100  [NEW]     +48    upb_fielddef_descriptortype
    +7.1%      +7  [ = ]       0    upb_fielddef_defaultint32
  +2.9%     +24  +2.9%     +24    [section .dynsym]
  +1.2%     +24  [ = ]       0    [section .symtab]
  +3.2%     +16  +3.2%     +16    [section .plt]
    [NEW]     +16  [NEW]     +16    memcmp@plt
  +0.5%     +16  +0.6%     +16    tests/conformance_upb.c
    +1.5%     +16  +1.6%     +16    DoTestIo
  +0.1%     +16  +0.1%     +16    upb/json_decode.c
    +0.4%     +16  +0.4%     +16    jsondec_wellknown
  +3.0%      +8  +3.0%      +8    [section .got.plt]
    +3.0%      +8  +3.0%      +8    _GLOBAL_OFFSET_TABLE_
  +1.6%      +7  +1.6%      +7    [section .dynstr]
  +1.8%      +4  +1.8%      +4    [section .hash]
  +0.5%      +3  +0.5%      +3    [LOAD protocolbuffers#2 [RX]]
  +2.8%      +2  +2.8%      +2    [section .gnu.version]
 -60.0% -1.74Ki  [ = ]       0    [Unmapped]
  +0.3%    +496  +1.4% +1.74Ki    TOTAL
  • Loading branch information
haberman committed Nov 26, 2020
1 parent 3e071ea commit a04627a
Show file tree
Hide file tree
Showing 14 changed files with 521 additions and 84 deletions.
2 changes: 1 addition & 1 deletion benchmarks/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):


baseline = "master"
bench_cpu = True
bench_cpu = False
fasttable = False

if len(sys.argv) > 1:
Expand Down
63 changes: 63 additions & 0 deletions tests/bindings/lua/test_upb.lua
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,69 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6])
end

function test_map_sorting()
function msg_with_int32_entries(start, expand)
local msg = test_messages_proto3.TestAllTypesProto3()
for i=start,start + 8 do
msg.map_int32_int32[i] = i * 2
end

if expand then
for i=start+20,200 do
msg.map_int32_int32[i] = i
end
for i=start+20,200 do
msg.map_int32_int32[i] = nil
end
end
return msg
end

function msg_with_msg_entries(expand)
local msg = test_messages_proto3.TestAllTypesProto3()
-- 8! = 40320 possible orderings makes it overwhelmingly likely that two
-- random orderings will be different.
for i=1,8 do
local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
submsg.corecursive = msg_with_int32_entries(i, expand)
msg.map_string_nested_message[tostring(i)] = submsg
end

expand = false
if expand then
for i=21,2000 do
local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
submsg.corecursive = msg_with_int32_entries(i, expand)
msg.map_string_nested_message[tostring(i)] = submsg
end
for i=21,2000 do
msg.map_string_nested_message[tostring(i)] = nil
end
end
return msg
end

-- Create two messages with the same contents but (hopefully) different
-- map table orderings.
local msg = msg_with_msg_entries(false)
local msg2 = msg_with_msg_entries(true)

local text1 = upb.text_encode(msg)
local text2 = upb.text_encode(msg2)
assert_equal(text1, text2)

local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC})
local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC})
assert_equal(binary1, binary2)

-- Non-sorted map should compare different.
local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT})
assert_not_equal(text1, text3)

local binary3 = upb.encode(msg)
assert_not_equal(binary1, binary3)
end

function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"
Expand Down
32 changes: 30 additions & 2 deletions upb/bindings/lua/def.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def,
/* lupb_msgdef_pushsubmsgdef()
*
* Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef
* wrapper for field |f| of this msgdef.
* wrapper for field |f| of this msgdef (submsg may not be direct, for example it
* may be the submessage of the map value).
*/
void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) {
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
assert(m);
assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1));
lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
lua_replace(L, -2); /* Replace msgdef with submsgdef. */
}
Expand Down Expand Up @@ -337,6 +337,26 @@ static int lupb_msgdef_oneofcount(lua_State *L) {
return 1;
}

static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) {
const upb_msgdef *m = lupb_msgdef_check(L, msgdef);
lupb_wrapper_pushsymtab(L, msgdef);
upb_symtab *symtab = lupb_symtab_check(L, -1);
lua_pop(L, 1);

/* Construct full package.Message.SubMessage name. */
lua_pushstring(L, upb_msgdef_fullname(m));
lua_pushstring(L, ".");
lua_pushvalue(L, name);
lua_concat(L, 3);
const char *nested_name = lua_tostring(L, -1);

/* Try lookup. */
const upb_msgdef *nested = upb_symtab_lookupmsg(symtab, nested_name);
if (!nested) return false;
lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF);
return true;
}

/* lupb_msgdef_field()
*
* Handles:
Expand Down Expand Up @@ -430,6 +450,13 @@ static int lupb_msgdef_fullname(lua_State *L) {
return 1;
}

static int lupb_msgdef_index(lua_State *L) {
if (!lupb_msgdef_pushnested(L, 1, 2)) {
luaL_error(L, "No such nested message");
}
return 1;
}

static int lupb_msgoneofiter_next(lua_State *L) {
const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1));
int *index = lua_touserdata(L, lua_upvalueindex(2));
Expand Down Expand Up @@ -471,6 +498,7 @@ static int lupb_msgdef_tostring(lua_State *L) {

static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__call", lupb_msg_pushnew},
{"__index", lupb_msgdef_index},
{"__len", lupb_msgdef_fieldcount},
{"__tostring", lupb_msgdef_tostring},
{NULL, NULL}
Expand Down
109 changes: 94 additions & 15 deletions upb/bindings/lua/msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,13 @@ static void lupb_arena_fuse(lua_State *L, int to, int from) {
upb_arena_fuse(to_arena, from_arena);
}

static void lupb_arena_fuseobjs(lua_State *L, int to, int from) {
lua_getiuservalue(L, to, LUPB_ARENA_INDEX);
lua_getiuservalue(L, from, LUPB_ARENA_INDEX);
lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
lua_pop(L, 2);
}

static int lupb_arena_gc(lua_State *L) {
upb_arena *a = lupb_arena_check(L, 1);
upb_arena_free(a);
Expand Down Expand Up @@ -398,6 +405,10 @@ static int lupb_array_newindex(lua_State *L) {
upb_array_set(larray->arr, n, msgval);
}

if (larray->type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}

return 0; /* 1 for chained assignments? */
}

Expand Down Expand Up @@ -535,6 +546,9 @@ static int lupb_map_newindex(lua_State *L) {
} else {
upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY);
upb_map_set(map, key, val, lupb_arenaget(L, 1));
if (lmap->value_type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}
}

return 0;
Expand Down Expand Up @@ -600,21 +614,26 @@ static upb_msg *lupb_msg_check(lua_State *L, int narg) {
return msg->msg;
}

static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) {
lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
const upb_msgdef *m = lupb_msgdef_check(L, -1);
lua_pop(L, 1);
return m;
}

static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) {
size_t len;
const char *fieldname = luaL_checklstring(L, field, &len);
const upb_msgdef *m;
const upb_fielddef *f;
const upb_msgdef *m = lupb_msg_getmsgdef(L, msg);
return upb_msgdef_ntof(m, fieldname, len);
}

lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
m = lupb_msgdef_check(L, -1);
f = upb_msgdef_ntof(m, fieldname, len);
static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
const upb_fielddef *f = lupb_msg_tofield(L, msg, field);
if (f == NULL) {
luaL_error(L, "no such field '%s'", fieldname);
luaL_error(L, "no such field '%s'", lua_tostring(L, field));
}
lua_pop(L, 1);

return f;
}

Expand Down Expand Up @@ -813,10 +832,7 @@ static int lupb_msg_newindex(lua_State *L) {
}

if (merge_arenas) {
lua_getiuservalue(L, 1, LUPB_ARENA_INDEX);
lua_getiuservalue(L, 3, LUPB_ARENA_INDEX);
lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
lua_pop(L, 2);
lupb_arena_fuseobjs(L, 1, 3);
}

upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1));
Expand Down Expand Up @@ -907,24 +923,75 @@ static int lupb_decode(lua_State *L) {
return 1;
}

/**
* lupb_msg_textencode()
*
* Handles:
* text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE})
*/
static int lupb_textencode(lua_State *L) {
int argcount = lua_gettop(L);
upb_msg *msg = lupb_msg_check(L, 1);
const upb_msgdef *m;
char buf[1024];
size_t size;
int options = 0;

lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
m = lupb_msgdef_check(L, -1);

if (argcount > 1) {
size_t len = lua_rawlen(L, 2);
for (size_t i = 1; i <= len; i++) {
lua_rawgeti(L, 2, i);
options |= lupb_checkuint32(L, -1);
lua_pop(L, 1);
}
}

size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf));

if (size < sizeof(buf)) {
lua_pushlstring(L, buf, size);
} else {
char *ptr = malloc(size + 1);
upb_text_encode(msg, m, NULL, options, ptr, size + 1);
lua_pushlstring(L, ptr, size);
free(ptr);
}

return 1;
}

/**
* lupb_encode()
*
* Handles:
* bin_string = upb.encode(msg)
*/
static int lupb_encode(lua_State *L) {
int argcount = lua_gettop(L);
const upb_msg *msg = lupb_msg_check(L, 1);
const upb_msglayout *layout;
upb_arena *arena = lupb_arena_pushnew(L);
size_t size;
char *result;
int options = 0;

if (argcount > 1) {
size_t len = lua_rawlen(L, 2);
for (size_t i = 1; i <= len; i++) {
lua_rawgeti(L, 2, i);
options |= lupb_checkuint32(L, -1);
lua_pop(L, 1);
}
}

lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
layout = upb_msgdef_layout(lupb_msgdef_check(L, -1));
lua_pop(L, 1);

result = upb_encode(msg, (const void*)layout, arena, &size);
result = upb_encode_ex(msg, (const void*)layout, options, arena, &size);

if (!result) {
lua_pushstring(L, "Error encoding protobuf.");
Expand All @@ -936,11 +1003,17 @@ static int lupb_encode(lua_State *L) {
return 1;
}

static void lupb_setfieldi(lua_State *L, const char *field, int i) {
lua_pushinteger(L, i);
lua_setfield(L, -2, field);
}

static const struct luaL_Reg lupb_msg_toplevel_m[] = {
{"Array", lupb_array_new},
{"Map", lupb_map_new},
{"decode", lupb_decode},
{"encode", lupb_encode},
{"text_encode", lupb_textencode},
{NULL, NULL}
};

Expand All @@ -952,5 +1025,11 @@ void lupb_msg_registertypes(lua_State *L) {
lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm);
lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm);

lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE);
lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN);
lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT);

lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC);

lupb_cacheinit(L);
}
35 changes: 28 additions & 7 deletions upb/bindings/lua/upb.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,24 @@ int lua_getiuservalue(lua_State *L, int index, int n) {
}
#endif

/* We use this function as the __index metamethod when a type has both methods
* and an __index metamethod. */
int lupb_indexmm(lua_State *L) {
/* Look up in __index table (which is a closure param). */
lua_pushvalue(L, 2);
lua_rawget(L, lua_upvalueindex(1));
if (!lua_isnil(L, -1)) {
return 1;
}

/* Not found, chain to user __index metamethod. */
lua_pushvalue(L, lua_upvalueindex(2));
lua_pushvalue(L, 1);
lua_pushvalue(L, 2);
lua_call(L, 2, 1);
return 1;
}

void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm) {
luaL_newmetatable(L, name);
Expand All @@ -93,14 +111,17 @@ void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
}

if (m) {
/* Methods go in the mt's __index method. This implies that you can'
* implement __index and also have methods. */
lua_getfield(L, -1, "__index");
lupb_assert(L, lua_isnil(L, -1));
lua_pop(L, 1);

lua_createtable(L, 0, 0);
lua_createtable(L, 0, 0); /* __index table */
lupb_setfuncs(L, m);

/* Methods go in the mt's __index slot. If the user also specified an
* __index metamethod, use our custom lupb_indexmm() that can check both. */
lua_getfield(L, -2, "__index");
if (lua_isnil(L, -1)) {
lua_pop(L, 1);
} else {
lua_pushcclosure(L, &lupb_indexmm, 2);
}
lua_setfield(L, -2, "__index");
}

Expand Down
2 changes: 2 additions & 0 deletions upb/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ extern "C" {
#endif

enum {
/* If set, strings will alias the input buffer instead of copying into the
* arena. */
UPB_DECODE_ALIAS = 1,
};

Expand Down
Loading

0 comments on commit a04627a

Please sign in to comment.