Skip to content

Commit

Permalink
Restructure tables for C89 port and smaller size.
Browse files Browse the repository at this point in the history
Changes the data layout of tables slightly so that string
keys are prefixed with their size, rather than the size
being inline in the table itself.

This has a few benefits:

1. inttables shrink a bit, because there is no longer a wasted
   and unused size field sitting in them.

2. This avoids the need to have a union in the table.  This is
   important for an impending C89 port of upb, since C89 has
   literally no way of statically initializing a non-first union
   member.
  • Loading branch information
haberman committed May 17, 2015
1 parent 0c7eb66 commit e2840a4
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 192 deletions.
2 changes: 2 additions & 0 deletions tests/test_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
upb_strtable_next(&iter)) {
const char *key = upb_strtable_iter_key(&iter);
std::string tmp(key, strlen(key));
std::string tmp2(key, upb_strtable_iter_keylength(&iter));
ASSERT(tmp == tmp2);
std::set<std::string>::iterator i = all.find(tmp);
ASSERT(i != all.end());
all.erase(i);
Expand Down
22 changes: 21 additions & 1 deletion tools/dump_cinit.lua
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ function handler_types(base)
return ret
end

function octchar(num)
assert(num < 8)
local idx = num + 1 -- 1-based index
return string.sub("01234567", idx, idx)
end

function c_escape(num)
assert(num < 256)
return string.format("\\%s%s%s",
octchar(math.floor(num / 64)),
octchar(math.floor(num / 8) % 8),
octchar(num % 8));
end

-- const(f, label) -> UPB_LABEL_REPEATED, where f:label() == upb.LABEL_REPEATED
function const(obj, name, base)
local val = obj[name]
Expand Down Expand Up @@ -214,7 +228,13 @@ function Dumper:tabkey(key)
if type(key) == "nil" then
return "UPB_TABKEY_NONE"
elseif type(key) == "string" then
return string.format('UPB_TABKEY_STR("%s")', key)
local len = #key
local len1 = c_escape(len % 256)
local len2 = c_escape(math.floor(len / 256) % 256)
local len3 = c_escape(math.floor(len / (256 * 256)) % 256)
local len4 = c_escape(math.floor(len / (256 * 256 * 256)) % 256)
return string.format('UPB_TABKEY_STR("%s", "%s", "%s", "%s", "%s")',
len1, len2, len3, len4, key)
else
return string.format("UPB_TABKEY_NUM(%d)", key)
end
Expand Down
3 changes: 3 additions & 0 deletions travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ coverage_after_success() {
set -e
set -x

$CC --version
$CXX --version

if [ "$1" == "after_failure" ]; then
# Upload failing tree to S3.
curl -sL https://raw.githubusercontent.com/travis-ci/artifacts/master/install | bash
Expand Down
6 changes: 4 additions & 2 deletions upb/bindings/lua/upb/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ static void lupbtable_pushent(lua_State *L, const upb_tabent *e,
lua_newtable(L);
if (!upb_tabent_isempty(e)) {
if (inttab) {
lua_pushnumber(L, e->key.num);
lua_pushnumber(L, e->key);
} else {
lua_pushlstring(L, e->key.s.str, e->key.s.length);
uint32_t len;
const char *str = upb_tabstr(e->key, &len);
lua_pushlstring(L, str, len);
}
lua_setfield(L, -2, "key");
lupbtable_pushval(L, e->val, ctype);
Expand Down
264 changes: 132 additions & 132 deletions upb/descriptor/descriptor.upb.c

Large diffs are not rendered by default.

76 changes: 48 additions & 28 deletions upb/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,24 @@ char *upb_strdup2(const char *s, size_t len) {
}

// A type to represent the lookup key of either a strtable or an inttable.
typedef struct {
upb_tabkey key;
typedef union {
uintptr_t num;
struct {
const char *str;
size_t len;
} str;
} lookupkey_t;

static lookupkey_t strkey2(const char *str, size_t len) {
lookupkey_t k;
k.key.s.str = (char*)str;
k.key.s.length = len;
k.str.str = str;
k.str.len = len;
return k;
}

static lookupkey_t intkey(uintptr_t key) {
lookupkey_t k;
k.key = upb_intkey(key);
k.num = key;
return k;
}

Expand Down Expand Up @@ -142,9 +146,11 @@ static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
}

// The given key must not already exist in the table.
static void insert(upb_table *t, lookupkey_t key, upb_value val,
uint32_t hash, hashfunc_t *hashfunc, eqlfunc_t *eql) {
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
upb_value val, uint32_t hash,
hashfunc_t *hashfunc, eqlfunc_t *eql) {
UPB_UNUSED(eql);
UPB_UNUSED(key);
assert(findentry(t, key, hash, eql) == NULL);
assert(val.ctype == t->ctype);
t->count++;
Expand Down Expand Up @@ -178,7 +184,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_value val,
our_e->next = NULL;
}
}
our_e->key = key.key;
our_e->key = tabkey;
our_e->val = val.val;
assert(findentry(t, key, hash, eql) == our_e);
}
Expand All @@ -197,10 +203,10 @@ static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
upb_tabent *move = (upb_tabent*)chain->next;
*chain = *move;
if (removed) *removed = move->key;
move->key.num = 0; // Make the slot empty.
move->key = 0; // Make the slot empty.
} else {
if (removed) *removed = chain->key;
chain->key.num = 0; // Make the slot empty.
chain->key = 0; // Make the slot empty.
}
return true;
} else {
Expand All @@ -214,7 +220,7 @@ static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
}
upb_tabent *rm = (upb_tabent*)chain->next;
if (removed) *removed = rm->key;
rm->key.num = 0;
rm->key = 0;
chain->next = rm->next;
t->count--;
return true;
Expand Down Expand Up @@ -242,13 +248,24 @@ static size_t begin(const upb_table *t) {

// A simple "subclass" of upb_table that only adds a hash function for strings.

static upb_tabkey strcopy(lookupkey_t k2) {
char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
if (str == NULL) return 0;
memcpy(str, &k2.str.len, sizeof(uint32_t));
memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
return (uintptr_t)str;
}

static uint32_t strhash(upb_tabkey key) {
return MurmurHash2(key.s.str, key.s.length, 0);
uint32_t len;
char *str = upb_tabstr(key, &len);
return MurmurHash2(str, len, 0);
}

static bool streql(upb_tabkey k1, lookupkey_t k2) {
return k1.s.length == k2.key.s.length &&
memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
uint32_t len;
char *str = upb_tabstr(k1, &len);
return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
}

bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
Expand All @@ -257,7 +274,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {

void upb_strtable_uninit(upb_strtable *t) {
for (size_t i = 0; i < upb_table_size(&t->t); i++)
free((void*)t->t.entries[i].key.s.str);
free((void*)t->t.entries[i].key);
uninit(&t->t);
}

Expand Down Expand Up @@ -287,11 +304,12 @@ bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
return false;
}
}
if ((k = upb_strdup2(k, len)) == NULL) return false;

lookupkey_t key = strkey2(k, len);
uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
insert(&t->t, key, v, hash, &strhash, &streql);
upb_tabkey tabkey = strcopy(key);
if (tabkey == 0) return false;

uint32_t hash = MurmurHash2(key.str.str, key.str.len, 0);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}

Expand All @@ -306,7 +324,7 @@ bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
uint32_t hash = MurmurHash2(key, strlen(key), 0);
upb_tabkey tabkey;
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
free((void*)tabkey.s.str);
free((void*)tabkey);
return true;
} else {
return false;
Expand Down Expand Up @@ -335,12 +353,14 @@ bool upb_strtable_done(const upb_strtable_iter *i) {

const char *upb_strtable_iter_key(upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
return str_tabent(i)->key.s.str;
return upb_tabstr(str_tabent(i)->key, NULL);
}

size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
return str_tabent(i)->key.s.length;
uint32_t len;
upb_tabstr(str_tabent(i)->key, &len);
return len;
}

upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
Expand All @@ -365,10 +385,10 @@ bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
// For inttables we use a hybrid structure where small keys are kept in an
// array and large keys are put in the hash table.

static uint32_t inthash(upb_tabkey key) { return upb_inthash(key.num); }
static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }

static bool inteql(upb_tabkey k1, lookupkey_t k2) {
return k1.num == k2.key.num;
return k1 == k2.num;
}

static _upb_value *mutable_array(upb_inttable *t) {
Expand Down Expand Up @@ -452,16 +472,16 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
const upb_tabent *e = &t->t.entries[i];
upb_value v;
_upb_value_setval(&v, e->val, t->t.ctype);
uint32_t hash = upb_inthash(e->key.num);
insert(&new_table, intkey(e->key.num), v, hash, &inthash, &inteql);
uint32_t hash = upb_inthash(e->key);
insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
}

assert(t->t.count == new_table.count);

uninit(&t->t);
t->t = new_table;
}
insert(&t->t, intkey(key), val, upb_inthash(key), &inthash, &inteql);
insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
}
check(t);
return true;
Expand Down Expand Up @@ -629,7 +649,7 @@ bool upb_inttable_done(const upb_inttable_iter *i) {

uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
assert(!upb_inttable_done(i));
return i->array_part ? i->index : int_tabent(i)->key.num;
return i->array_part ? i->index : int_tabent(i)->key;
}

upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
Expand Down
69 changes: 40 additions & 29 deletions upb/table.int.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,33 +152,46 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);

/* upb_table ******************************************************************/

typedef union {
uintptr_t num;
struct {
// We own this. NULL-terminated but may also contain binary data; see
// explicit length below.
// TODO: move the length to the start of the string in order to reduce
// tabkey's size (to one machine word) in a way that supports static
// initialization.
const char *str;
size_t length;
} s;
} upb_tabkey;

#define UPB_TABKEY_NUM(n) {n}
#ifdef UPB_C99
// Given that |s| is a string literal, sizeof(s) gives us a
// compile-time-constant strlen(). We must ensure that this works for static
// data initializers.
#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \
.length = sizeof(strval) - 1 } }
#endif
// TODO(haberman): C++
#define UPB_TABKEY_NONE {0}
#define UPB_TABKEY_NUM(n) n
#define UPB_TABKEY_NONE 0
// The preprocessor isn't quite powerful enough to turn the compile-time string
// length into a byte-wise string representation, so code generation needs to
// help it along.
//
// "len1" is the low byte and len4 is the high byte. For big endian we'll need
// to define a version of this that flips it around.
#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
(uintptr_t)(len1 len2 len3 len4 strval)

// Either:
// 1. an actual integer key, or
// 2. a pointer to a string prefixed by its uint32_t length, owned by us.
//
// ...depending on whether this is a string table or an int table. We would
// make this a union of those two types, but C89 doesn't support statically
// initializing a non-first union member.
typedef uintptr_t upb_tabkey;

// Ideally we could use a structure like this instead of the memcpy() calls:
//
// typedef struct {
// uint32_t len;
// char data[1]; // Allocate to correct length.
// } upb_tabstr;
//
// But unfortuantely in C89 there is no way to statically initialize such a
// thing. So instead of memcpy() the length in and out of the string.

UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
char* mem = (char*)key;
if (len) memcpy(len, mem, sizeof(*len));
return mem + sizeof(*len);
}

typedef struct _upb_tabent {
upb_tabkey key;
_upb_value val;

// Internal chaining. This is const so we can create static initializers for
// tables. We cast away const sometimes, but *only* when the containing
// upb_table is known to be non-const. This requires a bit of care, but
Expand Down Expand Up @@ -235,16 +248,14 @@ UPB_INLINE size_t upb_table_size(const upb_table *t) {

// Internal-only functions, in .h file only out of necessity.
UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
return e->key.num == 0;
return e->key == 0;
}

// Used by some of the unit tests for generic hashing functionality.
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed);

UPB_INLINE upb_tabkey upb_intkey(uintptr_t key) {
upb_tabkey k;
k.num = key;
return k;
UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
return key;
}

UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
Expand Down Expand Up @@ -350,7 +361,7 @@ UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
const upb_tabent *e;
if (t->t.entries == NULL) return false;
for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
if ((uint32_t)e->key.num == key) {
if ((uint32_t)e->key == key) {
_upb_value_setval(v, e->val, t->t.ctype);
return true;
}
Expand Down

0 comments on commit e2840a4

Please sign in to comment.