Skip to content

Commit

Permalink
Fix a race condition from 1.4.10 on item_remove
Browse files Browse the repository at this point in the history
Updates the slab mover for the new method.

1.4.10 lacks some crucial protection around item freeing and removal,
resulting in some potential crashes. Moving the cache_lock around item_remove
caused a 30% performance drop, so it's been reimplemented with GCC atomics.

refcount of 1 now means an item is linked but has no reference, which allows
us to test an atomic sub and fetch of 0 as a clear indicator of when to free
an item.
  • Loading branch information
dormando committed Jan 9, 2012
1 parent 324975c commit f4983b2
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 48 deletions.
44 changes: 18 additions & 26 deletions items.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
rel_time_t oldest_live = settings.oldest_live;

search = tails[id];
if (search != NULL && search->refcount == 0) {
if (search != NULL && (__sync_add_and_fetch(&search->refcount, 1) == 2)) {
if ((search->exptime != 0 && search->exptime < current_time)
|| (search->time < oldest_live)) { // dead by flush
STATS_LOCK();
Expand All @@ -118,7 +118,6 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
itemstats[id].expired_unfetched++;
}
it = search;
it->refcount = 1;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
/* Initialize the item block: */
Expand All @@ -143,15 +142,18 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
stats.evictions++;
STATS_UNLOCK();
it = search;
it->refcount = 1;
slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
/* Initialize the item block: */
it->slabs_clsid = 0;
} else {
__sync_sub_and_fetch(&search->refcount, 1);
}
} else {
/* If the LRU is empty or locked, attempt to allocate memory */
it = slabs_alloc(ntotal, id);
if (search != NULL)
__sync_sub_and_fetch(&search->refcount, 1);
}

if (it == NULL) {
Expand All @@ -163,10 +165,10 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
* free it anyway.
*/
if (search != NULL &&
search->refcount != 0 &&
search->refcount != 2 &&
search->time + TAIL_REPAIR_TIME < current_time) {
itemstats[id].tailrepairs++;
search->refcount = 0;
search->refcount = 1;
do_item_unlink_nolock(search, hash(ITEM_key(search), search->nkey, 0));
}
pthread_mutex_unlock(&cache_lock);
Expand Down Expand Up @@ -206,7 +208,6 @@ void item_free(item *it) {
/* so slab size changer can tell later if item is already free or not */
clsid = it->slabs_clsid;
it->slabs_clsid = 0;
it->it_flags |= ITEM_SLABBED;
DEBUG_REFCNT(it, 'F');
slabs_free(it, ntotal, clsid);
}
Expand Down Expand Up @@ -272,6 +273,7 @@ static void item_unlink_q(item *it) {
int do_item_link(item *it, const uint32_t hv) {
MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
mutex_lock(&cache_lock);
it->it_flags |= ITEM_LINKED;
it->time = current_time;

Expand All @@ -281,11 +283,11 @@ int do_item_link(item *it, const uint32_t hv) {
stats.total_items += 1;
STATS_UNLOCK();

mutex_lock(&cache_lock);
/* Allocate a new CAS ID on link. */
ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
assoc_insert(it, hv);
item_link_q(it);
__sync_add_and_fetch(&it->refcount, 1);
pthread_mutex_unlock(&cache_lock);

return 1;
Expand All @@ -302,7 +304,7 @@ void do_item_unlink(item *it, const uint32_t hv) {
STATS_UNLOCK();
assoc_delete(ITEM_key(it), it->nkey, hv);
item_unlink_q(it);
if (it->refcount == 0) item_free(it);
do_item_remove(it);
}
pthread_mutex_unlock(&cache_lock);
}
Expand All @@ -318,23 +320,17 @@ void do_item_unlink_nolock(item *it, const uint32_t hv) {
STATS_UNLOCK();
assoc_delete(ITEM_key(it), it->nkey, hv);
item_unlink_q(it);
if (it->refcount == 0) item_free(it);
do_item_remove(it);
}
}

void do_item_remove(item *it) {
MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
assert((it->it_flags & ITEM_SLABBED) == 0);

mutex_lock(&cache_lock);
if (it->refcount != 0) {
it->refcount--;
DEBUG_REFCNT(it, '-');
}
if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) {
if (__sync_sub_and_fetch(&it->refcount, 1) == 0) {
item_free(it);
}
pthread_mutex_unlock(&cache_lock);
}

void do_item_update(item *it) {
Expand Down Expand Up @@ -488,14 +484,14 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
mutex_lock(&cache_lock);
item *it = assoc_find(key, nkey, hv);
if (it != NULL) {
it->refcount++;
__sync_add_and_fetch(&it->refcount, 1);
/* Optimization for slab reassignment. prevents popular items from
* jamming in busy wait. Can only do this here to satisfy lock order
* of item_lock, cache_lock, slabs_lock. */
if (slab_rebalance_signal &&
((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
it->refcount--;
do_item_unlink_nolock(it, hv);
do_item_remove(it);
it = NULL;
}
}
Expand All @@ -514,19 +510,15 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
if (it != NULL) {
if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
it->time <= settings.oldest_live) {
mutex_lock(&cache_lock);
it->refcount--;
do_item_unlink_nolock(it, hv);
pthread_mutex_unlock(&cache_lock);
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by flush");
}
} else if (it->exptime != 0 && it->exptime <= current_time) {
mutex_lock(&cache_lock);
it->refcount--;
do_item_unlink_nolock(it, hv);
pthread_mutex_unlock(&cache_lock);
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by expire");
Expand Down
75 changes: 53 additions & 22 deletions slabs.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
#endif

it = (item *)ptr;
it->it_flags |= ITEM_SLABBED;
it->prev = 0;
it->next = p->slots;
if (it->next) it->next->prev = it;
Expand Down Expand Up @@ -521,6 +522,10 @@ static int slab_rebalance_start(void) {
return 0;
}

enum move_status {
MOVE_PASS=0, MOVE_DONE, MOVE_BUSY
};

/* refcount == 0 is safe since nobody can incr while cache_lock is held.
* refcount != 0 is impossible since flags/etc can be modified in other
* threads. instead, note we found a busy one and bail. logic in do_item_get
Expand All @@ -530,6 +535,8 @@ static int slab_rebalance_move(void) {
slabclass_t *s_cls;
int x;
int was_busy = 0;
int refcount = 0;
enum move_status status = MOVE_PASS;

pthread_mutex_lock(&cache_lock);
pthread_mutex_lock(&slabs_lock);
Expand All @@ -538,30 +545,54 @@ static int slab_rebalance_move(void) {

for (x = 0; x < slab_bulk_check; x++) {
item *it = slab_rebal.slab_pos;
if (it->refcount == 0) {
if (it->it_flags & ITEM_SLABBED) {
/* remove from freelist linked list */
if (s_cls->slots == it) {
s_cls->slots = it->next;
status = MOVE_PASS;
if (it->slabs_clsid != 255) {
refcount = __sync_add_and_fetch(&it->refcount, 1);
if (refcount == 1) { /* item is unlinked, unused */
if (it->it_flags & ITEM_SLABBED) {
/* remove from slab freelist */
if (s_cls->slots == it) {
s_cls->slots = it->next;
}
if (it->next) it->next->prev = it->prev;
if (it->prev) it->prev->next = it->next;
s_cls->sl_curr--;
status = MOVE_DONE;
} else {
status = MOVE_BUSY;
}
if (it->next) it->next->prev = it->prev;
if (it->prev) it->prev->next = it->next;
s_cls->sl_curr--;
} else if (it->it_flags != 0) {
it->refcount = 1;
/* Call unlink with refcount == 1 so it won't free */
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
it->refcount = 0;
}
it->it_flags = 0;
it->slabs_clsid = 0;
} else {
if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
} else if (refcount == 2) { /* item is linked but not busy */
if ((it->it_flags & ITEM_LINKED) != 0) {
do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
status = MOVE_DONE;
} else {
/* refcount == 1 + !ITEM_LINKED means the item is being
* uploaded to, or was just unlinked but hasn't been freed
* yet. Let it bleed off on its own and try again later */
status = MOVE_BUSY;
}
} else {
if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
}
status = MOVE_BUSY;
}
slab_rebal.busy_items++;
was_busy++;
}

switch (status) {
case MOVE_DONE:
it->refcount = 0;
it->it_flags = 0;
it->slabs_clsid = 255;
break;
case MOVE_BUSY:
slab_rebal.busy_items++;
was_busy++;
__sync_sub_and_fetch(&it->refcount, 1);
break;
case MOVE_PASS:
break;
}

slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
Expand Down

0 comments on commit f4983b2

Please sign in to comment.