Skip to content

Commit

Permalink
apacheGH-2738: Performance improvements to CacheSimple
Browse files Browse the repository at this point in the history
Slight performance improvements to org.apache.jena.atlas.lib.cache.CacheSimple:
- optimized bit-operations by using a cache size, that is always a power of 2
- removed the BiConsumer<K, V> dropHandler (field and constructor parameter), which was not used anywhere
- added more tests for CacheSimple
- fixed a small bug where putting a key with the same hash as an existing one did not override the existing entry when the values were equal.
- added new module jena-benchmarks-shadedJena51 to be able to perform regression tests against the previous release
- added JMH test org.apache.jena.atlas.lib.cache.TestCaches in jena-benchmarks-jmh
- added more detailed javadoc comments to Cache interface

Downside: The fixed cache size must always be a power of 2. If the given size is already a power of two, it will be used as the fixed size for the cache; otherwise the next larger power of two will be used (e.g. minimumSize = 10 results in 16 as the fixed size for the cache).
  • Loading branch information
arne-bdt authored and afs committed Oct 1, 2024
1 parent 1a3e43f commit aff37bc
Show file tree
Hide file tree
Showing 8 changed files with 730 additions and 107 deletions.
38 changes: 32 additions & 6 deletions jena-base/src/main/java/org/apache/jena/atlas/lib/Cache.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

/**
* An abstraction of a cache for basic use.
* This cache does not support null as keys or values.
* <p>
* For more complex configuration of the
* cache, use the cache builder of the implementation of choice.
Expand All @@ -34,14 +35,23 @@
*/
public interface Cache<Key, Value>
{
/** Does the cache contain the key? */
/**
* Does the cache contain the key?
* @param key The key to find. The key must not be null.
* @return True, if the cache contains the key, otherwise false.
*/
public boolean containsKey(Key key) ;

/** Get from cache - or return null. */
/**
* Get from cache - or return null.
* @param key The key for which the value is requested. The key must not be null.
* @return If the cache contains an entry for the given key, the value is returned, otherwise null.
*/
public Value getIfPresent(Key key) ;

/** Get from cache; if not present, call the {@link Callable}
* to try to fill the cache. This operation should be atomic.
* The 'key' and 'callable' must not be null.
* @deprecated Use {@link #get(Object, Function)}
*/
@Deprecated(forRemoval = true)
Expand All @@ -55,15 +65,31 @@ public default Value getOrFill(Key key, Callable<Value> callable) {
});
}

/** Get from cache; if not present, call the {@link Function}
* to fill the cache slot. This operation should be atomic.
/**
* Get from cache; if not present, call the {@link Function}
* to fill the cache slot. This operation should be atomic.
* @param key The key, for which the value should be returned or calculated. The key must not be null.
* @param callable If the cache does not contain the key, the callable is called to calculate a value.
* If the callable returns null, the key is not associated with that value,
* as nulls are not accepted as values.
* The callable must not be null.
* @return Returns either the existing value or the calculated value.
* If callable is called and returns null, then null is returned.
*/
public Value get(Key key, Function<Key, Value> callable) ;

/** Insert into the cache */
/**
* Insert into the cache
* @param key The key for the 'thing' to store. The key must not be null.
* @param thing If 'thing' is null, it will not be used as value,
* instead any existing entry with the same key will be removed.
*/
public void put(Key key, Value thing) ;

/** Remove from cache - return true if key referenced an entry */
/**
* Remove the entry for the given key from the cache, if present. Nothing is returned.
* @param key The key, which shall be removed along with its value. The key must not be null.
*/
public void remove(Key key) ;

/** Iterate over all keys. Iterating over the keys requires the caller be thread-safe. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

package org.apache.jena.atlas.lib ;

import java.util.function.BiConsumer;
import org.apache.jena.atlas.lib.cache.*;

import org.apache.jena.atlas.lib.cache.* ;
import java.util.function.BiConsumer;

public class CacheFactory {
/**
Expand Down Expand Up @@ -80,7 +80,7 @@ public static <Key, Value> Cache<Key, Value> createNullCache() {
* This cache is not thread-safe.
*/
public static <Key, Value> Cache<Key, Value> createSimpleCache(int size) {
return new CacheSimple<>(size, null) ;
return new CacheSimple<>(size) ;
}

/** One slot cache */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,14 @@
import java.util.Arrays;
import java.util.Iterator;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.function.BiConsumer;
import java.util.function.Function;

import org.apache.jena.atlas.AtlasException;
import org.apache.jena.atlas.iterator.Iter;
import org.apache.jena.atlas.lib.Cache;

/**
* A simple fixed size cache that uses the hash code to address a slot.
* The size is always a power of two, to be able to use optimized bit-operations.
* The clash policy is to overwrite.
* <p>
* The cache has very low overhead - there is no object creation during lookup or insert.
Expand All @@ -42,24 +40,30 @@
public class CacheSimple<K, V> implements Cache<K, V> {
private final V[] values;
private final K[] keys;
private final int size;
private final int sizeMinusOne;
private int currentSize = 0;
private BiConsumer<K, V> dropHandler = null;

public CacheSimple(int size) {
this(size, null);
}
/**
* Constructs a fixed-size cache.
* The size is always a power of two, to be able to use optimized bit-operations.
* @param miniumSize If the size is already a power of two it will be used as fixed size for the cache,
* otherwise the next larger power of two will be used.
* (e.g. minimumSize = 10 results in 16 as fixed size for the cache)
*/
public CacheSimple(int miniumSize) {
var size = Integer.highestOneBit(miniumSize);
if (size < miniumSize){
size <<= 1;
}
this.sizeMinusOne = size-1;

public CacheSimple(int size, BiConsumer<K, V> dropHandler) {
@SuppressWarnings("unchecked")
V[] x = (V[])new Object[size];
values = x;

@SuppressWarnings("unchecked")
K[] z = (K[])new Object[size];
keys = z;
this.dropHandler = dropHandler;
this.size = size;
}

@Override
Expand All @@ -73,129 +77,95 @@ public void clear() {
@Override
public boolean containsKey(K key) {
Objects.requireNonNull(key);
return index(key) >= 0 ;
return key.equals(keys[calcIndex(key)]);
}

// Return key index (>=0): return -(index+1) if the key slot is empty.
private final int index(K key) {
int x = (key.hashCode() & 0x7fffffff) % size;
if ( key.equals(keys[x]) )
return x;
return -x - 1;
}

// Convert to a slot index.
private final int decode(int x) {
if ( x >= 0 )
return x;
return -(x+1);
private int calcIndex(K key) {
return key.hashCode() & sizeMinusOne;
}

@Override
public V getIfPresent(K key) {
Objects.requireNonNull(key);
int x = index(key);
if ( x < 0 )
return null;
return values[x];
final int idx = calcIndex(key);
if (key.equals(keys[idx])) {
return values[idx];
}
return null;
}

@Override
public V get(K key, Function<K, V> function) {
return getOrFillNoSync(this, key, function);
}

/**
* Implementation of getOrFill based on Cache.get and Cache.put
* This function is not thread safe.
*/
public static <K,V> V getOrFillNoSync(Cache<K,V> cache, K key, Function<K,V> function) {
V value = cache.getIfPresent(key) ;
if ( value == null ) {
try { value = function.apply(key) ; }
catch (RuntimeException ex) { throw ex; }
catch (Exception e) {
throw new AtlasException("Exception on cache fill", e) ;
}
if ( value != null )
cache.put(key, value) ;
}
return value ;
}

/**
* Implementation of getOrFill based on Cache.get and Cache.put
* This function is not thread safe.
*/
public static <K,V> V getOrFillNoSync(Cache<K,V> cache, K key, Callable<V> callable) {
V value = cache.getIfPresent(key) ;
if ( value == null ) {
try { value = callable.call() ; }
catch (RuntimeException ex) { throw ex; }
catch (Exception e) {
throw new AtlasException("Exception on cache fill", e) ;
Objects.requireNonNull(key);
Objects.requireNonNull(function);
final int idx = calcIndex(key);
final boolean isExistingKeyNotNull = keys[idx] != null;
if(isExistingKeyNotNull && keys[idx].equals(key)) {
return values[idx];
} else {
final var value = function.apply(key);
if(value != null) {
values[idx] = value;
if(!isExistingKeyNotNull) {
currentSize++;
}
keys[idx] = key;
}
if ( value != null )
cache.put(key, value) ;
return value;
}
return value ;
}


@Override
public void put(K key, V thing) {
// thing may be null.
int x = index(key);
x = decode(x);
V old = values[x];
// Drop the old K->V
if ( old != null ) {
if ( old.equals(thing) )
// Replace like-with-like.
return;
if ( dropHandler != null )
dropHandler.accept(keys[x], old);
currentSize--;
//keys[x] = null;
//values[x] = null;
Objects.requireNonNull(key);
final int idx = calcIndex(key);
if(thing == null) { //null value causes removal of entry
if (keys[idx] != null) {
keys[idx] = null;
values[idx] = null;
currentSize--;
}
return;
}

// Already decremented if we are overwriting a full slot.
values[x] = thing;
if ( thing == null ) {
// put(,null) is a remove.
keys[x] = null;
} else {
currentSize++;
keys[x] = key;
if(!thing.equals(values[idx])) {
values[idx] = thing;
}
if(!key.equals(keys[idx])) {
if(keys[idx] == null) { //add value
currentSize++;
}
keys[idx] = key;
}
}

@Override
public void remove(K key) {
put(key, null);
Objects.requireNonNull(key);
final int idx = calcIndex(key);
if (key.equals(keys[idx])) {
keys[idx] = null;
values[idx] = null;
currentSize--;
}
}

@Override
public long size() {
return currentSize;
// long x = 0;
// for ( int i = 0 ; i < size ; i++ ) {
// K key = keys[i];
// if ( key != null )
// x++;
// }
// return x;
}

@Override
public Iterator<K> keys() {
Iterator<K> iter = asList(keys).iterator();
return Iter.removeNulls(iter);
return Iter.iter(asList(keys)).filter(Objects::nonNull);
}

@Override
public boolean isEmpty() {
return currentSize == 0;
}

int getAllocatedSize() {
return keys.length;
}
}
Loading

0 comments on commit aff37bc

Please sign in to comment.