Add microbenchmark for LongKeyedBucketOrds (#58608) (#59459)
I've always been confused by the strange behavior that I saw when working on #57304. Specifically, I saw that switching from a bimorphic invocation to a monomorphic invocation gave us a 7%-15% performance bump. This felt *bonkers* to me, and it also made me wonder whether it'd be worth looking into doing it everywhere.

It turns out that, no, it isn't needed everywhere. This benchmark shows that a bimorphic invocation like:
```
LongKeyedBucketOrds ords = new LongKeyedBucketOrds.FromSingle();
ords.add(0, 0); <------ this line
```
is 19% slower than a monomorphic invocation like:
```
LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle();
ords.add(0, 0); <------ this line
```
But *only* when the reference is mutable. In the example above, if `ords` is never changed then both perform the same. But if the `ords` reference is assigned twice then we start to see the difference:
```
immutable bimorphic    avgt   10   6.468 ± 0.045  ns/op
immutable monomorphic  avgt   10   6.756 ± 0.026  ns/op
mutable   bimorphic    avgt   10   9.741 ± 0.073  ns/op
mutable   monomorphic  avgt   10   8.190 ± 0.016  ns/op
```
So the conclusion from all this is that we've done the right thing: `auto_date_histogram` is the only aggregation in which `ords` isn't final, and it is the only aggregation that forces monomorphic invocations. All other aggregations use an immutable bimorphic invocation. Which is fine.

Relates to #56487
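To make the mono/bimorphic distinction concrete without pulling in any Elasticsearch classes, here is a minimal, self-contained JMH sketch of the same experiment. The `CallSiteSketch`, `Accumulator`, `ArrayAccumulator`, and `MapAccumulator` names are illustrative stand-ins, not code from this commit: both implementations are loaded up front, in the same spirit as `forceLoadClasses` in the benchmark below, and the only difference between the two benchmark methods is whether the mutable `acc` reference is declared as the interface or as the concrete class.

```java
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

@Fork(2)
@Warmup(iterations = 10)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(1_000_000)
@State(Scope.Benchmark)
public class CallSiteSketch {
    private static final long LIMIT = 1_000_000;

    /** Stand-in for LongKeyedBucketOrds: an abstract type with two implementations. */
    interface Accumulator {
        long add(long value);
    }

    static class ArrayAccumulator implements Accumulator {
        private long total;

        @Override
        public long add(long value) {
            return total += value;
        }
    }

    static class MapAccumulator implements Accumulator {
        private long total;

        @Override
        public long add(long value) {
            return total += value * 2;
        }
    }

    @Setup
    public void forceLoadClasses(Blackhole bh) {
        // Load both implementations so the JIT cannot prove that Accumulator#add
        // only ever has one possible receiver.
        bh.consume(ArrayAccumulator.class);
        bh.consume(MapAccumulator.class);
    }

    @Benchmark
    public void mutableBimorphic(Blackhole bh) {
        Accumulator acc = new ArrayAccumulator(); // declared as the interface
        for (long i = 0; i < LIMIT; i++) {
            if (i % 100_000 == 0) {
                bh.consume(acc);
                acc = new ArrayAccumulator();     // reassignment keeps the reference mutable
            }
            acc.add(i);                           // call through the abstract type
        }
        bh.consume(acc);
    }

    @Benchmark
    public void mutableMonomorphic(Blackhole bh) {
        ArrayAccumulator acc = new ArrayAccumulator(); // declared as the concrete class
        for (long i = 0; i < LIMIT; i++) {
            if (i % 100_000 == 0) {
                bh.consume(acc);
                acc = new ArrayAccumulator();
            }
            acc.add(i);                                // call site has exactly one receiver type
        }
        bh.consume(acc);
    }
}
```

The two loop bodies are textually identical apart from the declared type of `acc`, which is exactly the variable the measurements above isolate. The absolute numbers will differ from the committed benchmark, but based on the results quoted above the expectation is that the mutable monomorphic variant comes out ahead of the mutable bimorphic one on a typical HotSpot JVM.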
Showing 1 changed file with 172 additions and 0 deletions.
...lasticsearch/benchmark/search/aggregations/bucket/terms/LongKeyedBucketOrdsBenchmark.java
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.benchmark.search.aggregations.bucket.terms;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.PageCacheRecycler;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.concurrent.TimeUnit;

@Fork(2)
@Warmup(iterations = 10)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(1_000_000)
@State(Scope.Benchmark)
public class LongKeyedBucketOrdsBenchmark {
    private static final long LIMIT = 1_000_000;
    /**
     * The number of distinct values to add to the buckets.
     */
    private static final long DISTINCT_VALUES = 10;
    /**
     * The number of buckets to create in the {@link #multiBucket} case.
     * <p>
     * If this is not relatively prime to {@link #DISTINCT_VALUES} then the
     * values won't be scattered evenly across the buckets.
     */
    private static final long DISTINCT_BUCKETS = 21;

    private final PageCacheRecycler recycler = new PageCacheRecycler(Settings.EMPTY);
    private final BigArrays bigArrays = new BigArrays(recycler, null, "REQUEST");

    /**
     * Force loading all of the implementations just for extra paranoia's sake.
     * We really don't want the JVM to be able to eliminate one of them just
     * because we don't use it in the particular benchmark. That is totally a
     * thing it'd do. It is sneaky.
     */
    @Setup
    public void forceLoadClasses(Blackhole bh) {
        bh.consume(LongKeyedBucketOrds.FromSingle.class);
        bh.consume(LongKeyedBucketOrds.FromMany.class);
    }

    /**
     * Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
     * because it is not needed.
     */
    @Benchmark
    public void singleBucketIntoSingleImmutableMonomorphicInvocation(Blackhole bh) {
        try (LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays)) {
            for (long i = 0; i < LIMIT; i++) {
                ords.add(0, i % DISTINCT_VALUES);
            }
            bh.consume(ords);
        }
    }

    /**
     * Emulates the way that most aggregations use {@link LongKeyedBucketOrds}.
     */
    @Benchmark
    public void singleBucketIntoSingleImmutableBimorphicInvocation(Blackhole bh) {
        try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE)) {
            for (long i = 0; i < LIMIT; i++) {
                ords.add(0, i % DISTINCT_VALUES);
            }
            bh.consume(ords);
        }
    }

    /**
     * Emulates the way that {@link AutoDateHistogramAggregationBuilder} uses {@link LongKeyedBucketOrds}.
     */
    @Benchmark
    public void singleBucketIntoSingleMutableMonomorphicInvocation(Blackhole bh) {
        LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
        for (long i = 0; i < LIMIT; i++) {
            if (i % 100_000 == 0) {
                ords.close();
                bh.consume(ords);
                ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
            }
            ords.add(0, i % DISTINCT_VALUES);
        }
        bh.consume(ords);
        ords.close();
    }

    /**
     * Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
     * because it is significantly slower than the
     * {@link #singleBucketIntoSingleMutableMonomorphicInvocation monomorphic invocation}.
     */
    @Benchmark
    public void singleBucketIntoSingleMutableBimorphicInvocation(Blackhole bh) {
        LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
        for (long i = 0; i < LIMIT; i++) {
            if (i % 100_000 == 0) {
                ords.close();
                bh.consume(ords);
                ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
            }
            ords.add(0, i % DISTINCT_VALUES);
        }
        bh.consume(ords);
        ords.close();
    }

    /**
     * Emulates an aggregation that collects from a single bucket "by accident".
     * This can happen if an aggregation is under, say, a {@code terms}
     * aggregation and there is only a single value for that term in the index.
     */
    @Benchmark
    public void singleBucketIntoMulti(Blackhole bh) {
        try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
            for (long i = 0; i < LIMIT; i++) {
                ords.add(0, i % DISTINCT_VALUES);
            }
            bh.consume(ords);
        }
    }

    /**
     * Emulates an aggregation that collects from many buckets.
     */
    @Benchmark
    public void multiBucket(Blackhole bh) {
        try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
            for (long i = 0; i < LIMIT; i++) {
                ords.add(i % DISTINCT_BUCKETS, i % DISTINCT_VALUES);
            }
            bh.consume(ords);
        }
    }
}
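As a usage note, the Elasticsearch build has its own entry point for its `benchmarks` module, so the launcher below is only a hypothetical, self-contained sketch of how a benchmark class like this one is typically selected with the plain JMH `Runner` API; the `LaunchLongKeyedBucketOrdsBenchmark` class name is illustrative and not part of this commit.

```java
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Hypothetical standalone launcher: selects only LongKeyedBucketOrdsBenchmark
// and runs it with the fork/warmup/measurement settings its annotations declare.
public class LaunchLongKeyedBucketOrdsBenchmark {
    public static void main(String[] args) throws RunnerException {
        Options options = new OptionsBuilder()
            .include("LongKeyedBucketOrdsBenchmark") // regex matched against benchmark names
            .build();
        new Runner(options).run();
    }
}
```

Each `@Benchmark` method above then appears as its own row in the JMH output, which is where numbers like the ones quoted in the commit message come from.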