Skip to content

Commit

Permalink
Optimize AbstractByteBuf.getCharSequence() in US_ASCII case (netty#8392)
Browse files Browse the repository at this point in the history
* Optimize AbstractByteBuf.getCharSequence() in US_ASCII case

Motivation:

Inspired by netty#8388, I noticed this
simple optimization to avoid char[] allocation (also suggested in a TODO
here).

Modifications:

Return an AsciiString from AbstractByteBuf.getCharSequence() if
requested charset is US_ASCII or ISO_8859_1 (latter thanks to
@Scottmitch's suggestion). Also tweak unit tests not to require Strings
and include a new benchmark to demonstrate the speedup.

Result:

Speed-up of AbstractByteBuf.getCharSequence() in ascii and iso 8859/1
cases
  • Loading branch information
njhill authored and normanmaurer committed Oct 26, 2018
1 parent ce39773 commit 583d838
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 12 deletions.
6 changes: 5 additions & 1 deletion buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package io.netty.buffer;

import io.netty.util.AsciiString;
import io.netty.util.ByteProcessor;
import io.netty.util.CharsetUtil;
import io.netty.util.IllegalReferenceCountException;
Expand Down Expand Up @@ -503,7 +504,10 @@ public ByteBuf getBytes(int index, ByteBuf dst, int length) {

@Override
public CharSequence getCharSequence(int index, int length, Charset charset) {
// TODO: We could optimize this for UTF8 and US_ASCII
if (CharsetUtil.US_ASCII.equals(charset) || CharsetUtil.ISO_8859_1.equals(charset)) {
// ByteBufUtil.getBytes(...) will return a new copy which the AsciiString uses directly
return new AsciiString(ByteBufUtil.getBytes(this, index, length, true), false);
}
return toString(index, length, charset);
}

Expand Down
26 changes: 20 additions & 6 deletions buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.ReadOnlyBufferException;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
Expand Down Expand Up @@ -3648,11 +3649,23 @@ public void testSetUtf16CharSequence() {
testSetGetCharSequence(CharsetUtil.UTF_16);
}

private static final CharBuffer EXTENDED_ASCII_CHARS, ASCII_CHARS;

static {
char[] chars = new char[256];
for (char c = 0; c < chars.length; c++) {
chars[c] = c;
}
EXTENDED_ASCII_CHARS = CharBuffer.wrap(chars);
ASCII_CHARS = CharBuffer.wrap(chars, 0, 128);
}

private void testSetGetCharSequence(Charset charset) {
ByteBuf buf = newBuffer(16);
String sequence = "AB";
ByteBuf buf = newBuffer(1024);
CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
? ASCII_CHARS : EXTENDED_ASCII_CHARS;
int bytes = buf.setCharSequence(1, sequence, charset);
assertEquals(sequence, buf.getCharSequence(1, bytes, charset));
assertEquals(sequence, CharBuffer.wrap(buf.getCharSequence(1, bytes, charset)));
buf.release();
}

Expand All @@ -3677,12 +3690,13 @@ public void testWriteReadUtf16CharSequence() {
}

private void testWriteReadCharSequence(Charset charset) {
ByteBuf buf = newBuffer(16);
String sequence = "AB";
ByteBuf buf = newBuffer(1024);
CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
? ASCII_CHARS : EXTENDED_ASCII_CHARS;
buf.writerIndex(1);
int bytes = buf.writeCharSequence(sequence, charset);
buf.readerIndex(1);
assertEquals(sequence, buf.readCharSequence(bytes, charset));
assertEquals(sequence, CharBuffer.wrap(buf.readCharSequence(bytes, charset)));
buf.release();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.junit.Test;

import java.net.IDN;
import java.nio.CharBuffer;

import static org.junit.Assert.*;

Expand Down Expand Up @@ -101,7 +102,7 @@ public void testHostNotEncodedForUnknown() {
@Test
public void testIDNEncodeToAsciiForDomain() {
String host = "тест.рф";
String asciiHost = IDN.toASCII(host);
CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
short port = 10000;

SocksCmdRequest rq = new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port);
Expand All @@ -116,7 +117,8 @@ public void testIDNEncodeToAsciiForDomain() {
assertEquals((byte) 0x00, buffer.readByte());
assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII));
assertEquals(asciiHost,
CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
assertEquals(port, buffer.readUnsignedShort());

buffer.release();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.junit.Test;

import java.net.IDN;
import java.nio.CharBuffer;

import static org.junit.Assert.*;

Expand Down Expand Up @@ -135,7 +136,7 @@ public void testHostNotEncodedForUnknown() {
@Test
public void testIDNEncodeToAsciiForDomain() {
String host = "тест.рф";
String asciiHost = IDN.toASCII(host);
CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
short port = 10000;

SocksCmdResponse rs = new SocksCmdResponse(SocksCmdStatus.SUCCESS, SocksAddressType.DOMAIN, host, port);
Expand All @@ -150,7 +151,8 @@ public void testIDNEncodeToAsciiForDomain() {
assertEquals((byte) 0x00, buffer.readByte());
assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII));
assertEquals(asciiHost,
CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
assertEquals(port, buffer.readUnsignedShort());

buffer.release();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import static io.netty.handler.codec.compression.Snappy.*;
import static org.junit.Assert.*;

import java.nio.CharBuffer;

public class SnappyTest {
private final Snappy snappy = new Snappy();

Expand Down Expand Up @@ -219,7 +221,8 @@ public void encodeAndDecodeLongTextUsesCopy() throws Exception {
// Decode
ByteBuf outDecoded = Unpooled.buffer();
snappy.decode(out, outDecoded);
assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
assertEquals(CharBuffer.wrap(srcStr),
CharBuffer.wrap(outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII)));

in.release();
out.release();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Copyright 2018 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.buffer;

import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;

@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
public class AbstractByteBufGetCharSequenceBenchmark extends AbstractMicrobenchmark {

public enum ByteBufType {
DIRECT {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
ByteBuf buffer = Unpooled.directBuffer(length);
buffer.writeBytes(bytes, 0, length);
return buffer;
}
},
HEAP_OFFSET {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
return Unpooled.wrappedBuffer(bytes, 1, length);
}
},
HEAP {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
return Unpooled.wrappedBuffer(bytes, 0, length);
}
},
COMPOSITE {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
CompositeByteBuf buffer = Unpooled.compositeBuffer();
int offset = 0;
// 8 buffers per composite.
int capacity = length / 8;

while (length > 0) {
buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity)));
length -= capacity;
offset += capacity;
}
return buffer;
}
};
abstract ByteBuf newBuffer(byte[] bytes, int length);
}

@Param({ "8", "64", "1024", "10240", "1073741824" })
public int size;

@Param({ "US-ASCII", "ISO_8859_1" })
public String charsetName;

@Param
public ByteBufType bufferType;

private ByteBuf buffer;
private Charset charset;

@Override
protected String[] jvmArgs() {
// Ensure we minimize the GC overhead by sizing the heap big enough.
return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" };
}

@Setup
public void setup() {
byte[] bytes = new byte[size + 2];
Arrays.fill(bytes, (byte) 'a');

// Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers.
buffer = bufferType.newBuffer(bytes, size);
charset = Charset.forName(charsetName);
}

@TearDown
public void teardown() {
buffer.release();
}

@Benchmark
public int getCharSequence() {
return traverse(buffer.getCharSequence(buffer.readerIndex(), size, charset));
}

@Benchmark
public int getCharSequenceOld() {
return traverse(buffer.toString(buffer.readerIndex(), size, charset));
}

private static int traverse(CharSequence cs) {
int i = 0, len = cs.length();
while (i < len && cs.charAt(i++) != 0) {
// ensure result is "used"
}
return i;
}
}

0 comments on commit 583d838

Please sign in to comment.