Optimize AbstractByteBuf.getCharSequence() in US_ASCII case (netty#8392)

* Optimize AbstractByteBuf.getCharSequence() in US_ASCII case Motivation: Inspired by netty#8388, I noticed this simple optimization to avoid char[] allocation (also suggested in a TODO here). Modifications: Return an AsciiString from AbstractByteBuf.getCharSequence() if requested charset is US_ASCII or ISO_8859_1 (latter thanks to @Scottmitch's suggestion). Also tweak unit tests not to require Strings and include a new benchmark to demonstrate the speedup. Result: Speed-up of AbstractByteBuf.getCharSequence() in ascii and iso 8859/1 cases
wokym · Oct 26, 2018 · 583d838 · 583d838
1 parent ce39773
commit 583d838
Show file tree

Hide file tree

Showing 6 changed files with 161 additions and 12 deletions.
diff --git a/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java b/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java
@@ -15,6 +15,7 @@
  */
 package io.netty.buffer;
 
+import io.netty.util.AsciiString;
 import io.netty.util.ByteProcessor;
 import io.netty.util.CharsetUtil;
 import io.netty.util.IllegalReferenceCountException;
@@ -503,7 +504,10 @@ public ByteBuf getBytes(int index, ByteBuf dst, int length) {
 
     @Override
     public CharSequence getCharSequence(int index, int length, Charset charset) {
-        // TODO: We could optimize this for UTF8 and US_ASCII
+        if (CharsetUtil.US_ASCII.equals(charset) || CharsetUtil.ISO_8859_1.equals(charset)) {
+            // ByteBufUtil.getBytes(...) will return a new copy which the AsciiString uses directly
+            return new AsciiString(ByteBufUtil.getBytes(this, index, length, true), false);
+        }
         return toString(index, length, charset);
     }
 

diff --git a/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java b/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java
@@ -30,6 +30,7 @@
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.nio.CharBuffer;
 import java.nio.ReadOnlyBufferException;
 import java.nio.channels.Channels;
 import java.nio.channels.FileChannel;
@@ -3648,11 +3649,23 @@ public void testSetUtf16CharSequence() {
         testSetGetCharSequence(CharsetUtil.UTF_16);
     }
 
+    private static final CharBuffer EXTENDED_ASCII_CHARS, ASCII_CHARS;
+
+    static {
+        char[] chars = new char[256];
+        for (char c = 0; c < chars.length; c++) {
+            chars[c] = c;
+        }
+        EXTENDED_ASCII_CHARS = CharBuffer.wrap(chars);
+        ASCII_CHARS = CharBuffer.wrap(chars, 0, 128);
+    }
+
     private void testSetGetCharSequence(Charset charset) {
-        ByteBuf buf = newBuffer(16);
-        String sequence = "AB";
+        ByteBuf buf = newBuffer(1024);
+        CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
+                ? ASCII_CHARS : EXTENDED_ASCII_CHARS;
         int bytes = buf.setCharSequence(1, sequence, charset);
-        assertEquals(sequence, buf.getCharSequence(1, bytes, charset));
+        assertEquals(sequence, CharBuffer.wrap(buf.getCharSequence(1, bytes, charset)));
         buf.release();
     }
 
@@ -3677,12 +3690,13 @@ public void testWriteReadUtf16CharSequence() {
     }
 
     private void testWriteReadCharSequence(Charset charset) {
-        ByteBuf buf = newBuffer(16);
-        String sequence = "AB";
+        ByteBuf buf = newBuffer(1024);
+        CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
+                ? ASCII_CHARS : EXTENDED_ASCII_CHARS;
         buf.writerIndex(1);
         int bytes = buf.writeCharSequence(sequence, charset);
         buf.readerIndex(1);
-        assertEquals(sequence, buf.readCharSequence(bytes, charset));
+        assertEquals(sequence, CharBuffer.wrap(buf.readCharSequence(bytes, charset)));
         buf.release();
     }
 

diff --git a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java
@@ -21,6 +21,7 @@
 import org.junit.Test;
 
 import java.net.IDN;
+import java.nio.CharBuffer;
 
 import static org.junit.Assert.*;
 
@@ -101,7 +102,7 @@ public void testHostNotEncodedForUnknown() {
     @Test
     public void testIDNEncodeToAsciiForDomain() {
         String host = "тест.рф";
-        String asciiHost = IDN.toASCII(host);
+        CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
         short port = 10000;
 
         SocksCmdRequest rq = new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port);
@@ -116,7 +117,8 @@ public void testIDNEncodeToAsciiForDomain() {
         assertEquals((byte) 0x00, buffer.readByte());
         assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
         assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
-        assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII));
+        assertEquals(asciiHost,
+            CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
         assertEquals(port, buffer.readUnsignedShort());
 
         buffer.release();

diff --git a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java
@@ -21,6 +21,7 @@
 import org.junit.Test;
 
 import java.net.IDN;
+import java.nio.CharBuffer;
 
 import static org.junit.Assert.*;
 
@@ -135,7 +136,7 @@ public void testHostNotEncodedForUnknown() {
     @Test
     public void testIDNEncodeToAsciiForDomain() {
         String host = "тест.рф";
-        String asciiHost = IDN.toASCII(host);
+        CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
         short port = 10000;
 
         SocksCmdResponse rs = new SocksCmdResponse(SocksCmdStatus.SUCCESS, SocksAddressType.DOMAIN, host, port);
@@ -150,7 +151,8 @@ public void testIDNEncodeToAsciiForDomain() {
         assertEquals((byte) 0x00, buffer.readByte());
         assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
         assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
-        assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII));
+        assertEquals(asciiHost,
+            CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
         assertEquals(port, buffer.readUnsignedShort());
 
         buffer.release();

diff --git a/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java b/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java
@@ -24,6 +24,8 @@
 import static io.netty.handler.codec.compression.Snappy.*;
 import static org.junit.Assert.*;
 
+import java.nio.CharBuffer;
+
 public class SnappyTest {
     private final Snappy snappy = new Snappy();
 
@@ -219,7 +221,8 @@ public void encodeAndDecodeLongTextUsesCopy() throws Exception {
         // Decode
         ByteBuf outDecoded = Unpooled.buffer();
         snappy.decode(out, outDecoded);
-        assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
+        assertEquals(CharBuffer.wrap(srcStr),
+            CharBuffer.wrap(outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII)));
 
         in.release();
         out.release();

diff --git a/microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java b/microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2018 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.buffer;
+
+import io.netty.microbench.util.AbstractMicrobenchmark;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
+public class AbstractByteBufGetCharSequenceBenchmark extends AbstractMicrobenchmark {
+
+    public enum ByteBufType {
+        DIRECT {
+            @Override
+            ByteBuf newBuffer(byte[] bytes, int length) {
+                ByteBuf buffer = Unpooled.directBuffer(length);
+                buffer.writeBytes(bytes, 0, length);
+                return buffer;
+            }
+        },
+        HEAP_OFFSET {
+            @Override
+            ByteBuf newBuffer(byte[] bytes, int length) {
+                return Unpooled.wrappedBuffer(bytes, 1, length);
+            }
+        },
+        HEAP {
+            @Override
+            ByteBuf newBuffer(byte[] bytes, int length) {
+                return Unpooled.wrappedBuffer(bytes, 0, length);
+            }
+        },
+        COMPOSITE {
+            @Override
+            ByteBuf newBuffer(byte[] bytes, int length) {
+                CompositeByteBuf buffer = Unpooled.compositeBuffer();
+                int offset = 0;
+                // 8 buffers per composite.
+                int capacity = length / 8;
+
+                while (length > 0) {
+                    buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity)));
+                    length -= capacity;
+                    offset += capacity;
+                }
+                return buffer;
+            }
+        };
+        abstract ByteBuf newBuffer(byte[] bytes, int length);
+    }
+
+    @Param({ "8", "64", "1024", "10240", "1073741824" })
+    public int size;
+
+    @Param({ "US-ASCII", "ISO_8859_1" })
+    public String charsetName;
+
+    @Param
+    public ByteBufType bufferType;
+
+    private ByteBuf buffer;
+    private Charset charset;
+
+    @Override
+    protected String[] jvmArgs() {
+        // Ensure we minimize the GC overhead by sizing the heap big enough.
+        return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" };
+    }
+
+    @Setup
+    public void setup() {
+        byte[] bytes = new byte[size + 2];
+        Arrays.fill(bytes, (byte) 'a');
+
+        // Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers.
+        buffer = bufferType.newBuffer(bytes, size);
+        charset = Charset.forName(charsetName);
+    }
+
+    @TearDown
+    public void teardown() {
+        buffer.release();
+    }
+
+    @Benchmark
+    public int getCharSequence() {
+        return traverse(buffer.getCharSequence(buffer.readerIndex(), size, charset));
+    }
+
+    @Benchmark
+    public int getCharSequenceOld() {
+        return traverse(buffer.toString(buffer.readerIndex(), size, charset));
+    }
+
+    private static int traverse(CharSequence cs) {
+        int i = 0, len = cs.length();
+        while (i < len && cs.charAt(i++) != 0) {
+            // ensure result is "used"
+        }
+        return i;
+    }
+}