forked from nodejs/node
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Buffer.byteLength is important for speed because it is called whenever a new Buffer is created from a string. This commit optimizes Buffer.byteLength execution by: (1) moving the base64 length calculation into JS-land, which is now much faster; and (2) removing redundant code and streamlining the UTF8 length calculation. It also adds a benchmark and better tests. PR-URL: nodejs#1713 Reviewed-By: Trevor Norris <[email protected]> Reviewed-By: Ben Noordhuis <[email protected]>
- Loading branch information
1 parent
2a71f02
commit 9da168b
Showing
5 changed files
with
155 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
var common = require('../common'); | ||
|
||
// Benchmark parameters. main() reads them back as conf.encoding, conf.len
// and conf.n.
var bench = common.createBenchmark(main, {
  // Encodings handed to Buffer.byteLength()
  encoding: ['utf8', 'base64'],
  // Repeat count applied to each 16-character sample string (x16)
  len: [1, 2, 4, 16, 64, 256],
  // Number of timed Buffer.byteLength() calls
  n: [5e6]
});
|
||
// Sample strings, 16 characters each, one per UTF-8 character width.
var chars = [
  // 1 byte per character
  'hello brendan!!!',
  // 2 bytes per character
  'ΰαβγδεζηθικλμνξο',
  // 3 bytes per character
  '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿',
  // 4 bytes per character
  '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢'
];
|
||
/**
 * Benchmark body: times `conf.n` calls to Buffer.byteLength() spread over
 * one input string per entry of `chars`.
 *
 * @param {Object} conf - Benchmark configuration.
 * @param {number} conf.n - Number of timed Buffer.byteLength() calls.
 * @param {number} conf.len - Repeat count passed to buildString() for each
 *     entry of `chars`.
 * @param {string} conf.encoding - 'utf8' or 'base64'; any other value
 *     produces no input strings.
 * @throws {Error} If a timed call returns a length different from the one
 *     computed before the timer was started.
 */
function main(conf) {
  var n = conf.n | 0;
  var len = conf.len | 0;
  var encoding = conf.encoding;

  var strings = [];
  for (var string of chars) {
    // Strings must be built differently, depending on encoding
    var data = buildString(string, len);
    if (encoding === 'utf8') {
      strings.push(data);
    } else if (encoding === 'base64') {
      // Base64 strings will be much longer than their UTF8 counterparts
      strings.push(new Buffer(data, 'utf8').toString('base64'));
    }
  }

  // Reference lengths, computed before the timer starts. The timed loop
  // compares against them so a wrong result is caught immediately.
  var results = strings.map(function(val) {
    return Buffer.byteLength(val, encoding);
  });

  bench.start();
  for (var i = 0; i < n; i++) {
    // Cycle through every input using the loop counter. Using `n` here
    // would yield the same index on each iteration and leave all but one
    // string unmeasured.
    var index = i % strings.length;
    // Go!
    var r = Buffer.byteLength(strings[index], encoding);

    if (r !== results[index])
      throw new Error('incorrect return value');
  }
  bench.end(n);
}
|
||
/**
 * Builds a string made of `str` repeated `times` times.
 *
 * @param {string} str - The chunk to repeat.
 * @param {number} times - How many copies of `str` to concatenate.
 * @returns {string} The repeated string; '' when `times` is below 1 or NaN.
 */
function buildString(str, times) {
  // Guard against counts that could never reach the old `times == 1` base
  // case (0, negatives, NaN) and would recurse until the stack overflowed.
  if (!(times >= 1)) return '';

  var result = str;
  for (var i = 1; i < times; i++)
    result += str;

  return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
'use strict'; | ||
|
||
var common = require('../common'); | ||
var assert = require('assert'); | ||
var Buffer = require('buffer').Buffer; | ||
|
||
// Non-string values are measured by the length of their string form
[
  { args: [32, 'raw'], expected: 2 },    // '32'
  { args: [NaN, 'utf8'], expected: 3 },  // 'NaN'
  { args: [{}, 'raws'], expected: 15 },  // '[object Object]'
  { args: [], expected: 9 }              // 'undefined'
].forEach(function(testCase) {
  assert.equal(Buffer.byteLength.apply(Buffer, testCase.args),
               testCase.expected);
});
|
||
// Each entry is [arguments for Buffer.byteLength(), expected byte count]
[
  // special case: zero length string
  [['', 'ascii'], 0],
  [['', 'HeX'], 0],

  // utf8, with 1- to 4-byte characters and several spellings of the name
  [['∑éllö wørl∂!', 'utf-8'], 19],
  [['κλμνξο', 'utf8'], 12],
  [['挵挶挷挸挹', 'utf-8'], 15],
  [['𠝹𠱓𠱸', 'UTF8'], 12],

  // without an encoding, utf8 should be assumed
  [['hey there'], 9],
  [['𠱸挶νξ#xx :)'], 17],
  [['hello world', ''], 11],

  // it should also be assumed with unrecognized encoding
  [['hello world', 'abc'], 11],
  [['ßœ∑≈', 'unkn0wn enc0ding'], 10]
].forEach(function(entry) {
  assert.equal(Buffer.byteLength.apply(Buffer, entry[0]), entry[1]);
});
|
||
// base64: each entry is [encoded input, expected decoded byte count]
[
  ['aGVsbG8gd29ybGQ=', 11],
  ['bm9kZS5qcyByb2NrcyE=', 14],
  ['aGkk', 3],
  ['bHNrZGZsa3NqZmtsc2xrZmFqc2RsZmtqcw==', 25],
  // special padding
  ['aaa=', 2],
  ['aaaa==', 3]
].forEach(function(entry) {
  assert.equal(Buffer.byteLength(entry[0], 'base64'), entry[1]);
});
|
||
// The same 12-character string (two of them non-ASCII) measured under
// different encodings.
assert.equal(Buffer.byteLength('Il était tué'), 14);
assert.equal(Buffer.byteLength('Il était tué', 'utf8'), 14);
assert.equal(Buffer.byteLength('Il était tué', 'ascii'), 12);
assert.equal(Buffer.byteLength('Il était tué', 'binary'), 12);
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
  // (actual, expected) order, matching every other assertion in this file
  assert.equal(Buffer.byteLength('Il était tué', encoding), 24);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters