forked from emscripten-core/emscripten
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathruntime_strings.js
206 lines (193 loc) · 8.92 KB
/
runtime_strings.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/**
* @license
* Copyright 2019 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
// runtime_strings.js: Strings related runtime functions that are part of both MINIMAL_RUNTIME and regular runtime.
// Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the given array that contains uint8 values, returns
// a copy of that string as a Javascript String object.
#if TEXTDECODER == 2
var UTF8Decoder = new TextDecoder('utf8');
#else // TEXTDECODER == 2
#if TEXTDECODER
var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
#endif // TEXTDECODER
#endif // TEXTDECODER == 2
/**
* @param {number} idx
* @param {number=} maxBytesToRead
* @return {string}
*/
function UTF8ArrayToString(heap, idx, maxBytesToRead) {
#if CAN_ADDRESS_2GB
idx >>>= 0;
#endif
var endIdx = idx + maxBytesToRead;
#if TEXTDECODER
var endPtr = idx;
// TextDecoder needs to know the byte length in advance, it doesn't stop on null terminator by itself.
// Also, use the length info to avoid running tiny strings through TextDecoder, since .subarray() allocates garbage.
// (As a tiny code save trick, compare endPtr against endIdx using a negation, so that undefined means Infinity)
while (heap[endPtr] && !(endPtr >= endIdx)) ++endPtr;
#endif // TEXTDECODER
#if TEXTDECODER == 2
return UTF8Decoder.decode(
heap.subarray ? heap.subarray(idx, endPtr) : new Uint8Array(heap.slice(idx, endPtr))
);
#else // TEXTDECODER == 2
#if TEXTDECODER
if (endPtr - idx > 16 && heap.subarray && UTF8Decoder) {
return UTF8Decoder.decode(heap.subarray(idx, endPtr));
} else {
#endif // TEXTDECODER
var str = '';
#if TEXTDECODER
// If building with TextDecoder, we have already computed the string length above, so test loop end condition against that
while (idx < endPtr) {
#else
while (!(idx >= endIdx)) {
#endif
// For UTF8 byte structure, see:
// http://en.wikipedia.org/wiki/UTF-8#Description
// https://www.ietf.org/rfc/rfc2279.txt
// https://tools.ietf.org/html/rfc3629
var u0 = heap[idx++];
#if !TEXTDECODER
// If not building with TextDecoder enabled, we don't know the string length, so scan for \0 byte.
// If building with TextDecoder, we know exactly at what byte index the string ends, so checking for nulls here would be redundant.
if (!u0) return str;
#endif
if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
var u1 = heap[idx++] & 63;
if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
var u2 = heap[idx++] & 63;
if ((u0 & 0xF0) == 0xE0) {
u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
} else {
#if ASSERTIONS
if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte 0x' + u0.toString(16) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
#endif
u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heap[idx++] & 63);
}
if (u0 < 0x10000) {
str += String.fromCharCode(u0);
} else {
var ch = u0 - 0x10000;
str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
}
}
#if TEXTDECODER
}
#endif // TEXTDECODER
return str;
#endif // TEXTDECODER == 2
}
// Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the emscripten HEAP, returns a
// copy of that string as a Javascript String object.
// maxBytesToRead: an optional length that specifies the maximum number of bytes to read. You can omit
// this parameter to scan the string until the first \0 byte. If maxBytesToRead is
// passed, and the string at [ptr, ptr+maxBytesToReadr[ contains a null byte in the
// middle, then the string will cut short at that byte index (i.e. maxBytesToRead will
// not produce a string of exact length [ptr, ptr+maxBytesToRead[)
// N.B. mixing frequent uses of UTF8ToString() with and without maxBytesToRead may
// throw JS JIT optimizations off, so it is worth to consider consistently using one
// style or the other.
/**
* @param {number} ptr
* @param {number=} maxBytesToRead
* @return {string}
*/
function UTF8ToString(ptr, maxBytesToRead) {
#if CAN_ADDRESS_2GB
ptr >>>= 0;
#endif
#if TEXTDECODER == 2
if (!ptr) return '';
var maxPtr = ptr + maxBytesToRead;
for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
return UTF8Decoder.decode(HEAPU8.subarray(ptr, end));
#else
return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
#endif
}
// Copies the given Javascript String object 'str' to the given byte array at address 'outIdx',
// encoded in UTF8 form and null-terminated. The copy will require at most str.length*4+1 bytes of space in the HEAP.
// Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
// Parameters:
// str: the Javascript string to copy.
// heap: the array to copy to. Each index in this array is assumed to be one 8-byte element.
// outIdx: The starting offset in the array to begin the copying.
// maxBytesToWrite: The maximum number of bytes this function can write to the array.
// This count should include the null terminator,
// i.e. if maxBytesToWrite=1, only the null terminator will be written and nothing else.
// maxBytesToWrite=0 does not write any bytes to the output, not even the null terminator.
// Returns the number of bytes written, EXCLUDING the null terminator.
function stringToUTF8Array(str, heap, outIdx, maxBytesToWrite) {
#if CAN_ADDRESS_2GB
outIdx >>>= 0;
#endif
if (!(maxBytesToWrite > 0)) // Parameter maxBytesToWrite is not optional. Negative values, 0, null, undefined and false each don't write out any bytes.
return 0;
var startIdx = outIdx;
var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
for (var i = 0; i < str.length; ++i) {
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
// See http://unicode.org/faq/utf_bom.html#utf16-3
// For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629
var u = str.charCodeAt(i); // possibly a lead surrogate
if (u >= 0xD800 && u <= 0xDFFF) {
var u1 = str.charCodeAt(++i);
u = 0x10000 + ((u & 0x3FF) << 10) | (u1 & 0x3FF);
}
if (u <= 0x7F) {
if (outIdx >= endIdx) break;
heap[outIdx++] = u;
} else if (u <= 0x7FF) {
if (outIdx + 1 >= endIdx) break;
heap[outIdx++] = 0xC0 | (u >> 6);
heap[outIdx++] = 0x80 | (u & 63);
} else if (u <= 0xFFFF) {
if (outIdx + 2 >= endIdx) break;
heap[outIdx++] = 0xE0 | (u >> 12);
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
heap[outIdx++] = 0x80 | (u & 63);
} else {
if (outIdx + 3 >= endIdx) break;
#if ASSERTIONS
if (u >= 0x200000) warnOnce('Invalid Unicode code point 0x' + u.toString(16) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x1FFFFF).');
#endif
heap[outIdx++] = 0xF0 | (u >> 18);
heap[outIdx++] = 0x80 | ((u >> 12) & 63);
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
heap[outIdx++] = 0x80 | (u & 63);
}
}
// Null-terminate the pointer to the buffer.
heap[outIdx] = 0;
return outIdx - startIdx;
}
// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
// null-terminated and encoded in UTF8 form. The copy will require at most str.length*4+1 bytes of space in the HEAP.
// Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
// Returns the number of bytes written, EXCLUDING the null terminator.
function stringToUTF8(str, outPtr, maxBytesToWrite) {
#if ASSERTIONS
assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
#endif
return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
}
// Returns the number of bytes the given Javascript string takes if encoded as a UTF8 byte array, EXCLUDING the null terminator byte.
function lengthBytesUTF8(str) {
var len = 0;
for (var i = 0; i < str.length; ++i) {
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
// See http://unicode.org/faq/utf_bom.html#utf16-3
var u = str.charCodeAt(i); // possibly a lead surrogate
if (u >= 0xD800 && u <= 0xDFFF) u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF);
if (u <= 0x7F) ++len;
else if (u <= 0x7FF) len += 2;
else if (u <= 0xFFFF) len += 3;
else len += 4;
}
return len;
}