forked from btcsuite/btcd
-
Notifications
You must be signed in to change notification settings - Fork 3
/
compress.go
586 lines (519 loc) · 21.3 KB
/
compress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
// Copyright (c) 2015-2016 The btcsuite developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package blockchain
import (
"github.com/btcsuite/btcd/btcec"
"github.com/btcsuite/btcd/txscript"
)
// -----------------------------------------------------------------------------
// A variable length quantity (VLQ) is an encoding that uses an arbitrary number
// of binary octets to represent an arbitrarily large integer. The scheme
// employs a most significant byte (MSB) base-128 encoding where the high bit in
// each byte indicates whether or not the byte is the final one. In addition,
// to ensure there are no redundant encodings, an offset is subtracted every
// time a group of 7 bits is shifted out. Therefore each integer can be
// represented in exactly one way, and each representation stands for exactly
// one integer.
//
// Another nice property of this encoding is that it provides a compact
// representation of values that are typically used to indicate sizes. For
// example, the values 0 - 127 are represented with a single byte, 128 - 16511
// with two bytes, and 16512 - 2113663 with three bytes.
//
// While the encoding allows arbitrarily large integers, it is artificially
// limited in this code to an unsigned 64-bit integer for efficiency purposes.
//
// Example encodings:
// 0 -> [0x00]
// 127 -> [0x7f] * Max 1-byte value
// 128 -> [0x80 0x00]
// 129 -> [0x80 0x01]
// 255 -> [0x80 0x7f]
// 256 -> [0x81 0x00]
// 16511 -> [0xff 0x7f] * Max 2-byte value
// 16512 -> [0x80 0x80 0x00]
// 32895 -> [0x80 0xff 0x7f]
// 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value
// 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value
// 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f]
//
// References:
// https://en.wikipedia.org/wiki/Variable-length_quantity
// http://www.codecodex.com/wiki/Variable-Length_Integers
// -----------------------------------------------------------------------------
// serializeSizeVLQ returns the number of bytes it would take to serialize the
// passed number as a variable-length quantity according to the format described
// above.
func serializeSizeVLQ(n uint64) int {
size := 1
for ; n > 0x7f; n = (n >> 7) - 1 {
size++
}
return size
}
// putVLQ serializes the provided number to a variable-length quantity according
// to the format described above and returns the number of bytes of the encoded
// value. The result is placed directly into the passed byte slice which must
// be at least large enough to handle the number of bytes returned by the
// serializeSizeVLQ function or it will panic.
func putVLQ(target []byte, n uint64) int {
offset := 0
for ; ; offset++ {
// The high bit is set when another byte follows.
highBitMask := byte(0x80)
if offset == 0 {
highBitMask = 0x00
}
target[offset] = byte(n&0x7f) | highBitMask
if n <= 0x7f {
break
}
n = (n >> 7) - 1
}
// Reverse the bytes so it is MSB-encoded.
for i, j := 0, offset; i < j; i, j = i+1, j-1 {
target[i], target[j] = target[j], target[i]
}
return offset + 1
}
// deserializeVLQ deserializes the provided variable-length quantity according
// to the format described above. It also returns the number of bytes
// deserialized.
func deserializeVLQ(serialized []byte) (uint64, int) {
var n uint64
var size int
for _, val := range serialized {
size++
n = (n << 7) | uint64(val&0x7f)
if val&0x80 != 0x80 {
break
}
n++
}
return n, size
}
// -----------------------------------------------------------------------------
// In order to reduce the size of stored scripts, a domain specific compression
// algorithm is used which recognizes standard scripts and stores them using
// less bytes than the original script. The compression algorithm used here was
// obtained from Bitcoin Core, so all credits for the algorithm go to it.
//
// The general serialized format is:
//
// <script size or type><script data>
//
// Field Type Size
// script size or type VLQ variable
// script data []byte variable
//
// The specific serialized format for each recognized standard script is:
//
// - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash>
// - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash>
// - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value>
// 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use
// 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use
// ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported.
//
// Any scripts which are not recognized as one of the aforementioned standard
// scripts are encoded using the general serialized format and encode the script
// size as the sum of the actual size of the script and the number of special
// cases.
// -----------------------------------------------------------------------------
// The following constants specify the special constants used to identify a
// special script type in the domain-specific compressed script encoding.
//
// NOTE: This section specifically does not use iota since these values are
// serialized and must be stable for long-term storage.
const (
// cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script.
cstPayToPubKeyHash = 0
// cstPayToScriptHash identifies a compressed pay-to-script-hash script.
cstPayToScriptHash = 1
// cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to
// a compressed pubkey. Bit 0 specifies which y-coordinate to use
// to reconstruct the full uncompressed pubkey.
cstPayToPubKeyComp2 = 2
// cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to
// a compressed pubkey. Bit 0 specifies which y-coordinate to use
// to reconstruct the full uncompressed pubkey.
cstPayToPubKeyComp3 = 3
// cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to
// an uncompressed pubkey. Bit 0 specifies which y-coordinate to use
// to reconstruct the full uncompressed pubkey.
cstPayToPubKeyUncomp4 = 4
// cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to
// an uncompressed pubkey. Bit 0 specifies which y-coordinate to use
// to reconstruct the full uncompressed pubkey.
cstPayToPubKeyUncomp5 = 5
// numSpecialScripts is the number of special scripts recognized by the
// domain-specific script compression algorithm.
numSpecialScripts = 6
)
// isPubKeyHash returns whether or not the passed public key script is a
// standard pay-to-pubkey-hash script along with the pubkey hash it is paying to
// if it is.
func isPubKeyHash(script []byte) (bool, []byte) {
if len(script) == 25 && script[0] == txscript.OP_DUP &&
script[1] == txscript.OP_HASH160 &&
script[2] == txscript.OP_DATA_20 &&
script[23] == txscript.OP_EQUALVERIFY &&
script[24] == txscript.OP_CHECKSIG {
return true, script[3:23]
}
return false, nil
}
// isScriptHash returns whether or not the passed public key script is a
// standard pay-to-script-hash script along with the script hash it is paying to
// if it is.
func isScriptHash(script []byte) (bool, []byte) {
if len(script) == 23 && script[0] == txscript.OP_HASH160 &&
script[1] == txscript.OP_DATA_20 &&
script[22] == txscript.OP_EQUAL {
return true, script[2:22]
}
return false, nil
}
// isPubKey returns whether or not the passed public key script is a standard
// pay-to-pubkey script that pays to a valid compressed or uncompressed public
// key along with the serialized pubkey it is paying to if it is.
//
// NOTE: This function ensures the public key is actually valid since the
// compression algorithm requires valid pubkeys. It does not support hybrid
// pubkeys. This means that even if the script has the correct form for a
// pay-to-pubkey script, this function will only return true when it is paying
// to a valid compressed or uncompressed pubkey.
func isPubKey(script []byte) (bool, []byte) {
// Pay-to-compressed-pubkey script.
if len(script) == 35 && script[0] == txscript.OP_DATA_33 &&
script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 ||
script[1] == 0x03) {
// Ensure the public key is valid.
serializedPubKey := script[1:34]
_, err := btcec.ParsePubKey(serializedPubKey, btcec.S256())
if err == nil {
return true, serializedPubKey
}
}
// Pay-to-uncompressed-pubkey script.
if len(script) == 67 && script[0] == txscript.OP_DATA_65 &&
script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 {
// Ensure the public key is valid.
serializedPubKey := script[1:66]
_, err := btcec.ParsePubKey(serializedPubKey, btcec.S256())
if err == nil {
return true, serializedPubKey
}
}
return false, nil
}
// compressedScriptSize returns the number of bytes the passed script would take
// when encoded with the domain specific compression algorithm described above.
func compressedScriptSize(pkScript []byte) int {
// Pay-to-pubkey-hash script.
if valid, _ := isPubKeyHash(pkScript); valid {
return 21
}
// Pay-to-script-hash script.
if valid, _ := isScriptHash(pkScript); valid {
return 21
}
// Pay-to-pubkey (compressed or uncompressed) script.
if valid, _ := isPubKey(pkScript); valid {
return 33
}
// When none of the above special cases apply, encode the script as is
// preceded by the sum of its size and the number of special cases
// encoded as a variable length quantity.
return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) +
len(pkScript)
}
// decodeCompressedScriptSize treats the passed serialized bytes as a compressed
// script, possibly followed by other data, and returns the number of bytes it
// occupies taking into account the special encoding of the script size by the
// domain specific compression algorithm described above.
func decodeCompressedScriptSize(serialized []byte) int {
scriptSize, bytesRead := deserializeVLQ(serialized)
if bytesRead == 0 {
return 0
}
switch scriptSize {
case cstPayToPubKeyHash:
return 21
case cstPayToScriptHash:
return 21
case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4,
cstPayToPubKeyUncomp5:
return 33
}
scriptSize -= numSpecialScripts
scriptSize += uint64(bytesRead)
return int(scriptSize)
}
// putCompressedScript compresses the passed script according to the domain
// specific compression algorithm described above directly into the passed
// target byte slice. The target byte slice must be at least large enough to
// handle the number of bytes returned by the compressedScriptSize function or
// it will panic.
func putCompressedScript(target, pkScript []byte) int {
// Pay-to-pubkey-hash script.
if valid, hash := isPubKeyHash(pkScript); valid {
target[0] = cstPayToPubKeyHash
copy(target[1:21], hash)
return 21
}
// Pay-to-script-hash script.
if valid, hash := isScriptHash(pkScript); valid {
target[0] = cstPayToScriptHash
copy(target[1:21], hash)
return 21
}
// Pay-to-pubkey (compressed or uncompressed) script.
if valid, serializedPubKey := isPubKey(pkScript); valid {
pubKeyFormat := serializedPubKey[0]
switch pubKeyFormat {
case 0x02, 0x03:
target[0] = pubKeyFormat
copy(target[1:33], serializedPubKey[1:33])
return 33
case 0x04:
// Encode the oddness of the serialized pubkey into the
// compressed script type.
target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01)
copy(target[1:33], serializedPubKey[1:33])
return 33
}
}
// When none of the above special cases apply, encode the unmodified
// script preceded by the sum of its size and the number of special
// cases encoded as a variable length quantity.
encodedSize := uint64(len(pkScript) + numSpecialScripts)
vlqSizeLen := putVLQ(target, encodedSize)
copy(target[vlqSizeLen:], pkScript)
return vlqSizeLen + len(pkScript)
}
// decompressScript returns the original script obtained by decompressing the
// passed compressed script according to the domain specific compression
// algorithm described above.
//
// NOTE: The script parameter must already have been proven to be long enough
// to contain the number of bytes returned by decodeCompressedScriptSize or it
// will panic. This is acceptable since it is only an internal function.
func decompressScript(compressedPkScript []byte) []byte {
// In practice this function will not be called with a zero-length or
// nil script since the nil script encoding includes the length, however
// the code below assumes the length exists, so just return nil now if
// the function ever ends up being called with a nil script in the
// future.
if len(compressedPkScript) == 0 {
return nil
}
// Decode the script size and examine it for the special cases.
encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript)
switch encodedScriptSize {
// Pay-to-pubkey-hash script. The resulting script is:
// <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG>
case cstPayToPubKeyHash:
pkScript := make([]byte, 25)
pkScript[0] = txscript.OP_DUP
pkScript[1] = txscript.OP_HASH160
pkScript[2] = txscript.OP_DATA_20
copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20])
pkScript[23] = txscript.OP_EQUALVERIFY
pkScript[24] = txscript.OP_CHECKSIG
return pkScript
// Pay-to-script-hash script. The resulting script is:
// <OP_HASH160><20 byte script hash><OP_EQUAL>
case cstPayToScriptHash:
pkScript := make([]byte, 23)
pkScript[0] = txscript.OP_HASH160
pkScript[1] = txscript.OP_DATA_20
copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20])
pkScript[22] = txscript.OP_EQUAL
return pkScript
// Pay-to-compressed-pubkey script. The resulting script is:
// <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG>
case cstPayToPubKeyComp2, cstPayToPubKeyComp3:
pkScript := make([]byte, 35)
pkScript[0] = txscript.OP_DATA_33
pkScript[1] = byte(encodedScriptSize)
copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32])
pkScript[34] = txscript.OP_CHECKSIG
return pkScript
// Pay-to-uncompressed-pubkey script. The resulting script is:
// <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG>
case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5:
// Change the leading byte to the appropriate compressed pubkey
// identifier (0x02 or 0x03) so it can be decoded as a
// compressed pubkey. This really should never fail since the
// encoding ensures it is valid before compressing to this type.
compressedKey := make([]byte, 33)
compressedKey[0] = byte(encodedScriptSize - 2)
copy(compressedKey[1:], compressedPkScript[1:])
key, err := btcec.ParsePubKey(compressedKey, btcec.S256())
if err != nil {
return nil
}
pkScript := make([]byte, 67)
pkScript[0] = txscript.OP_DATA_65
copy(pkScript[1:], key.SerializeUncompressed())
pkScript[66] = txscript.OP_CHECKSIG
return pkScript
}
// When none of the special cases apply, the script was encoded using
// the general format, so reduce the script size by the number of
// special cases and return the unmodified script.
scriptSize := int(encodedScriptSize - numSpecialScripts)
pkScript := make([]byte, scriptSize)
copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize])
return pkScript
}
// -----------------------------------------------------------------------------
// In order to reduce the size of stored amounts, a domain specific compression
// algorithm is used which relies on there typically being a lot of zeroes at
// end of the amounts. The compression algorithm used here was obtained from
// Bitcoin Core, so all credits for the algorithm go to it.
//
// While this is simply exchanging one uint64 for another, the resulting value
// for typical amounts has a much smaller magnitude which results in fewer bytes
// when encoded as variable length quantity. For example, consider the amount
// of 0.1 BTC which is 10000000 satoshi. Encoding 10000000 as a VLQ would take
// 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte.
//
// Essentially the compression is achieved by splitting the value into an
// exponent in the range [0-9] and a digit in the range [1-9], when possible,
// and encoding them in a way that can be decoded. More specifically, the
// encoding is as follows:
// - 0 is 0
// - Find the exponent, e, as the largest power of 10 that evenly divides the
// value up to a maximum of 9
// - When e < 9, the final digit can't be 0 so store it as d and remove it by
// dividing the value by 10 (call the result n). The encoded value is thus:
// 1 + 10*(9*n + d-1) + e
// - When e==9, the only thing known is the amount is not 0. The encoded value
// is thus:
// 1 + 10*(n-1) + e == 10 + 10*(n-1)
//
// Example encodings:
// (The numbers in parenthesis are the number of bytes when serialized as a VLQ)
// 0 (1) -> 0 (1) * 0.00000000 BTC
// 1000 (2) -> 4 (1) * 0.00001000 BTC
// 10000 (2) -> 5 (1) * 0.00010000 BTC
// 12345678 (4) -> 111111101(4) * 0.12345678 BTC
// 50000000 (4) -> 47 (1) * 0.50000000 BTC
// 100000000 (4) -> 9 (1) * 1.00000000 BTC
// 500000000 (5) -> 49 (1) * 5.00000000 BTC
// 1000000000 (5) -> 10 (1) * 10.00000000 BTC
// -----------------------------------------------------------------------------
// compressTxOutAmount compresses the passed amount according to the domain
// specific compression algorithm described above.
func compressTxOutAmount(amount uint64) uint64 {
// No need to do any work if it's zero.
if amount == 0 {
return 0
}
// Find the largest power of 10 (max of 9) that evenly divides the
// value.
exponent := uint64(0)
for amount%10 == 0 && exponent < 9 {
amount /= 10
exponent++
}
// The compressed result for exponents less than 9 is:
// 1 + 10*(9*n + d-1) + e
if exponent < 9 {
lastDigit := amount % 10
amount /= 10
return 1 + 10*(9*amount+lastDigit-1) + exponent
}
// The compressed result for an exponent of 9 is:
// 1 + 10*(n-1) + e == 10 + 10*(n-1)
return 10 + 10*(amount-1)
}
// decompressTxOutAmount returns the original amount the passed compressed
// amount represents according to the domain specific compression algorithm
// described above.
func decompressTxOutAmount(amount uint64) uint64 {
// No need to do any work if it's zero.
if amount == 0 {
return 0
}
// The decompressed amount is either of the following two equations:
// x = 1 + 10*(9*n + d - 1) + e
// x = 1 + 10*(n - 1) + 9
amount--
// The decompressed amount is now one of the following two equations:
// x = 10*(9*n + d - 1) + e
// x = 10*(n - 1) + 9
exponent := amount % 10
amount /= 10
// The decompressed amount is now one of the following two equations:
// x = 9*n + d - 1 | where e < 9
// x = n - 1 | where e = 9
n := uint64(0)
if exponent < 9 {
lastDigit := amount%9 + 1
amount /= 9
n = amount*10 + lastDigit
} else {
n = amount + 1
}
// Apply the exponent.
for ; exponent > 0; exponent-- {
n *= 10
}
return n
}
// -----------------------------------------------------------------------------
// Compressed transaction outputs consist of an amount and a public key script
// both compressed using the domain specific compression algorithms previously
// described.
//
// The serialized format is:
//
// <compressed amount><compressed script>
//
// Field Type Size
// compressed amount VLQ variable
// compressed script []byte variable
// -----------------------------------------------------------------------------
// compressedTxOutSize returns the number of bytes the passed transaction output
// fields would take when encoded with the format described above.
func compressedTxOutSize(amount uint64, pkScript []byte) int {
return serializeSizeVLQ(compressTxOutAmount(amount)) +
compressedScriptSize(pkScript)
}
// putCompressedTxOut compresses the passed amount and script according to their
// domain specific compression algorithms and encodes them directly into the
// passed target byte slice with the format described above. The target byte
// slice must be at least large enough to handle the number of bytes returned by
// the compressedTxOutSize function or it will panic.
func putCompressedTxOut(target []byte, amount uint64, pkScript []byte) int {
offset := putVLQ(target, compressTxOutAmount(amount))
offset += putCompressedScript(target[offset:], pkScript)
return offset
}
// decodeCompressedTxOut decodes the passed compressed txout, possibly followed
// by other data, into its uncompressed amount and script and returns them along
// with the number of bytes they occupied prior to decompression.
func decodeCompressedTxOut(serialized []byte) (uint64, []byte, int, error) {
// Deserialize the compressed amount and ensure there are bytes
// remaining for the compressed script.
compressedAmount, bytesRead := deserializeVLQ(serialized)
if bytesRead >= len(serialized) {
return 0, nil, bytesRead, errDeserialize("unexpected end of " +
"data after compressed amount")
}
// Decode the compressed script size and ensure there are enough bytes
// left in the slice for it.
scriptSize := decodeCompressedScriptSize(serialized[bytesRead:])
if len(serialized[bytesRead:]) < scriptSize {
return 0, nil, bytesRead, errDeserialize("unexpected end of " +
"data after script size")
}
// Decompress and return the amount and script.
amount := decompressTxOutAmount(compressedAmount)
script := decompressScript(serialized[bytesRead : bytesRead+scriptSize])
return amount, script, bytesRead + scriptSize, nil
}