From cc23b46da146bdc6a8ffd789fae7b81330ee17e4 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 20 May 2022 12:09:09 -0700 Subject: [PATCH] Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857) Summary: An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating raw content dictionary (when max_dict_buffer_bytes > 0), and pass in all buffered uncompressed data blocks as samples. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857 Test Plan: #### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data: Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible. 
``` # linked local ZSTD with version 1.5.2 # DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench dict_bytes=16384 train_bytes=1048576 echo "========== No Dictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== Raw Content Dictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== FinalizeDictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true 
-benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== TrainDictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total # Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory. # before compression data size: 1.2GB dict_bytes=16384 max_dict_buffer_bytes = 1048576 space cpu/memory No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k ``` #### Benchmark on 10 sample SST files for space saving and CPU time on compression: FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression. 
``` dict_bytes=16384 train_bytes=1048576 for sst_file in `ls ../temp/myrock-sst/` do echo "********** $sst_file **********" echo "========== No Dictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD echo "========== Raw Content Dictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes echo "========== FinalizeDictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict echo "========== TrainDictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes done 010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254 Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523 FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433 TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 
2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593 ``` #### Decompression/Read test: With FinalizeDictionary/TrainDictionary, some data structures used for decompression are stored in the dictionary, so they are expected to be faster in terms of decompression/reads. ``` dict_bytes=16384 train_bytes=1048576 echo "No Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s echo "Raw Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s echo "FinalizeDict" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s echo "Train Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep 
MB/s No Dictionary readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found) Raw Dictionary readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found) FinalizeDict readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found) Train Dictionary readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found) ``` Reviewed By: ajkr Differential Revision: D35720026 Pulled By: cbi42 fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f --- HISTORY.md | 1 + db/c.cc | 24 +++++++++ db/c_test.c | 3 ++ db/column_family.cc | 10 +++- db/db_test2.cc | 20 ++++++++ db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 7 +++ db_stress_tool/db_stress_test_base.cc | 11 +++++ include/rocksdb/advanced_options.h | 28 +++++++++-- include/rocksdb/c.h | 12 +++++ java/rocksjni/compression_options.cc | 24 +++++++++ options/cf_options.cc | 13 +++++ options/options.cc | 7 +++ options/options_settable_test.cc | 4 +- options/options_test.cc | 49 +++++++++++++++---- .../block_based/block_based_table_builder.cc | 13 +++-- table/sst_file_dumper.cc | 3 +- table/sst_file_dumper.h | 2 +- tools/db_bench_tool.cc | 7 +++ tools/db_crashtest.py | 1 + tools/sst_dump_tool.cc | 10 +++- util/compression.h | 49 +++++++++++++++++++ 22 files changed, 274 insertions(+), 25 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index fa0e9d016cf..fd25c9659c5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -26,6 +26,7 @@ * Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use. * In IOOptions, mark `prio` as deprecated for future removal. 
* In `file_system.h`, mark `IOPriority` as deprecated for future removal. +* Add an option, `CompressionOptions::use_zstd_dict_trainer`, to indicate whether zstd dictionary trainer should be used for generating zstd compression dictionaries. The default value of this option is true for backward compatibility. When this option is set to false, zstd API `ZDICT_finalizeDictionary` is used to generate compression dictionaries. ### Bug Fixes * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. diff --git a/db/c.cc b/db/c.cc index a10a0c843f6..9d1a89ec4ac 100644 --- a/db/c.cc +++ b/db/c.cc @@ -2855,6 +2855,20 @@ void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( opt->rep.bottommost_compression_opts.enabled = enabled; } +void rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer, + unsigned char enabled) { + opt->rep.bottommost_compression_opts.use_zstd_dict_trainer = + use_zstd_dict_trainer; + opt->rep.bottommost_compression_opts.enabled = enabled; +} + +unsigned char +rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt) { + return opt->rep.bottommost_compression_opts.use_zstd_dict_trainer; +} + void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes( rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes, unsigned char enabled) { @@ -2882,6 +2896,16 @@ int rocksdb_options_get_compression_options_zstd_max_train_bytes( return opt->rep.compression_opts.zstd_max_train_bytes; } +void rocksdb_options_set_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer) { + opt->rep.compression_opts.use_zstd_dict_trainer = use_zstd_dict_trainer; +} + +unsigned 
char rocksdb_options_get_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt) { + return opt->rep.compression_opts.use_zstd_dict_trainer; +} + void rocksdb_options_set_compression_options_parallel_threads( rocksdb_options_t* opt, int value) { opt->rep.compression_opts.parallel_threads = value; diff --git a/db/c_test.c b/db/c_test.c index 53c312a98ca..975708068ca 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -2539,6 +2539,9 @@ int main(int argc, char** argv) { 200 == rocksdb_options_get_compression_options_max_dict_buffer_bytes(co)); + rocksdb_options_set_compression_options_use_zstd_dict_trainer(co, 0); + CheckCondition( + 0 == rocksdb_options_get_compression_options_use_zstd_dict_trainer(co)); rocksdb_options_destroy(co); } diff --git a/db/column_family.cc b/db/column_family.cc index 3eb4aab8e27..469f0e771d7 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -136,9 +136,15 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { } } if (cf_options.compression_opts.zstd_max_train_bytes > 0) { - if (!ZSTD_TrainDictionarySupported()) { + if (cf_options.compression_opts.use_zstd_dict_trainer) { + if (!ZSTD_TrainDictionarySupported()) { + return Status::InvalidArgument( + "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ " + "is not linked with the binary."); + } + } else if (!ZSTD_FinalizeDictionarySupported()) { return Status::InvalidArgument( - "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ " + "zstd finalizeDictionary cannot be used because ZSTD 1.4.5+ " "is not linked with the binary."); } if (cf_options.compression_opts.max_dict_bytes == 0) { diff --git a/db/db_test2.cc b/db/db_test2.cc index cb715f0d32e..7de28b0d31b 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -1296,6 +1296,7 @@ TEST_F(DBTest2, PresetCompressionDict) { enum DictionaryTypes : int { kWithoutDict, kWithDict, + kWithZSTDfinalizeDict, kWithZSTDTrainedDict, kDictEnd, }; @@ -1304,6 +1305,7 @@ TEST_F(DBTest2, 
PresetCompressionDict) { options.compression = compression_type; size_t bytes_without_dict = 0; size_t bytes_with_dict = 0; + size_t bytes_with_zstd_finalize_dict = 0; size_t bytes_with_zstd_trained_dict = 0; for (int i = kWithoutDict; i < kDictEnd; i++) { // First iteration: compress without preset dictionary @@ -1323,12 +1325,21 @@ TEST_F(DBTest2, PresetCompressionDict) { options.compression_opts.max_dict_bytes = kBlockSizeBytes; options.compression_opts.zstd_max_train_bytes = 0; break; + case kWithZSTDfinalizeDict: + if (compression_type != kZSTD) { + continue; + } + options.compression_opts.max_dict_bytes = kBlockSizeBytes; + options.compression_opts.zstd_max_train_bytes = kL0FileBytes; + options.compression_opts.use_zstd_dict_trainer = false; + break; case kWithZSTDTrainedDict: if (compression_type != kZSTD) { continue; } options.compression_opts.max_dict_bytes = kBlockSizeBytes; options.compression_opts.zstd_max_train_bytes = kL0FileBytes; + options.compression_opts.use_zstd_dict_trainer = true; break; default: assert(false); @@ -1365,6 +1376,8 @@ TEST_F(DBTest2, PresetCompressionDict) { bytes_without_dict = total_sst_bytes; } else if (i == kWithDict) { bytes_with_dict = total_sst_bytes; + } else if (i == kWithZSTDfinalizeDict) { + bytes_with_zstd_finalize_dict = total_sst_bytes; } else if (i == kWithZSTDTrainedDict) { bytes_with_zstd_trained_dict = total_sst_bytes; } @@ -1375,6 +1388,13 @@ TEST_F(DBTest2, PresetCompressionDict) { } if (i == kWithDict) { ASSERT_GT(bytes_without_dict, bytes_with_dict); + } else if (i == kWithZSTDfinalizeDict) { + // In zstd compression, it is sometimes possible that using a finalized + // dictionary does not get as good a compression ratio as raw content + // dictionary. But using a dictionary should always get better + // compression ratio than not using one. 
+ ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_finalize_dict || + bytes_without_dict > bytes_with_zstd_finalize_dict); } else if (i == kWithZSTDTrainedDict) { // In zstd compression, it is sometimes possible that using a trained // dictionary does not get as good a compression ratio as without diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index b7f11c6f4d8..c1d75d1376a 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -221,6 +221,7 @@ DECLARE_int32(compression_max_dict_bytes); DECLARE_int32(compression_zstd_max_train_bytes); DECLARE_int32(compression_parallel_threads); DECLARE_uint64(compression_max_dict_buffer_bytes); +DECLARE_bool(compression_use_zstd_dict_trainer); DECLARE_string(checksum_type); DECLARE_string(env_uri); DECLARE_string(fs_uri); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 6f44151da7c..eb1a5b0c45a 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -752,6 +752,13 @@ DEFINE_uint64(compression_max_dict_buffer_bytes, 0, "Buffering limit for SST file data to sample for dictionary " "compression."); +DEFINE_bool( + compression_use_zstd_dict_trainer, true, + "Use zstd's trainer to generate dictionary. If the options is false, " + "zstd's finalizeDictionary() API is used to generate dictionary. " + "ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, " + "this flag will have the default value true."); + DEFINE_string(bottommost_compression_type, "disable", "Algorithm to use to compress bottommost level of the database. " "\"disable\" means disabling the feature"); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b8315b5115d..a342f294aae 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. 
// +#include "util/compression.h" #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" #include "db_stress_tool/db_stress_compaction_filter.h" @@ -2915,6 +2916,16 @@ void InitializeOptionsFromFlags( FLAGS_compression_parallel_threads; options.compression_opts.max_dict_buffer_bytes = FLAGS_compression_max_dict_buffer_bytes; + if (ZSTD_FinalizeDictionarySupported()) { + options.compression_opts.use_zstd_dict_trainer = + FLAGS_compression_use_zstd_dict_trainer; + } else if (!FLAGS_compression_use_zstd_dict_trainer) { + fprintf( + stderr, + "WARNING: use_zstd_dict_trainer is false but zstd finalizeDictionary " + "cannot be used because ZSTD 1.4.5+ is not linked with the binary." + " zstd dictionary trainer will be used.\n"); + } options.max_manifest_file_size = FLAGS_max_manifest_file_size; options.inplace_update_support = FLAGS_in_place_update; options.max_subcompactions = static_cast(FLAGS_subcompactions); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 7e0fe29b108..313f289922e 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -100,8 +100,9 @@ struct CompressionOptions { // // The dictionary is created by sampling the SST file data. If // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's - // dictionary generator. Otherwise, the random samples are used directly as - // the dictionary. + // dictionary generator (see comments for option `use_zstd_dict_trainer` for + // detail on dictionary generator). If `zstd_max_train_bytes` is zero, the + // random samples are used directly as the dictionary. // // When compression dictionary is disabled, we compress and write each block // before buffering data for the next one. When compression dictionary is @@ -173,6 +174,20 @@ struct CompressionOptions { // Default: 0 (unlimited) uint64_t max_dict_buffer_bytes; + // Use zstd trainer to generate dictionaries. 
When this option is set to true, + // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes + // buffered data will be passed to zstd dictionary trainer to generate a + // dictionary of size max_dict_bytes. + // + // When this option is false, zstd's API ZDICT_finalizeDictionary() will be + // called to generate dictionaries. zstd_max_train_bytes of training sampled + // data will be passed to this API. Using this API should save CPU time on + // dictionary training, but the compression ratio may not be as good as using + // a dictionary trainer. + // + // Default: true + bool use_zstd_dict_trainer; + CompressionOptions() : window_bits(-14), level(kDefaultCompressionLevel), @@ -181,11 +196,13 @@ struct CompressionOptions { zstd_max_train_bytes(0), parallel_threads(1), enabled(false), - max_dict_buffer_bytes(0) {} + max_dict_buffer_bytes(0), + use_zstd_dict_trainer(true) {} CompressionOptions(int wbits, int _lev, int _strategy, uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes, uint32_t _parallel_threads, bool _enabled, - uint64_t _max_dict_buffer_bytes) + uint64_t _max_dict_buffer_bytes, + bool _use_zstd_dict_trainer) : window_bits(wbits), level(_lev), strategy(_strategy), @@ -193,7 +210,8 @@ struct CompressionOptions { zstd_max_train_bytes(_zstd_max_train_bytes), parallel_threads(_parallel_threads), enabled(_enabled), - max_dict_buffer_bytes(_max_dict_buffer_bytes) {} + max_dict_buffer_bytes(_max_dict_buffer_bytes), + use_zstd_dict_trainer(_use_zstd_dict_trainer) {} }; // Temperature of a file. 
Used to pass to FileSystem for a different diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 05a30027787..86e5dfea81b 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1041,6 +1041,12 @@ extern ROCKSDB_LIBRARY_API int rocksdb_options_get_compression_options_zstd_max_train_bytes( rocksdb_options_t* opt); extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_compression_options_use_zstd_dict_trainer( + rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int @@ -1059,6 +1065,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( rocksdb_options_t*, int, unsigned char); extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t*, unsigned char, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer( + rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes( rocksdb_options_t*, uint64_t, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor( diff --git a/java/rocksjni/compression_options.cc b/java/rocksjni/compression_options.cc index 7f6d1441a3d..78ea2a5592f 100644 --- a/java/rocksjni/compression_options.cc +++ b/java/rocksjni/compression_options.cc @@ -154,6 +154,30 @@ jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject, auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_dict_buffer_bytes); } + +/* + * Class: org_rocksdb_CompressionOptions + * Method: setUseZstdDictTrainer + * Signature: (JZ)V + */ +void 
Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer( + JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) { + auto* opt = reinterpret_cast(jhandle); + opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE; +} + +/* + * Class: org_rocksdb_CompressionOptions + * Method: useZstdDictTrainer + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*, + jobject, + jlong jhandle) { + auto* opt = reinterpret_cast(jhandle); + return static_cast(opt->use_zstd_dict_trainer); +} + /* * Class: org_rocksdb_CompressionOptions * Method: setEnabled diff --git a/options/cf_options.cc b/options/cf_options.cc index d0b859251c5..96cdca44a8d 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -116,6 +116,15 @@ static Status ParseCompressionOptions(const std::string& value, compression_opts.max_dict_buffer_bytes = ParseUint64(field); } + // use_zstd_dict_trainer is optional for backwards compatibility + if (!field_stream.eof()) { + if (!std::getline(field_stream, field, kDelimiter)) { + return Status::InvalidArgument( + "unable to parse the specified CF option " + name); + } + compression_opts.use_zstd_dict_trainer = ParseBoolean("", field); + } + if (!field_stream.eof()) { return Status::InvalidArgument("unable to parse the specified CF option " + name); @@ -156,6 +165,10 @@ static std::unordered_map {offsetof(struct CompressionOptions, max_dict_buffer_bytes), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"use_zstd_dict_trainer", + {offsetof(struct CompressionOptions, use_zstd_dict_trainer), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, }; static std::unordered_map diff --git a/options/options.cc b/options/options.cc index 26b5a4d3353..77639f7a999 100644 --- a/options/options.cc +++ b/options/options.cc @@ -211,6 +211,10 @@ void ColumnFamilyOptions::Dump(Logger* log) const { " 
Options.bottommost_compression_opts.max_dict_buffer_bytes: " "%" PRIu64, bottommost_compression_opts.max_dict_buffer_bytes); + ROCKS_LOG_HEADER( + log, + " Options.bottommost_compression_opts.use_zstd_dict_trainer: %s", + bottommost_compression_opts.use_zstd_dict_trainer ? "true" : "false"); ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d", compression_opts.window_bits); ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d", @@ -225,6 +229,9 @@ void ColumnFamilyOptions::Dump(Logger* log) const { " Options.compression_opts.zstd_max_train_bytes: " "%" PRIu32, compression_opts.zstd_max_train_bytes); + ROCKS_LOG_HEADER( + log, " Options.compression_opts.use_zstd_dict_trainer: %s", + compression_opts.use_zstd_dict_trainer ? "true" : "false"); ROCKS_LOG_HEADER(log, " Options.compression_opts.parallel_threads: " "%" PRIu32, diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 0137d72c273..3fe65f2adc0 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -481,8 +481,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "max_bytes_for_level_multiplier=60;" "memtable_factory=SkipListFactory;" "compression=kNoCompression;" - "compression_opts=5:6:7:8:9:10:true:11;" - "bottommost_compression_opts=4:5:6:7:8:9:true:10;" + "compression_opts=5:6:7:8:9:10:true:11:false;" + "bottommost_compression_opts=4:5:6:7:8:9:true:10:true;" "bottommost_compression=kDisableCompressionOption;" "level0_stop_writes_trigger=33;" "num_levels=99;" diff --git a/options/options_test.cc b/options/options_test.cc index fe66a2ce3d0..10914fc7989 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -68,7 +68,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { "kZSTDNotFinalCompression"}, {"bottommost_compression", "kLZ4Compression"}, {"bottommost_compression_opts", "5:6:7:8:10:true"}, - {"compression_opts", "4:5:6:7:8:true"}, + {"compression_opts", "4:5:6:7:8:2:true:100:false"}, 
{"num_levels", "8"}, {"level0_file_num_compaction_trigger", "8"}, {"level0_slowdown_writes_trigger", "9"}, @@ -191,9 +191,10 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, - CompressionOptions().parallel_threads); + ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 2u); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 100u); + ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); @@ -203,6 +204,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + CompressionOptions().use_zstd_dict_trainer); ASSERT_EQ(new_cf_opt.num_levels, 8); ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); @@ -604,6 +607,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, dflt.parallel_threads); ASSERT_EQ(base_cf_opt.compression_opts.enabled, dflt.enabled); + ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, + dflt.use_zstd_dict_trainer); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 4); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 5); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 6); @@ -613,10 +618,12 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { 
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads, dflt.parallel_threads); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, dflt.enabled); + ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + dflt.use_zstd_dict_trainer); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, ColumnFamilyOptions(), - "compression_opts=4:5:6:7:8:9:true; " + "compression_opts=4:5:6:7:8:9:true:10:false; " "bottommost_compression_opts=5:6:7:8:9:false", &base_cf_opt)); ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 4); @@ -626,6 +633,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, 8u); ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, 9u); ASSERT_EQ(base_cf_opt.compression_opts.enabled, true); + ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); + ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 6); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 7); @@ -634,6 +643,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads, dflt.parallel_threads); ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, false); + ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + dflt.use_zstd_dict_trainer); ASSERT_OK( GetStringFromColumnFamilyOptions(config_options, base_cf_opt, &opts_str)); @@ -646,6 +657,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); + ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); 
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); @@ -654,15 +667,18 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, dflt.parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false); + ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + dflt.use_zstd_dict_trainer); // Test as struct values ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, ColumnFamilyOptions(), "compression_opts={window_bits=5; level=6; strategy=7; max_dict_bytes=8;" - "zstd_max_train_bytes=9;parallel_threads=10;enabled=true}; " + "zstd_max_train_bytes=9;parallel_threads=10;enabled=true;use_zstd_dict_" + "trainer=false}; " "bottommost_compression_opts={window_bits=4; level=5; strategy=6;" " max_dict_bytes=7;zstd_max_train_bytes=8;parallel_threads=9;" - "enabled=false}; ", + "enabled=false;use_zstd_dict_trainer=true}; ", &new_cf_opt)); ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.compression_opts.level, 6); @@ -671,6 +687,7 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 9u); ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 10u); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 4); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 6); @@ -678,6 +695,7 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 8u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, 9u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false); + 
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, true); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, @@ -709,6 +727,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) { base_cf_opt.bottommost_compression_opts.parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, base_cf_opt.bottommost_compression_opts.enabled); + ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer); // Test a few individual struct values ASSERT_OK(GetColumnFamilyOptionsFromString( @@ -1351,6 +1371,7 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) { ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.compression_opts.enabled, false); + ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true); ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption); ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_options.bottommost_compression_opts.level, 6); @@ -1359,6 +1380,8 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) { ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false); + ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer, + true); ASSERT_EQ(new_options.write_buffer_size, 10U); ASSERT_EQ(new_options.max_write_buffer_number, 16); const auto new_bbto = @@ -2263,7 +2286,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { "kZSTDNotFinalCompression"}, {"bottommost_compression", "kLZ4Compression"}, {"bottommost_compression_opts", "5:6:7:8:9:true"}, - {"compression_opts", "4:5:6:7:8:true"}, + {"compression_opts", "4:5:6:7:8:9:true:10:false"}, {"num_levels", "8"}, {"level0_file_num_compaction_trigger", "8"}, 
{"level0_slowdown_writes_trigger", "9"}, @@ -2381,9 +2404,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, - CompressionOptions().parallel_threads); + ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); + ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); @@ -2393,6 +2417,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); + ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_buffer_bytes, + CompressionOptions().max_dict_buffer_bytes); + ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, + CompressionOptions().use_zstd_dict_trainer); ASSERT_EQ(new_cf_opt.num_levels, 8); ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); @@ -3055,6 +3083,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) { ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.compression_opts.enabled, false); + ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true); ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption); ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5); 
ASSERT_EQ(new_options.bottommost_compression_opts.level, 6); @@ -3063,6 +3092,8 @@ TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) { ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false); + ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer, + true); ASSERT_EQ(new_options.write_buffer_size, 10U); ASSERT_EQ(new_options.max_write_buffer_number, 16); diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 32144e41735..796451db564 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1896,9 +1896,15 @@ void BlockBasedTableBuilder::EnterUnbuffered() { // OK if compression_dict_samples is empty, we'll just get empty dictionary. std::string dict; if (r->compression_opts.zstd_max_train_bytes > 0) { - dict = ZSTD_TrainDictionary(compression_dict_samples, - compression_dict_sample_lens, - r->compression_opts.max_dict_bytes); + if (r->compression_opts.use_zstd_dict_trainer) { + dict = ZSTD_TrainDictionary(compression_dict_samples, + compression_dict_sample_lens, + r->compression_opts.max_dict_bytes); + } else { + dict = ZSTD_FinalizeDictionary( + compression_dict_samples, compression_dict_sample_lens, + r->compression_opts.max_dict_bytes, r->compression_opts.level); + } } else { dict = std::move(compression_dict_samples); } @@ -1934,7 +1940,6 @@ void BlockBasedTableBuilder::EnterUnbuffered() { } auto& data_block = r->data_block_buffers[i]; - if (r->IsParallelCompressionEnabled()) { Slice first_key_in_next_block; const Slice* first_key_in_next_block_ptr = &first_key_in_next_block; diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index d02eb3bc142..2323b8504f0 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -253,7 +253,7 @@ Status 
SstFileDumper::ShowAllCompressionSizes( compression_types, int32_t compress_level_from, int32_t compress_level_to, uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes, - uint64_t max_dict_buffer_bytes) { + uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer) { fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size); for (auto& i : compression_types) { if (CompressionTypeSupported(i.first)) { @@ -262,6 +262,7 @@ Status SstFileDumper::ShowAllCompressionSizes( compress_opt.max_dict_bytes = max_dict_bytes; compress_opt.zstd_max_train_bytes = zstd_max_train_bytes; compress_opt.max_dict_buffer_bytes = max_dict_buffer_bytes; + compress_opt.use_zstd_dict_trainer = use_zstd_dict_trainer; for (int32_t j = compress_level_from; j <= compress_level_to; j++) { fprintf(stdout, "Compression level: %d", j); compress_opt.level = j; diff --git a/table/sst_file_dumper.h b/table/sst_file_dumper.h index eb3bbeb4fa4..7be8763909a 100644 --- a/table/sst_file_dumper.h +++ b/table/sst_file_dumper.h @@ -44,7 +44,7 @@ class SstFileDumper { compression_types, int32_t compress_level_from, int32_t compress_level_to, uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes, - uint64_t max_dict_buffer_bytes); + uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer); Status ShowCompressionSize(size_t block_size, CompressionType compress_type, const CompressionOptions& compress_opt); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index ce2ac6fb765..ef4203fbf49 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1216,6 +1216,11 @@ DEFINE_uint64(compression_max_dict_buffer_bytes, ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes, "Maximum bytes to buffer to collect samples for dictionary."); +DEFINE_bool(compression_use_zstd_dict_trainer, + ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer, + "If true, use ZSTD_TrainDictionary() to create dictionary, else " + "use ZSTD_FinalizeDictionary() to create dictionary"); + static bool 
ValidateTableCacheNumshardbits(const char* flagname, int32_t value) { if (0 >= value || value >= 20) { @@ -3976,6 +3981,8 @@ class Benchmark { FLAGS_compression_parallel_threads; options.compression_opts.max_dict_buffer_bytes = FLAGS_compression_max_dict_buffer_bytes; + options.compression_opts.use_zstd_dict_trainer = + FLAGS_compression_use_zstd_dict_trainer; options.max_open_files = FLAGS_open_files; if (FLAGS_cost_write_buffer_to_cache || FLAGS_db_write_buffer_size != 0) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 7565101092a..581d194f6cd 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -58,6 +58,7 @@ # lambda: random.choice([1] * 9 + [4]) "compression_parallel_threads": 1, "compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1, + "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1), "clear_column_family_one_in": 0, "compact_files_one_in": 1000000, "compact_range_one_in": 1000000, diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 1b27cc33cf6..7053366e7e6 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -122,6 +122,9 @@ void print_help(bool to_stderr) { --compression_max_dict_buffer_bytes= Limit on buffer size from which we collect samples for dictionary generation. + + --compression_use_zstd_finalize_dict + Use zstd's finalizeDictionary() API instead of zstd's dictionary trainer to generate dictionary. 
)", supported_compressions.c_str()); } @@ -188,6 +191,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { ROCKSDB_NAMESPACE::CompressionOptions().zstd_max_train_bytes; uint64_t compression_max_dict_buffer_bytes = ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes; + bool compression_use_zstd_finalize_dict = + !ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer; int64_t tmp_val; @@ -311,6 +316,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { return 1; } compression_max_dict_buffer_bytes = static_cast<uint64_t>(tmp_val); + } else if (strcmp(argv[i], "--compression_use_zstd_finalize_dict") == 0) { + compression_use_zstd_finalize_dict = true; } else if (strcmp(argv[i], "--help") == 0) { print_help(/*to_stderr*/ false); return 0; @@ -439,7 +446,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { set_block_size ? block_size : 16384, compression_types.empty() ? kCompressions : compression_types, compress_level_from, compress_level_to, compression_max_dict_bytes, - compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes); + compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes, + !compression_use_zstd_finalize_dict); if (!st.ok()) { fprintf(stderr, "Failed to recompress: %s\n", st.ToString().c_str()); exit(1); diff --git a/util/compression.h b/util/compression.h index 6cb9e670f6b..909116780a7 100644 --- a/util/compression.h +++ b/util/compression.h @@ -653,6 +653,9 @@ inline std::string CompressionOptionsToString( result.append("max_dict_buffer_bytes=") .append(std::to_string(compression_options.max_dict_buffer_bytes)) .append("; "); + result.append("use_zstd_dict_trainer=") + .append(std::to_string(compression_options.use_zstd_dict_trainer)) + .append("; "); return result; } @@ -1482,6 +1485,52 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples, #endif // ZSTD_VERSION_NUMBER >= 10103 } +inline bool 
ZSTD_FinalizeDictionarySupported() { +#ifdef ZSTD + // ZDICT_finalizeDictionary API is stable since v1.4.5 + return (ZSTD_versionNumber() >= 10405); +#else + return false; +#endif +} + +inline std::string ZSTD_FinalizeDictionary( + const std::string& samples, const std::vector<size_t>& sample_lens, + size_t max_dict_bytes, int level) { + // ZDICT_finalizeDictionary is stable since version v1.4.5 +#if ZSTD_VERSION_NUMBER >= 10405 // v1.4.5+ + assert(samples.empty() == sample_lens.empty()); + if (samples.empty()) { + return ""; + } + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + std::string dict_data(max_dict_bytes, '\0'); + size_t dict_len = ZDICT_finalizeDictionary( + dict_data.data(), max_dict_bytes, samples.data(), + std::min(static_cast<size_t>(samples.size()), max_dict_bytes), + samples.data(), sample_lens.data(), + static_cast<unsigned>(sample_lens.size()), + {level, 0 /* notificationLevel */, 0 /* dictID */}); + if (ZDICT_isError(dict_len)) { + return ""; + } else { + assert(dict_len <= max_dict_bytes); + dict_data.resize(dict_len); + return dict_data; + } +#else // up to v1.4.4 + (void)samples; + (void)sample_lens; + (void)max_dict_bytes; + (void)level; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10405 +} + inline bool CompressData(const Slice& raw, const CompressionInfo& compression_info, uint32_t compress_format_version,