Skip to content

Commit

Permalink
Support using ZDICT_finalizeDictionary to generate zstd dictionary (f…
Browse files Browse the repository at this point in the history
…acebook#9857)

Summary:
An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating raw content dictionary (when max_dict_buffer_bytes > 0), and pass in all buffered uncompressed data blocks as samples.

Pull Request resolved: facebook#9857

Test Plan:
#### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data:
Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible.
```
# linked local ZSTD with version 1.5.2
# DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1  EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench

dict_bytes=16384
train_bytes=1048576
echo "========== No Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total

echo "========== Raw Content Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total

echo "========== FinalizeDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total

echo "========== TrainDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total

# Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory.
# before compression data size: 1.2GB
dict_bytes=16384
max_dict_buffer_bytes =  1048576
                    space   cpu/memory
No Dictionary       468M    14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k
Raw Dictionary      251M    15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k
FinalizeDictionary  236M    11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k
TrainDictionary     84M     7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k
```

#### Benchmark on 10 sample SST files for spacing saving and CPU time on compression:
FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression.
```
dict_bytes=16384
train_bytes=1048576

for sst_file in `ls ../temp/myrock-sst/`
do
  echo "********** $sst_file **********"
  echo "========== No Dictionary =========="
  ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD

  echo "========== Raw Content Dictionary =========="
  ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes

  echo "========== FinalizeDictionary =========="
  ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict

  echo "========== TrainDictionary =========="
  ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes
done

                         010240.sst (Size/Time) 011029.sst              013184.sst              021552.sst              185054.sst              185137.sst              191666.sst              7560381.sst             7604174.sst             7635312.sst
No Dictionary           28165569 / 2614419      32899411 / 2976832      32977848 / 3055542      31966329 / 2004590      33614351 / 1755877      33429029 / 1717042      33611933 / 1776936      33634045 / 2771417      33789721 / 2205414      33592194 / 388254
Raw Content Dictionary  28019950 / 2697961      33748665 / 3572422      33896373 / 3534701      26418431 / 2259658      28560825 / 1839168      28455030 / 1846039      28494319 / 1861349      32391599 / 3095649      33772142 / 2407843      33592230 / 474523
FinalizeDictionary      27896012 / 2650029      33763886 / 3719427      33904283 / 3552793      26008225 / 2198033      28111872 / 1869530      28014374 / 1789771      28047706 / 1848300      32296254 / 3204027      33698698 / 2381468      33592344 / 517433
TrainDictionary         28046089 / 2740037      33706480 / 3679019      33885741 / 3629351      25087123 / 2204558      27194353 / 1970207      27234229 / 1896811      27166710 / 1903119      32011041 / 3322315      32730692 / 2406146      33608631 / 570593
```

#### Decompression/Read test:
With FinalizeDictionary/TrainDictionary, some data structure used for decompression are in stored in dictionary, so they are expected to be faster in terms of decompression/reads.
```
dict_bytes=16384
train_bytes=1048576
echo "No Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s

echo "Raw Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd  -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s

echo "FinalizeDict"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false  > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s

echo "Train Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s

No Dictionary
readrandom   :      12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations;    9.1 MB/s (1000000 of 1000000 found)
Raw Dictionary
readrandom   :      12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations;    9.0 MB/s (1000000 of 1000000 found)
FinalizeDict
readrandom   :       9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations;   11.3 MB/s (1000000 of 1000000 found)
Train Dictionary
readrandom   :       9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations;   11.4 MB/s (1000000 of 1000000 found)
```

Reviewed By: ajkr

Differential Revision: D35720026

Pulled By: cbi42

fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
  • Loading branch information
cbi42 authored and facebook-github-bot committed May 20, 2022
1 parent 6255ac7 commit cc23b46
Show file tree
Hide file tree
Showing 22 changed files with 274 additions and 25 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use.
* In IOOptions, mark `prio` as deprecated for future removal.
* In `file_system.h`, mark `IOPriority` as deprecated for future removal.
* Add an option, `CompressionOptions::use_zstd_dict_trainer`, to indicate whether zstd dictionary trainer should be used for generating zstd compression dictionaries. The default value of this option is true for backward compatibility. When this option is set to false, zstd API `ZDICT_finalizeDictionary` is used to generate compression dictionaries.

### Bug Fixes
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
Expand Down
24 changes: 24 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2855,6 +2855,20 @@ void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
opt->rep.bottommost_compression_opts.enabled = enabled;
}

void rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer,
unsigned char enabled) {
opt->rep.bottommost_compression_opts.use_zstd_dict_trainer =
use_zstd_dict_trainer;
opt->rep.bottommost_compression_opts.enabled = enabled;
}

unsigned char
rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt) {
return opt->rep.bottommost_compression_opts.use_zstd_dict_trainer;
}

void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes,
unsigned char enabled) {
Expand Down Expand Up @@ -2882,6 +2896,16 @@ int rocksdb_options_get_compression_options_zstd_max_train_bytes(
return opt->rep.compression_opts.zstd_max_train_bytes;
}

void rocksdb_options_set_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer) {
opt->rep.compression_opts.use_zstd_dict_trainer = use_zstd_dict_trainer;
}

unsigned char rocksdb_options_get_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt) {
return opt->rep.compression_opts.use_zstd_dict_trainer;
}

void rocksdb_options_set_compression_options_parallel_threads(
rocksdb_options_t* opt, int value) {
opt->rep.compression_opts.parallel_threads = value;
Expand Down
3 changes: 3 additions & 0 deletions db/c_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2539,6 +2539,9 @@ int main(int argc, char** argv) {
200 ==
rocksdb_options_get_compression_options_max_dict_buffer_bytes(co));

rocksdb_options_set_compression_options_use_zstd_dict_trainer(co, 0);
CheckCondition(
0 == rocksdb_options_get_compression_options_use_zstd_dict_trainer(co));
rocksdb_options_destroy(co);
}

Expand Down
10 changes: 8 additions & 2 deletions db/column_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,15 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) {
}
}
if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
if (!ZSTD_TrainDictionarySupported()) {
if (cf_options.compression_opts.use_zstd_dict_trainer) {
if (!ZSTD_TrainDictionarySupported()) {
return Status::InvalidArgument(
"zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
"is not linked with the binary.");
}
} else if (!ZSTD_FinalizeDictionarySupported()) {
return Status::InvalidArgument(
"zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
"zstd finalizeDictionary cannot be used because ZSTD 1.4.5+ "
"is not linked with the binary.");
}
if (cf_options.compression_opts.max_dict_bytes == 0) {
Expand Down
20 changes: 20 additions & 0 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1296,6 +1296,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
enum DictionaryTypes : int {
kWithoutDict,
kWithDict,
kWithZSTDfinalizeDict,
kWithZSTDTrainedDict,
kDictEnd,
};
Expand All @@ -1304,6 +1305,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
options.compression = compression_type;
size_t bytes_without_dict = 0;
size_t bytes_with_dict = 0;
size_t bytes_with_zstd_finalize_dict = 0;
size_t bytes_with_zstd_trained_dict = 0;
for (int i = kWithoutDict; i < kDictEnd; i++) {
// First iteration: compress without preset dictionary
Expand All @@ -1323,12 +1325,21 @@ TEST_F(DBTest2, PresetCompressionDict) {
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
options.compression_opts.zstd_max_train_bytes = 0;
break;
case kWithZSTDfinalizeDict:
if (compression_type != kZSTD) {
continue;
}
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
options.compression_opts.use_zstd_dict_trainer = false;
break;
case kWithZSTDTrainedDict:
if (compression_type != kZSTD) {
continue;
}
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
options.compression_opts.use_zstd_dict_trainer = true;
break;
default:
assert(false);
Expand Down Expand Up @@ -1365,6 +1376,8 @@ TEST_F(DBTest2, PresetCompressionDict) {
bytes_without_dict = total_sst_bytes;
} else if (i == kWithDict) {
bytes_with_dict = total_sst_bytes;
} else if (i == kWithZSTDfinalizeDict) {
bytes_with_zstd_finalize_dict = total_sst_bytes;
} else if (i == kWithZSTDTrainedDict) {
bytes_with_zstd_trained_dict = total_sst_bytes;
}
Expand All @@ -1375,6 +1388,13 @@ TEST_F(DBTest2, PresetCompressionDict) {
}
if (i == kWithDict) {
ASSERT_GT(bytes_without_dict, bytes_with_dict);
} else if (i == kWithZSTDTrainedDict) {
// In zstd compression, it is sometimes possible that using a finalized
// dictionary does not get as good a compression ratio as raw content
// dictionary. But using a dictionary should always get better
// compression ratio than not using one.
ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_finalize_dict ||
bytes_without_dict > bytes_with_zstd_finalize_dict);
} else if (i == kWithZSTDTrainedDict) {
// In zstd compression, it is sometimes possible that using a trained
// dictionary does not get as good a compression ratio as without
Expand Down
1 change: 1 addition & 0 deletions db_stress_tool/db_stress_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ DECLARE_int32(compression_max_dict_bytes);
DECLARE_int32(compression_zstd_max_train_bytes);
DECLARE_int32(compression_parallel_threads);
DECLARE_uint64(compression_max_dict_buffer_bytes);
DECLARE_bool(compression_use_zstd_dict_trainer);
DECLARE_string(checksum_type);
DECLARE_string(env_uri);
DECLARE_string(fs_uri);
Expand Down
7 changes: 7 additions & 0 deletions db_stress_tool/db_stress_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,13 @@ DEFINE_uint64(compression_max_dict_buffer_bytes, 0,
"Buffering limit for SST file data to sample for dictionary "
"compression.");

DEFINE_bool(
compression_use_zstd_dict_trainer, true,
"Use zstd's trainer to generate dictionary. If the options is false, "
"zstd's finalizeDictionary() API is used to generate dictionary. "
"ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, "
"this flag will have the default value true.");

DEFINE_string(bottommost_compression_type, "disable",
"Algorithm to use to compress bottommost level of the database. "
"\"disable\" means disabling the feature");
Expand Down
11 changes: 11 additions & 0 deletions db_stress_tool/db_stress_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//

#include "util/compression.h"
#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_compaction_filter.h"
Expand Down Expand Up @@ -2915,6 +2916,16 @@ void InitializeOptionsFromFlags(
FLAGS_compression_parallel_threads;
options.compression_opts.max_dict_buffer_bytes =
FLAGS_compression_max_dict_buffer_bytes;
if (ZSTD_FinalizeDictionarySupported()) {
options.compression_opts.use_zstd_dict_trainer =
FLAGS_compression_use_zstd_dict_trainer;
} else if (!FLAGS_compression_use_zstd_dict_trainer) {
fprintf(
stderr,
"WARNING: use_zstd_dict_trainer is false but zstd finalizeDictionary "
"cannot be used because ZSTD 1.4.5+ is not linked with the binary."
" zstd dictionary trainer will be used.\n");
}
options.max_manifest_file_size = FLAGS_max_manifest_file_size;
options.inplace_update_support = FLAGS_in_place_update;
options.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
Expand Down
28 changes: 23 additions & 5 deletions include/rocksdb/advanced_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ struct CompressionOptions {
//
// The dictionary is created by sampling the SST file data. If
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
// dictionary generator. Otherwise, the random samples are used directly as
// the dictionary.
// dictionary generator (see comments for option `use_zstd_dict_trainer` for
// detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
// random samples are used directly as the dictionary.
//
// When compression dictionary is disabled, we compress and write each block
// before buffering data for the next one. When compression dictionary is
Expand Down Expand Up @@ -173,6 +174,20 @@ struct CompressionOptions {
// Default: 0 (unlimited)
uint64_t max_dict_buffer_bytes;

// Use zstd trainer to generate dictionaries. When this option is set to true,
// zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
// buffered data will be passed to zstd dictionary trainer to generate a
// dictionary of size max_dict_bytes.
//
// When this option is false, zstd's API ZDICT_finalizeDictionary() will be
// called to generate dictionaries. zstd_max_train_bytes of training sampled
// data will be passed to this API. Using this API should save CPU time on
// dictionary training, but the compression ratio may not be as good as using
// a dictionary trainer.
//
// Default: true
bool use_zstd_dict_trainer;

CompressionOptions()
: window_bits(-14),
level(kDefaultCompressionLevel),
Expand All @@ -181,19 +196,22 @@ struct CompressionOptions {
zstd_max_train_bytes(0),
parallel_threads(1),
enabled(false),
max_dict_buffer_bytes(0) {}
max_dict_buffer_bytes(0),
use_zstd_dict_trainer(true) {}
CompressionOptions(int wbits, int _lev, int _strategy,
uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
uint32_t _parallel_threads, bool _enabled,
uint64_t _max_dict_buffer_bytes)
uint64_t _max_dict_buffer_bytes,
bool _use_zstd_dict_trainer)
: window_bits(wbits),
level(_lev),
strategy(_strategy),
max_dict_bytes(_max_dict_bytes),
zstd_max_train_bytes(_zstd_max_train_bytes),
parallel_threads(_parallel_threads),
enabled(_enabled),
max_dict_buffer_bytes(_max_dict_buffer_bytes) {}
max_dict_buffer_bytes(_max_dict_buffer_bytes),
use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
};

// Temperature of a file. Used to pass to FileSystem for a different
Expand Down
12 changes: 12 additions & 0 deletions include/rocksdb/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,12 @@ extern ROCKSDB_LIBRARY_API int
rocksdb_options_get_compression_options_zstd_max_train_bytes(
rocksdb_options_t* opt);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_compression_options_use_zstd_dict_trainer(
rocksdb_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*,
int);
extern ROCKSDB_LIBRARY_API int
Expand All @@ -1059,6 +1065,12 @@ extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
rocksdb_options_t*, int, unsigned char);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
rocksdb_options_t*, unsigned char, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
rocksdb_options_t* opt);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
rocksdb_options_t*, uint64_t, unsigned char);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor(
Expand Down
24 changes: 24 additions & 0 deletions java/rocksjni/compression_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,30 @@ jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject,
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jlong>(opt->max_dict_buffer_bytes);
}

/*
* Class: org_rocksdb_CompressionOptions
* Method: setZstdMaxTrainBytes
* Signature: (JZ)V
*/
void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer(
JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE;
}

/*
* Class: org_rocksdb_CompressionOptions
* Method: zstdMaxTrainBytes
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*,
jobject,
jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<bool>(opt->use_zstd_dict_trainer);
}

/*
* Class: org_rocksdb_CompressionOptions
* Method: setEnabled
Expand Down
13 changes: 13 additions & 0 deletions options/cf_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ static Status ParseCompressionOptions(const std::string& value,
compression_opts.max_dict_buffer_bytes = ParseUint64(field);
}

// use_zstd_dict_trainer is optional for backwards compatibility
if (!field_stream.eof()) {
if (!std::getline(field_stream, field, kDelimiter)) {
return Status::InvalidArgument(
"unable to parse the specified CF option " + name);
}
compression_opts.use_zstd_dict_trainer = ParseBoolean("", field);
}

if (!field_stream.eof()) {
return Status::InvalidArgument("unable to parse the specified CF option " +
name);
Expand Down Expand Up @@ -156,6 +165,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
{offsetof(struct CompressionOptions, max_dict_buffer_bytes),
OptionType::kUInt64T, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
{"use_zstd_dict_trainer",
{offsetof(struct CompressionOptions, use_zstd_dict_trainer),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
};

static std::unordered_map<std::string, OptionTypeInfo>
Expand Down
7 changes: 7 additions & 0 deletions options/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
" Options.bottommost_compression_opts.max_dict_buffer_bytes: "
"%" PRIu64,
bottommost_compression_opts.max_dict_buffer_bytes);
ROCKS_LOG_HEADER(
log,
" Options.bottommost_compression_opts.use_zstd_dict_trainer: %s",
bottommost_compression_opts.use_zstd_dict_trainer ? "true" : "false");
ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
compression_opts.window_bits);
ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
Expand All @@ -225,6 +229,9 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
" Options.compression_opts.zstd_max_train_bytes: "
"%" PRIu32,
compression_opts.zstd_max_train_bytes);
ROCKS_LOG_HEADER(
log, " Options.compression_opts.use_zstd_dict_trainer: %s",
compression_opts.use_zstd_dict_trainer ? "true" : "false");
ROCKS_LOG_HEADER(log,
" Options.compression_opts.parallel_threads: "
"%" PRIu32,
Expand Down
4 changes: 2 additions & 2 deletions options/options_settable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"max_bytes_for_level_multiplier=60;"
"memtable_factory=SkipListFactory;"
"compression=kNoCompression;"
"compression_opts=5:6:7:8:9:10:true:11;"
"bottommost_compression_opts=4:5:6:7:8:9:true:10;"
"compression_opts=5:6:7:8:9:10:true:11:false;"
"bottommost_compression_opts=4:5:6:7:8:9:true:10:true;"
"bottommost_compression=kDisableCompressionOption;"
"level0_stop_writes_trigger=33;"
"num_levels=99;"
Expand Down
Loading

0 comments on commit cc23b46

Please sign in to comment.